Generating the fake data that will be used in our recommendation model

In [None]:
# Installing the required libraries
%pip install faker
%pip install tensorflow
%pip install tensorflow_recommenders

In [None]:
from faker import Faker
import pandas as pd
import numpy as np
import random
import tensorflow as tf
import tensorflow_recommenders as tfrs
from sklearn.model_selection import train_test_split

In [None]:
fake = Faker()

# Seed both random sources used by the generators in this cell (Faker's shared
# generator and the stdlib `random` module) so that a fresh
# Restart & Run All reproduces the same synthetic data.
SEED = 42
Faker.seed(SEED)
random.seed(SEED)

# Initialize a counter. NOTE(review): the generator functions visible in this
# cell use fake.uuid4() for ids and never read this counter; it is kept in
# case a later cell relies on it.
lawyer_id_counter = 1

# Function to generate fake lawyer data

def generate_lawyer_data(num_lawyers):
    """Build a DataFrame of synthetic lawyer profiles.

    Uses the module-level ``fake`` Faker instance for personal details and the
    stdlib ``random`` module for specializations, experience and rating.

    Args:
        num_lawyers: Number of lawyer rows to generate.

    Returns:
        A pandas DataFrame with one row per lawyer. ``specializations`` holds a
        list of 1-3 distinct practice areas; ``created_at`` is never later
        than ``updated_at``.
    """
    specialization_pool = ['Personal Injury', 'Criminal', 'Family', 'Immigration', 'Business', 'Estate Planning']
    lawyer_data = []
    for _ in range(num_lawyers):
        # Draw two timestamps and order them, so that a profile can never be
        # "updated" before it was created.
        created_at, updated_at = sorted(
            (fake.date_time_this_decade(), fake.date_time_this_decade())
        )
        lawyer_data.append({
            'lawyer_id': fake.uuid4(),
            'first_name': fake.first_name(),
            'last_name': fake.last_name(),
            'email': fake.email(),
            'ph_number': fake.phone_number(),
            'address': fake.address(),
            'password': fake.password(),
            'specializations': random.sample(specialization_pool, k=random.randint(1, 3)),
            'years_of_experience': random.randint(1, 30),
            # Company names stand in for university names here.
            'universities': fake.company(),
            'rating': round(random.uniform(1, 5), 2),
            'created_at': created_at,
            'updated_at': updated_at,
            'profile_picture': fake.image_url(),
            'verified': fake.boolean(),
            'account_type': 'lawyer'
        })
    return pd.DataFrame(lawyer_data)

# Function to generate fake client data
def generate_client_data(num_clients):
    """Build a DataFrame of synthetic client profiles.

    Uses the module-level ``fake`` Faker instance for personal details and the
    stdlib ``random`` module for the preferred practice areas.

    Args:
        num_clients: Number of client rows to generate.

    Returns:
        A pandas DataFrame with one row per client. ``preferences`` holds a
        list of 1-3 distinct practice areas; ``created_at`` is never later
        than ``updated_at``.
    """
    preference_pool = ['Personal Injury', 'Criminal', 'Family', 'Immigration', 'Business', 'Estate Planning']
    client_data = []
    for _ in range(num_clients):
        # Draw two timestamps and order them, so that a profile can never be
        # "updated" before it was created.
        created_at, updated_at = sorted(
            (fake.date_time_this_decade(), fake.date_time_this_decade())
        )
        client_data.append({
            'client_id': fake.uuid4(),
            'first_name': fake.first_name(),
            'last_name': fake.last_name(),
            'email': fake.email(),
            'ph_number': fake.phone_number(),
            'address': fake.address(),
            'password': fake.password(),
            'created_at': created_at,
            'updated_at': updated_at,
            'profile_picture': fake.image_url(),
            'verified': fake.boolean(),
            'account_type': 'client',
            'preferences': random.sample(preference_pool, k=random.randint(1, 3))
        })
    return pd.DataFrame(client_data)

# Function to generate fake client interactions
def generate_client_interactions(num_interactions, lawyer_ids, client_ids):
    """Create a DataFrame of random lawyer/client interaction records.

    Args:
        num_interactions: Number of interaction rows to produce.
        lawyer_ids: Sequence of lawyer ids to pick from (one per row).
        client_ids: Sequence of client ids to pick from (one per row).

    Returns:
        A pandas DataFrame with the columns interaction_id, interaction_type,
        timestamp, lawyer_id, client_id and context.
    """
    channels = ['meeting', 'call', 'message']

    def _one_interaction():
        # Fields are filled in the same order as the output columns.
        record = {}
        record['interaction_id'] = fake.uuid4()
        record['interaction_type'] = random.choice(channels)
        record['timestamp'] = fake.date_time_this_year()
        record['lawyer_id'] = random.choice(lawyer_ids)
        record['client_id'] = random.choice(client_ids)
        record['context'] = fake.sentence()
        return record

    records = [_one_interaction() for _ in range(num_interactions)]
    return pd.DataFrame(records)

# Function to generate fake lawyer ratings
def generate_lawyer_ratings(num_ratings, lawyer_ids, client_ids):
    """Create a DataFrame of random client-to-lawyer ratings.

    Args:
        num_ratings: Number of rating rows to produce.
        lawyer_ids: Sequence of lawyer ids to pick from (one per row).
        client_ids: Sequence of client ids to pick from (one per row).

    Returns:
        A pandas DataFrame with the columns rating_id, client_id, lawyer_id,
        ratings (a float in [1, 5] rounded to two decimals) and created_at.
    """

    def _one_rating():
        # Fields are filled in the same order as the output columns.
        record = {}
        record['rating_id'] = fake.uuid4()
        record['client_id'] = random.choice(client_ids)
        record['lawyer_id'] = random.choice(lawyer_ids)
        record['ratings'] = round(random.uniform(1, 5), 2)
        record['created_at'] = fake.date_time_this_decade()
        return record

    records = [_one_rating() for _ in range(num_ratings)]
    return pd.DataFrame(records)



In [None]:
# Table sizes for the synthetic lawyer and client profiles
num_lawyers = 100
num_clients = 200

# Profile tables
lawyer_data = generate_lawyer_data(num_lawyers)
client_data = generate_client_data(num_clients)

# Interaction and rating tables, each row pairing a random lawyer id with a
# random client id taken from the profile tables above
interaction_data = generate_client_interactions(
    num_interactions=500,
    lawyer_ids=lawyer_data['lawyer_id'].unique(),
    client_ids=client_data['client_id'].unique(),
)
rating_data = generate_lawyer_ratings(
    num_ratings=300,
    lawyer_ids=lawyer_data['lawyer_id'].unique(),
    client_ids=client_data['client_id'].unique(),
)

Displaying the lawyer and client data

In [None]:
# Print a label, then end the cell with the first rows of the lawyer table so
# the notebook renders them as the cell output.
print("Lawyer Data:")
lawyer_data.head()

In [None]:
# Print a label, then end the cell with the first rows of the client table so
# the notebook renders them as the cell output.
print("\nClient Data:")
client_data.head()


In [75]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

# Fixed vocabulary of legal practice areas. A specialization's position in this
# list is used further down as its integer id.
specializations = [
    "Criminal Law",
    "Business Law",
    "Family Law",
    "Labor Law",
    "Civil Rights Law",
    "Tax Law",
    "Real Estate Law",
    "Intellectual Property Law",
    "Bankruptcy Law",
    "Personal Injury Law",
    "Environmental Law",
    "Estate Planning Law",
    "Corporate Law",
    "Immigration Law",
    "Contract Law",
]

# Generate synthetic data for lawyers
def generate_lawyers(n=50):
    """Return a DataFrame of ``n`` synthetic lawyers.

    Each row has a sequential ``lawyer_id`` ("lawyer_1" .. "lawyer_n") and a
    ``specialization`` picked with ``np.random.choice`` from the module-level
    ``specializations`` list.
    """
    ids = ["lawyer_" + str(number) for number in range(1, n + 1)]
    picked = np.random.choice(specializations, n)
    columns = {"lawyer_id": ids, "specialization": picked}
    return pd.DataFrame(columns)

# Generate synthetic data for clients
def generate_clients(n=20):
    """Return a DataFrame of ``n`` synthetic clients.

    Each row has a sequential ``client_id`` ("client_1" .. "client_n") and a
    ``preference`` picked with ``np.random.choice`` from the module-level
    ``specializations`` list.
    """
    ids = ["client_" + str(number) for number in range(1, n + 1)]
    picked = np.random.choice(specializations, n)
    columns = {"client_id": ids, "preference": picked}
    return pd.DataFrame(columns)

# Seed NumPy (used by generate_lawyers / generate_clients) and TensorFlow
# (used for the embedding initialisation) so that a fresh Restart & Run All
# reproduces the same synthetic tables and starting weights.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

lawyers_df = generate_lawyers()
clients_df = generate_clients()

# Map specializations to integers (the index of each name in `specializations`)
specialization_to_id = {specialization: idx for idx, specialization in enumerate(specializations)}
lawyers_df["specialization_id"] = lawyers_df["specialization"].map(specialization_to_id)
clients_df["preference_id"] = clients_df["preference"].map(specialization_to_id)

# Prepare the dataset for TensorFlow: one integer id per lawyer / per client
lawyers_dataset = tf.data.Dataset.from_tensor_slices(lawyers_df["specialization_id"])
clients_dataset = tf.data.Dataset.from_tensor_slices(clients_df["preference_id"])

embedding_dimension = 32

# Lawyer and client models: each is a single embedding table indexed by the
# specialization id. The table has len(specializations) + 1 rows, i.e. one
# more than the ids 0..len-1 produced by the mapping above.
lawyer_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(specializations) + 1, embedding_dimension)
])

client_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(specializations) + 1, embedding_dimension)
])

# Define the TFRS model
class LawyerRecommender(tfrs.Model):
    """Retrieval model that scores lawyers against a client's preference.

    Holds one embedding model for lawyers and one for clients, and a
    ``tfrs.tasks.Retrieval`` task whose ``FactorizedTopK`` metric is evaluated
    against the embedded candidate lawyers.
    """

    def __init__(self, lawyer_model, client_model, candidates=None):
        """Store the two towers and build the retrieval task.

        Args:
            lawyer_model: Keras model mapping ``specialization_id`` to an
                embedding; also used to embed the metric candidates.
            client_model: Keras model mapping ``preference_id`` to an
                embedding.
            candidates: Optional unbatched ``tf.data.Dataset`` of lawyer
                features accepted by ``lawyer_model``. Defaults to the
                module-level ``lawyers_dataset``, which was previously the
                only (implicit) option.
        """
        super().__init__()
        self.lawyer_model = lawyer_model
        self.client_model = client_model

        # Fall back to the notebook-level dataset so existing two-argument
        # calls behave exactly as before.
        if candidates is None:
            candidates = lawyers_dataset

        # Retrieval task
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=candidates.batch(128).map(self.lawyer_model)
            )
        )

    def compute_loss(self, features, training=False):
        """Return the retrieval loss for one batch.

        Args:
            features: Mapping with the keys ``"specialization_id"`` (fed to
                the lawyer model) and ``"preference_id"`` (fed to the client
                model).
            training: Accepted for the ``tfrs.Model`` interface; not used, so
                the task is invoked identically in training and evaluation.

        Returns:
            The value returned by the retrieval task for the
            (client, lawyer) embedding pair.
        """
        lawyer_embeddings = self.lawyer_model(features["specialization_id"])
        client_embeddings = self.client_model(features["preference_id"])

        # Clients are the queries, lawyers are the candidates.
        return self.task(client_embeddings, lawyer_embeddings)

# Instantiate the recommender and attach the optimizer
model = LawyerRecommender(lawyer_model, client_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))

# Interaction data (fake): every client is paired with a lawyer specialization
# equal to the client's own preference, so both features come from the same
# `preference_id` column.
interaction_data = tf.data.Dataset.from_tensor_slices(
    {
        "specialization_id": clients_df["preference_id"],
        "preference_id": clients_df["preference_id"],
    }
).batch(batch_size=10)

# Train the model; the returned History object is the cell's output
model.fit(x=interaction_data, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2bcd59dc9d0>