In [1]:
from faker import Faker
import random
import pandas as pd

fake = Faker()

# Predefined specializations
specializations = [
    "Criminal Law", "Business Law", "Family Law", "Labor Law", 
    "Civil Rights Law", "Tax Law", "Real Estate Law", "Intellectual Property Law", 
    "Bankruptcy Law", "Personal Injury Law", "Environmental Law", "Estate Planning Law", 
    "Corporate Law", "Immigration Law", "Contract Law"
]

def random_specializations(n=5):
    return random.sample(specializations, n)

# Generate Lawyers Data
def generate_lawyers(n=50):
    lawyers_data = []
    for _ in range(n):
        lawyers_data.append({
            "lawyer_id": fake.uuid4(),
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "email": fake.email(),
            "specializations": random_specializations(),
            "rating": round(random.uniform(1, 5), 2),  # Random rating between 1 and 5
            "years_of_experience": random.randint(1, 30)
        })
    return pd.DataFrame(lawyers_data)

# Generate Clients Data
def generate_clients(n=100):
    clients_data = []
    for _ in range(n):
        clients_data.append({
            "client_id": fake.uuid4(),
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "email": fake.email(),
            "preferences": random_specializations(),
        })
    return pd.DataFrame(clients_data)

# Generate Client-Lawyer Interactions
def generate_interactions(n=200, lawyers=None, clients=None):
    interactions_data = []
    for _ in range(n):
        interactions_data.append({
            "interaction_id": fake.uuid4(),
            "lawyer_id": random.choice(lawyers['lawyer_id'].values),
            "client_id": random.choice(clients['client_id'].values),
            "interaction_type": random.choice(["message", "appointment", "inquiry"]),
            "timestamp": fake.date_time_this_year(),
        })
    return pd.DataFrame(interactions_data)

# Generate Lawyer Rating Data
def generate_lawyer_ratings(n=200, lawyers=None, clients=None):
    ratings_data = []
    for _ in range(n):
        ratings_data.append({
            "rating_id": fake.uuid4(),
            "client_id": random.choice(clients['client_id'].values),
            "lawyer_id": random.choice(lawyers['lawyer_id'].values),
            "ratings": round(random.uniform(1, 5), 2),  # Random rating between 1 and 5
            "created_at": fake.date_time_this_year(),
        })
    return pd.DataFrame(ratings_data)

# Generate Lawyer Profile View Data
def generate_lawyer_profile_views(n=200, lawyers=None, clients=None):
    views_data = []
    for _ in range(n):
        views_data.append({
            "view_id": fake.uuid4(),
            "client_id": random.choice(clients['client_id'].values),
            "lawyer_id": random.choice(lawyers['lawyer_id'].values),
            "added_at": fake.date_time_this_year(),
        })
    return pd.DataFrame(views_data)

# Generate data
lawyers_df = generate_lawyers(50)
clients_df = generate_clients(100)
interactions_df = generate_interactions(200, lawyers_df, clients_df)
lawyer_ratings = generate_lawyer_ratings(200, lawyers_df, clients_df)
lawyer_profile_views = generate_lawyer_profile_views(200, lawyers_df, clients_df)

# Display the first few rows of each DataFrame
print("Lawyers Data:\n", lawyers_df.head())
print("\nClients Data:\n", clients_df.head())
print("\nInteractions Data:\n", interactions_df.head())


Lawyers Data:
                               lawyer_id first_name last_name  \
0  81a56626-2354-46fa-acc8-70216daf4bc4     Dustin   Mathews   
1  d0cc347c-1af0-416f-b575-6053df8f86f2       Cody   Nichols   
2  46a50ee3-2e28-4fa0-9d36-65cd2793f979     Jeremy  Martinez   
3  c3dfd914-f680-4d81-8428-48eb520b2017      Emily  Williams   
4  6d5ceb0c-c708-4061-9351-48c3bc5189ee     Joshua    Haynes   

                        email  \
0  donnacervantes@example.org   
1     holdenmaria@example.net   
2       bcampbell@example.com   
3  timothycarlson@example.net   
4       william40@example.net   

                                     specializations  rating  \
0  [Corporate Law, Immigration Law, Business Law,...    4.55   
1  [Bankruptcy Law, Contract Law, Environmental L...    4.22   
2  [Estate Planning Law, Immigration Law, Crimina...    1.26   
3  [Business Law, Bankruptcy Law, Environmental L...    3.21   
4  [Labor Law, Corporate Law, Criminal Law, Envir...    1.82   

   years_of_expe

In [2]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.layers import StringLookup, Embedding, Concatenate, Dense
from tensorflow.keras import layers
import numpy as np
# Binarize specializations and preferences
mlb = MultiLabelBinarizer()
specializations_binarized = mlb.fit_transform(lawyers_df['specializations'])
preferences_binarized = mlb.transform(clients_df['preferences'])

def process_interactions(interactions_df, lawyers_df, clients_df):
    interaction_counts = interactions_df.groupby(['client_id', 'lawyer_id']).size().reset_index(name='counts')
    interaction_counts = interaction_counts.merge(lawyers_df[['lawyer_id', 'specializations']], on='lawyer_id')
    interaction_counts = interaction_counts.explode('specializations').groupby(['client_id', 'specializations']).sum().reset_index()

    client_interaction_matrix = interaction_counts.pivot(index='client_id', columns='specializations', values='counts').fillna(0)

    # Ensure all clients are included, even those without interactions
    client_interaction_matrix = client_interaction_matrix.reindex(clients_df['client_id']).fillna(0)
    client_interaction_matrix = client_interaction_matrix.reindex(columns=mlb.classes_, fill_value=0)

    return client_interaction_matrix

client_interaction_matrix = process_interactions(interactions_df, lawyers_df, clients_df)

# Now combine preferences and interaction features
combined_client_features = np.hstack((preferences_binarized, client_interaction_matrix.values))





  interaction_counts = interaction_counts.explode('specializations').groupby(['client_id', 'specializations']).sum().reset_index()


In [3]:
class LawyerRecommenderModel(tfrs.Model):
    def __init__(self, embedding_dimension, num_features):
        super().__init__()
        self.client_model = tf.keras.Sequential([
            layers.Dense(embedding_dimension, activation='relu'),
            layers.Dense(embedding_dimension, activation='relu')
        ])
        self.lawyer_model = tf.keras.Sequential([
            layers.Dense(embedding_dimension, activation='relu'),
            layers.Dense(embedding_dimension, activation='relu')
        ])
        self.task = tfrs.tasks.Retrieval()

    def compute_loss(self, features, training=False):
        client_embeddings = self.client_model(features[0])
        lawyer_embeddings = self.lawyer_model(features[1])
        return self.task(client_embeddings, lawyer_embeddings)

# Prepare the TensorFlow datasets
tf_client_features = tf.data.Dataset.from_tensor_slices(combined_client_features).batch(10)
tf_lawyer_features = tf.data.Dataset.from_tensor_slices(specializations_binarized).batch(10)

# Train the model
model = LawyerRecommenderModel(embedding_dimension=32, num_features=combined_client_features.shape[1])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(tf.data.Dataset.zip((tf_client_features, tf_lawyer_features)), epochs=10)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x293424844d0>

In [4]:
# Prepare the TensorFlow datasets
tf_client_features = tf.data.Dataset.from_tensor_slices(combined_client_features).batch(10)
tf_lawyer_features = tf.data.Dataset.from_tensor_slices(specializations_binarized).batch(10)

# Train the model
model = LawyerRecommenderModel(embedding_dimension=32, num_features=combined_client_features.shape[1])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
model.fit(tf.data.Dataset.zip((tf_client_features, tf_lawyer_features)), epochs=10)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x29342594810>

In [5]:
def recommend_lawyers_hybrid(client_id, top_n=5):
    client_idx = clients_df.index[clients_df['client_id'] == client_id].tolist()[0]
    client_features = combined_client_features[client_idx:client_idx+1]

    client_embedding = model.client_model.predict(client_features)
    lawyer_embeddings = model.lawyer_model.predict(specializations_binarized)
    similarities = tf.linalg.matmul(client_embedding, lawyer_embeddings, transpose_b=True)
    
    top_indices = tf.argsort(similarities, axis=-1, direction='DESCENDING')[0, :top_n].numpy()
    recommended_lawyers = lawyers_df.iloc[top_indices]
    
    return recommended_lawyers

# Test the recommendation function with a client ID
test_client_id = clients_df['client_id'].iloc[0]  # Replace with an actual client ID
hyb_recommended_lawyers = recommend_lawyers_hybrid(test_client_id)
print("Recommended Lawyers:\n", hyb_recommended_lawyers)

Recommended Lawyers:
                                lawyer_id first_name  last_name  \
22  b67a99c1-9b6a-4435-95d2-3a082145ebf7     Angela       Hale   
33  fa945013-ab26-4bc8-b2d9-6d9d377ee6f3      Katie     Turner   
29  0ad94812-4aa6-4a25-8687-364dc67ebaca     Ronald  Hernandez   
0   81a56626-2354-46fa-acc8-70216daf4bc4     Dustin    Mathews   
43  f360c4ff-0a94-4382-b960-cb230982f16b     Ariana     Tucker   

                         email  \
22  jasminesummers@example.com   
33      johnsjacob@example.net   
29         louis79@example.com   
0   donnacervantes@example.org   
43        rachel93@example.net   

                                      specializations  rating  \
22  [Intellectual Property Law, Business Law, Corp...    2.40   
33  [Personal Injury Law, Intellectual Property La...    2.41   
29  [Corporate Law, Contract Law, Intellectual Pro...    1.57   
0   [Corporate Law, Immigration Law, Business Law,...    4.55   
43  [Immigration Law, Criminal Law, Tax Law, Labor.

CREATING COLLABORATIVE FILTERING

In [6]:
import pandas as pd
import random
from faker import Faker
import tensorflow as tf
import tensorflow_recommenders as tfrs

# Preparing the combined data
combined_data = pd.concat([
    lawyer_ratings[['client_id', 'lawyer_id']].assign(interaction=1),
    lawyer_profile_views[['client_id', 'lawyer_id']].assign(interaction=0.5)
])

# Mapping string IDs to integers
user_ids_mapping = {id: i for i, id in enumerate(combined_data['client_id'].unique())}
lawyer_ids_mapping = {id: i for i, id in enumerate(combined_data['lawyer_id'].unique())}

combined_data['client_id'] = combined_data['client_id'].map(user_ids_mapping)
combined_data['lawyer_id'] = combined_data['lawyer_id'].map(lawyer_ids_mapping)

# Building the TensorFlow dataset
dataset = tf.data.Dataset.from_tensor_slices({
    "client_id": combined_data['client_id'].values,
    "lawyer_id": combined_data['lawyer_id'].values,
    "interaction": combined_data['interaction'].values
})

# Define the model
class LawyerRecommender(tfrs.models.Model):

    def __init__(self, user_model, lawyer_model):
        super().__init__()
        self.lawyer_model: tf.keras.Model = lawyer_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = tfrs.tasks.Retrieval()

    def compute_loss(self, features, training=False):
        user_embeddings = self.user_model(features["client_id"])
        lawyer_embeddings = self.lawyer_model(features["lawyer_id"])

        return self.task(user_embeddings, lawyer_embeddings, sample_weight=features["interaction"])

# Define the user and lawyer models
embedding_dimension = 32

user_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(user_ids_mapping) + 1, embedding_dimension)
])

lawyer_model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(lawyer_ids_mapping) + 1, embedding_dimension)
])

# Instantiate and compile the model
model = LawyerRecommender(user_model, lawyer_model)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Train the model
model.fit(dataset.batch(256), epochs=5)


Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x293426b72d0>

In [8]:
# Function to find top K recommended lawyers for a given client
def recommend_lawyers_using_collaborative_filtering(model, client_id, lawyer_ids_mapping, k=5):
    # Ensure the client_id is mapped
    client_id_mapped = user_ids_mapping.get(client_id)
    if client_id_mapped is None:
        raise ValueError("Client ID not found in the dataset")

    # Convert client_id to tensor
    client_id_tensor = tf.constant([client_id_mapped], dtype=tf.int64)

    # Compute embeddings for the given client
    client_embedding = model.user_model(client_id_tensor)

    # Correct way to get embeddings from the lawyer model
    lawyer_embeddings = model.lawyer_model.layers[0].embeddings.numpy()

    # Compute the scores
    scores = tf.tensordot(client_embedding, lawyer_embeddings, axes=[[1], [1]])
    scores = tf.squeeze(scores)

    # Find the top K indexes
    top_k_indexes = tf.math.top_k(scores, k=k).indices.numpy()

    # Map back to lawyer IDs
    reverse_lawyer_ids_mapping = {v: k for k, v in lawyer_ids_mapping.items()}
    recommended_lawyer_ids = [reverse_lawyer_ids_mapping[idx] for idx in top_k_indexes]

    return recommended_lawyer_ids

# Example: Get recommendations for a specific client
client_id = '86261da4-c95e-4701-a986-920557720886'  # Replace with a valid client ID from your dataset
try:
    cf_recommended_lawyers = recommend_lawyers_using_collaborative_filtering(model, client_id, lawyer_ids_mapping)
    print("Recommended Lawyers for Client ID", client_id, ":", cf_recommended_lawyers)
except ValueError as e:
    print(e)


Recommended Lawyers for Client ID 86261da4-c95e-4701-a986-920557720886 : ['de4cc2d7-30c4-4a96-bec6-2aeca77a2065', 'd014cdbb-e158-4aa4-b17d-93e193e42ab9', '0ab5db91-fe06-47e4-a8da-595b760d1a1c', '6d5ceb0c-c708-4061-9351-48c3bc5189ee', '0f56c877-d7d4-491a-a781-5aaf82e000df']


In [7]:
clients_df

Unnamed: 0,client_id,first_name,last_name,email,preferences
0,4eda1064-e337-4cff-a274-345ea7857ea1,Carol,Walker,gardnerlisa@example.com,"[Contract Law, Criminal Law, Immigration Law, ..."
1,e1a0591b-966b-4a2d-834c-7a5896c364ee,Lisa,Harper,perrymaria@example.org,"[Labor Law, Real Estate Law, Bankruptcy Law, C..."
2,8f6e8077-ffa8-45f8-893e-b2472730d82f,Marcus,Mckee,joyjones@example.net,"[Immigration Law, Civil Rights Law, Contract L..."
3,3b01536a-d912-4180-a05e-3978cad344b3,Ashley,Griffith,stephengreene@example.net,"[Family Law, Civil Rights Law, Immigration Law..."
4,a24fd714-d904-474e-ae2e-d0558dff4636,David,Hughes,baldwinjoseph@example.com,"[Real Estate Law, Corporate Law, Bankruptcy La..."
...,...,...,...,...,...
95,1b01f82f-a1ef-4ff6-ae41-08305f7f626f,Brittany,Ramirez,wrightmatthew@example.org,"[Business Law, Intellectual Property Law, Bank..."
96,2744dd7f-eb6d-419a-9f54-cc984f7f8702,Gregory,Chan,chase55@example.com,"[Civil Rights Law, Environmental Law, Family L..."
97,ee9d469a-005a-44e4-8c2e-ac3bfaf71c1b,Chelsea,Palmer,barbara46@example.net,"[Estate Planning Law, Immigration Law, Busines..."
98,980f9224-6946-4c94-a2dd-6cfc8323fffb,Joseph,Harris,gabrielleshepard@example.org,"[Intellectual Property Law, Family Law, Immigr..."


In [10]:
def combined_recommendation(client_id, lawyer_ids_mapping, model, k=5):
    try:
        # Get recommendations from collaborative filtering
        cf_recommended_lawyers = recommend_lawyers_using_collaborative_filtering(model, client_id, lawyer_ids_mapping, k)
    except ValueError as e:
        print(e)
        cf_recommended_lawyers = []
    
    # Get recommendations from hybrid model
    hyb_recommended_lawyers = recommend_lawyers_hybrid(client_id, top_n=k)
    hyb_recommended_lawyer_ids = hyb_recommended_lawyers['lawyer_id'].tolist()

    # Combine the recommendations
    combined_recommendations = list(set(cf_recommended_lawyers + hyb_recommended_lawyer_ids))

    # Score lawyers: More occurrences across recommendations mean higher relevance
    lawyer_scores = {}
    for lawyer_id in combined_recommendations:
        score = (cf_recommended_lawyers.count(lawyer_id) + hyb_recommended_lawyer_ids.count(lawyer_id))
        lawyer_scores[lawyer_id] = score

    # Sort lawyers by their score in descending order
    sorted_lawyers = sorted(lawyer_scores, key=lawyer_scores.get, reverse=True)

    # Return the top K lawyers
    return sorted_lawyers[:k]

# Test the combined recommendation function
test_client_id = '86261da4-c95e-4701-a986-920557720886'  # Replace with a valid client ID from your dataset
combined_lawyers = combined_recommendation(test_client_id, lawyer_ids_mapping, model)
print("Combined Recommended Lawyers for Client ID", test_client_id, ":", combined_lawyers)


AttributeError: 'LawyerRecommender' object has no attribute 'client_model'