In [41]:
# Install & Imports
!pip install mlxtend tabulate

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)  # hide utcnow() spam

import pandas as pd
from mlxtend.frequent_patterns import apriori
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from tabulate import tabulate



In [42]:
# Load your dataset
file_path = "Dummy_Lawyers.csv"
df = pd.read_csv(file_path)

# Map to expected schema
df = df.rename(columns={
    "Years of active experience": "experience",
    "Domain": "specialization",
    "Jurisdiction": "location"
})

# Clean values
df["experience"] = df["experience"].fillna(0).astype(int)
df["specialization"] = df["specialization"].fillna("").astype(str)
df["specialization_list"] = df["specialization"].str.lower().str.split(r',\s*', regex=True)

print("✅ Dataset loaded and prepared")
print(df.head())


✅ Dataset loaded and prepared
               Name specialization           location  experience     Price  \
0      Sharon Weiss        Tax Law  Bombay High Court          27  29654.73   
1       Leah Ritter   Criminal Law  Bombay High Court          30  18833.78   
2  Andrew Gutierrez        Tax Law  Bombay High Court          36  36825.07   
3        John Black   Criminal Law  Bombay High Court          19  26951.06   
4     Jeffrey Banks      Civil Law  Bombay High Court           3  18255.58   

   No of cases fought  No of cases running  No of cases settled  \
0                 114                   23                   91   
1                 216                   12                  204   
2                 197                    1                  196   
3                 492                  128                  364   
4                 272                   32                  240   

   No of favoured settlements  No of unfavoured settlements  \
0                          81

In [43]:
# APriori Frequent Itemsets
# Build binary matrix for Apriori
specialization_list = [spec.strip() for sublist in df['specialization_list'] for spec in sublist]
specialization_set = list(set(specialization_list))
binary_matrix = pd.DataFrame(0, index=df['Name'], columns=specialization_set)

for i, row in df.iterrows():
    for spec in row['specialization_list']:
        if spec.strip():
            binary_matrix.loc[row['Name'], spec.strip()] = 1

# Frequent itemsets
frequent_itemsets = apriori(binary_matrix, min_support=0.1, use_colnames=True)

print("📊 Frequent Specialization Patterns:")
print(frequent_itemsets.sort_values("support", ascending=False).head(10))

📊 Frequent Specialization Patterns:
   support         itemsets
5    0.149      (labor law)
7    0.137        (tax law)
1    0.135   (criminal law)
6    0.128     (family law)
3    0.126  (corporate law)
0    0.119   (property law)
2    0.110      (civil law)
4    0.108         (ip law)


In [44]:
def recommend_hybrid(domain=None, min_exp=None, max_exp=None, top_n=5):
    fdf = df.copy()

    # Apply filters
    if domain:
        fdf = fdf[fdf["specialization"].str.lower().str.contains(domain.lower())]
    if min_exp is not None:
        fdf = fdf[fdf["experience"] >= min_exp]
    if max_exp is not None:
        fdf = fdf[fdf["experience"] <= max_exp]

    if fdf.empty:
        print("⚠️ No lawyers match the filters.")
        return

    # Add specialization count & score
    fdf["spec_count"] = fdf["specialization_list"].apply(len)
    fdf["score"] = fdf["experience"] + fdf["spec_count"]

    # Cluster lawyers
    scaler = StandardScaler()
    features = scaler.fit_transform(fdf[["experience", "spec_count"]])
    n_clusters = min(3, len(fdf))
    if n_clusters > 1:
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        fdf["cluster"] = kmeans.fit_predict(features)
    else:
        fdf["cluster"] = 0

    # Rank within clusters
    recommended = fdf.sort_values(["cluster", "score"], ascending=[True, False]).head(top_n)

    print("\n🔎 Top Recommended Lawyers:")
    print(tabulate(recommended[["Name", "experience", "specialization", "location", "score"]],
                   headers="keys", tablefmt="grid", showindex=False))
    return recommended

In [38]:
def recommend_hybrid(user_query):
    filters = parse_query(user_query)
    fdf = df.copy()

    # Apply filters
    if filters["min_exp"] is not None:
        fdf = fdf[fdf["experience"] >= filters["min_exp"]]
    if filters["max_exp"] is not None:
        fdf = fdf[fdf["experience"] <= filters["max_exp"]]
    if filters["include_cities"]:
        fdf = fdf[fdf["city"].isin([c.lower() for c in filters["include_cities"]])]
    if filters["exclude_cities"]:
        fdf = fdf[~fdf["city"].isin([c.lower() for c in filters["exclude_cities"]])]
    if filters["include_specs"]:
        fdf = fdf[fdf["specialization"].apply(lambda s: any(inc in s for inc in filters["include_specs"]))]
    if filters["exclude_specs"]:
        fdf = fdf[~fdf["specialization"].apply(lambda s: any(exc in s for exc in filters["exclude_specs"]))]

    if fdf.empty:
        print("⚠️ No lawyers found matching criteria.")
        return

    # Semantic similarity scoring
    query_embedding = model.encode(user_query, convert_to_tensor=True).cpu()
    def embed(row):
        text = f"{row['Name']} {row['experience']} years {row['specialization']} {row['location']}"
        return model.encode(text, convert_to_tensor=True).cpu()

    fdf["embedding"] = fdf.apply(embed, axis=1)
    fdf["similarity"] = fdf["embedding"].apply(lambda emb: util.pytorch_cos_sim(query_embedding, emb).item())
    fdf["rank"] = fdf["similarity"] + (fdf["experience"] / 50)

    # Clustering
    fdf["spec_count"] = fdf["specialization_list"].apply(len)
    scaler = StandardScaler()
    features = scaler.fit_transform(fdf[["experience", "spec_count"]])

    if len(fdf) > 1:
        kmeans = KMeans(n_clusters=min(3, len(fdf)), random_state=42)
        fdf["cluster"] = kmeans.fit_predict(features)
    else:
        fdf["cluster"] = 0

    fdf["final_rank"] = fdf["rank"] + (fdf["spec_count"] / 10)

    # Top results
    result = fdf.sort_values(["cluster", "final_rank"], ascending=[True, False]).head(5)
    print("\n🔎 Top Recommended Lawyers:")
    print(tabulate(result[["Name", "experience", "specialization", "location", "final_rank"]], headers="keys", tablefmt="grid", showindex=False))
    return result

In [45]:
recommend_hybrid(domain="tax law", min_exp=10)
recommend_hybrid(domain="criminal law", max_exp=5)
recommend_hybrid(domain="civil law", min_exp=5, max_exp=20)


🔎 Top Recommended Lawyers:
+----------------+--------------+------------------+-------------------+---------+
| Name           |   experience | specialization   | location          |   score |
| Angela Potter  |           32 | Tax Law          | Bombay High Court |      33 |
+----------------+--------------+------------------+-------------------+---------+
| Alan Hall      |           32 | Tax Law          | Bombay High Court |      33 |
+----------------+--------------+------------------+-------------------+---------+
| Ashley Nichols |           32 | Tax Law          | Bombay High Court |      33 |
+----------------+--------------+------------------+-------------------+---------+
| Matthew Fowler |           31 | Tax Law          | Bombay High Court |      32 |
+----------------+--------------+------------------+-------------------+---------+
| Lynn Atkins    |           31 | Tax Law          | Bombay High Court |      32 |
+----------------+--------------+------------------+-------

Unnamed: 0,Name,specialization,location,experience,Price,No of cases fought,No of cases running,No of cases settled,No of favoured settlements,No of unfavoured settlements,Client satisfaction (out of 10),Age,specialization_list,spec_count,score,cluster
215,Curtis May,Civil Law,Bombay High Court,14,10499.05,81,24,57,24,33,5.8,63,[civil law],1,15,0
870,Autumn Scott,Civil Law,Bombay High Court,14,7017.6,73,19,54,39,15,4.2,39,[civil law],1,15,0
25,Nicole Jimenez,Civil Law,Bombay High Court,12,18800.12,374,92,282,23,259,7.8,46,[civil law],1,13,0
542,Karen Murphy,Civil Law,Bombay High Court,12,28513.86,393,78,315,134,181,9.7,58,[civil law],1,13,0
684,Brianna Sloan,Civil Law,Bombay High Court,12,16432.33,393,77,316,166,150,4.3,44,[civil law],1,13,0
