In [1]:
from faker import Faker
import random
import pandas as pd

fake = Faker()

# Predefined specializations
specializations = [
    "Personal Injury Lawyer",
    "Estate Planning Lawyer",
    "Bankruptcy Lawyer",
    "Intellectual Property Lawyer",
    "Employment Lawyer",
    "Corporate Lawyer",
    "Immigration Lawyer",
    "Criminal Lawyer",
    "Medical Malpractice Lawyer",
    "Tax Lawyer",
    "Family Lawyer",
    "Worker's Compensation Lawyer",
    "Contract Lawyer",
    "Social Security Disability Lawyer",
    "Civil Litigation Lawyer",
    "General Practice Lawyer"
]

def random_specializations(n=5):
    return random.sample(specializations, n)

# Generate Lawyers Data
def generate_lawyers(n=50):
    lawyers_data = []
    for _ in range(n):
        lawyers_data.append({
            "lawyer_id": fake.uuid4(),
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "email": fake.email(),
            "specializations": random_specializations(),
            "rating": round(random.uniform(1, 5), 2),  # Random rating between 1 and 5
            "years_of_experience": random.randint(1, 30)
        })
    return pd.DataFrame(lawyers_data)

# Generate Clients Data
def generate_clients(n=100):
    clients_data = []
    for _ in range(n):
        clients_data.append({
            "client_id": fake.uuid4(),
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "email": fake.email(),
            "preferences": random_specializations(),
        })
    return pd.DataFrame(clients_data)

# Generate data
lawyers_df = generate_lawyers(50)
clients_df = generate_clients(100)

# Display the first few rows of each DataFrame
print("Lawyers Data:\n", lawyers_df.head())
print("\nClients Data:\n", clients_df.head())

lawyer_specialties_mapping = {
    "Personal Injury Lawyer": {"category": "general", "keywords": ["injury", "accident"]},
    "Estate Planning Lawyer": {"category": "business", "keywords": ["estate", "will", "trust"]},
    "Bankruptcy Lawyer": {"category": "business", "keywords": ["bankruptcy", "debt", "insolvency"]},
    "Intellectual Property Lawyer": {"category": "technology", "keywords": ["patent", "copyright", "trademark"]},
    "Employment Lawyer": {"category": "business", "keywords": ["employment", "labor", "workplace"]},
    "Immigration Lawyer": {"category": "general", "keywords": ["immigration", "visa", "citizenship"]},
    "Criminal Lawyer": {"category": "general", "keywords": ["crime", "criminal", "justice"]},
    "Medical Malpractice Lawyer": {"category": "health", "keywords": ["medical malpractice", "healthcare", "patient rights"]},
    "Tax Lawyer": {"category": "business", "keywords": ["tax", "IRS", "revenue"]},
    "Family Lawyer": {"category": "general", "keywords": ["family law", "divorce", "custody"]},
    "Worker's Compensation Lawyer": {"category": "business", "keywords": ["worker's compensation", "labor", "injury"]},
    "Contract Lawyer": {"category": "business", "keywords": ["contract", "agreement", "legal"]},
    "Corporate Lawyer": {"category": "business", "keywords": ["corporate", "business", "company"]},
    "Social Security Disability Lawyer": {"category": "general", "keywords": ["social security", "disability", "benefits"]},
    "Civil Litigation Lawyer": {"category": "general", "keywords": ["litigation", "civil law", "trial"]},
    "General Practice Lawyer": {"category": "general", "keywords": ["legal", "law", "general practice"]}
}


Lawyers Data:
                               lawyer_id first_name last_name  \
0  190ccfbd-4247-4cc7-b8e7-18567b2a0801       Jose   Pacheco   
1  70ed4a29-87fa-45b5-8a29-3008b2eb1044     Robert    Hudson   
2  af42e0b4-4b5e-4c30-b009-75b9feb870b6     Ashley   Schmidt   
3  615ca409-1061-4d62-b0ac-ba4574de8f58    Michael    Martin   
4  e8c262b9-6305-4265-b761-ed9f6cf1f04b    Kenneth  Campbell   

                      email  \
0   huntjeffrey@example.org   
1         amy12@example.com   
2    kevinbeard@example.net   
3      austin80@example.com   
4  kellymeadows@example.net   

                                     specializations  rating  \
0  [Immigration Lawyer, Corporate Lawyer, Social ...    3.79   
1  [Civil Litigation Lawyer, Tax Lawyer, Estate P...    2.90   
2  [Worker's Compensation Lawyer, Social Security...    3.18   
3  [Criminal Lawyer, Contract Lawyer, Tax Lawyer,...    4.70   
4  [Estate Planning Lawyer, Worker's Compensation...    3.96   

   years_of_experience  
0  

In [1]:
import requests
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
# Combine lawyers and clients data for simplicity
combined_df = pd.concat([lawyers_df[['lawyer_id', 'specializations']], clients_df[['client_id', 'preferences']].rename(columns={'client_id': 'lawyer_id', 'preferences': 'specializations'})])

# Convert specializations to string for vectorization
combined_df['specializations_str'] = combined_df['specializations'].apply(lambda x: ' '.join(x))

# Create TF-IDF model
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(combined_df['specializations_str'])

# Create KNN model
knn = NearestNeighbors(n_neighbors=5, algorithm='auto').fit(tfidf_matrix)

# Function to recommend news
def recommend_news(user_id, api_key):
    # Find the index of the user in dataframe
    idx = combined_df.index[combined_df['lawyer_id'] == user_id].tolist()[0]

    # Find the nearest neighbors (similar users)
    distances, indices = knn.kneighbors(tfidf_matrix[idx], n_neighbors=5)

    # Get keywords from similar users' specializations
    keywords = []
    for i in indices[0]:
        keywords.extend(combined_df.iloc[i]['specializations'])

    # Fetch news for these keywords
    all_articles = []
    for keyword in set(keywords):
        url = f"https://newsapi.org/v2/everything?q={keyword}&apiKey={api_key}"
        response = requests.get(url)
        if response.status_code == 200:
            all_articles.extend(response.json()['articles'])

    return all_articles[:15]

# Example usage
api_key = "31a6e9bd038e47d8aa7e555b1d1ce0bd"
user_id = lawyers_df['lawyer_id'].iloc[0]  # Use an actual user ID
recommended_articles = recommend_news(user_id, api_key)

# Print recommended articles
for article in recommended_articles:
    print("________________________________________________________________\n")
    print(article['title'], "-", article['description'])
    print (article)

NameError: name 'lawyers_df' is not defined