In [None]:
### Generating the Lookalike Model

In [4]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the two raw inputs from Google Drive: the customer profiles and the
# transaction log (in that order).
customers, transactions = map(
    pd.read_csv,
    (
        "/content/drive/MyDrive/Data Science assignment /Datasets/Customers.csv",
        "/content/drive/MyDrive/Data Science assignment /Datasets/Transactions.csv",
    ),
)

# Step 1: Prepare the data
# Aggregate transactions to get one row of customer-level metrics per CustomerID.
customer_transactions = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),             # Total spending
        Quantity=('Quantity', 'sum'),                 # Total quantity purchased
        TransactionCount=('TransactionID', 'count'),  # Total transactions
    )
    .reset_index()
)

# Merge with customer profiles. The left join keeps customers that have no
# transactions at all; their metric columns come back as NaN.
metric_columns = ['TotalValue', 'Quantity', 'TransactionCount']
customer_features = customers.merge(customer_transactions, on='CustomerID', how='left')

# Zero-fill ONLY the transaction metrics ("no purchases" really is 0).
# A blanket fillna(0) would also overwrite a missing Region / SignupDate with the
# integer 0, silently turning missing profile data into bogus feature values.
# NOTE: a missing SignupDate therefore stays missing (NaN SignupYear) and has to be
# handled explicitly before the scaling / similarity steps.
customer_features[metric_columns] = customer_features[metric_columns].fillna(0)

# Add derived features, then drop the columns that are not usable as numeric features.
customer_features = (
    customer_features
    .assign(SignupYear=lambda frame: pd.to_datetime(frame['SignupDate']).dt.year)
    .drop(columns=['CustomerName', 'SignupDate'])
)

# Encode categorical features (Region).
# Every region keeps its own indicator column: dropping the first level is a device
# for avoiding collinearity in regression models, but here it would encode one region
# as all-zeros, so customers from different regions would not be separated equally
# in the similarity calculation.
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=False)

# Step 2: Normalize features for similarity calculation
# Everything except the identifier column is a model feature.
feature_columns = [column for column in customer_features.columns if column != 'CustomerID']
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features[feature_columns])

# Step 3: Calculate cosine similarity
# Square matrix: entry [i, j] compares the customers in rows i and j of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

# Step 4: Find the top lookalikes for each customer
TOP_N = 3  # number of lookalikes kept per customer

# Extract the IDs once. Looking them up with customer_features.iloc[i]['CustomerID']
# inside the loop would materialize a full row Series for every single lookup.
customer_ids = customer_features['CustomerID'].tolist()

# Store the results in a dictionary: CustomerID -> [(lookalike CustomerID, score), ...]
lookalikes = {}

for idx, customer_id in enumerate(customer_ids):
    # Similarity of this customer to every customer (including itself)
    row_scores = similarity_matrix[idx]
    # Positions ordered from most to least similar. The stable sort on the negated
    # scores keeps tied customers in their original row order, exactly like
    # sorted(..., reverse=True) does.
    ranked_positions = (-row_scores).argsort(kind='stable')
    # Exclude the customer itself, then keep the best TOP_N matches
    top_positions = ranked_positions[ranked_positions != idx][:TOP_N]
    # Map the customer to their top lookalikes
    lookalikes[customer_id] = [
        (customer_ids[position], row_scores[position]) for position in top_positions
    ]

# Step 5: Save the results as a CSV
# Flatten the mapping into one record per (customer, lookalike) pair.
lookalike_results = [
    {'CustomerID': customer, 'SimilarCustomerID': similar_customer, 'SimilarityScore': score}
    for customer, similar_customers in lookalikes.items()
    for similar_customer, score in similar_customers
]

lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df.to_csv(
    "/content/drive/MyDrive/Data Science assignment /Python files/Chinmay_Khanapurkar_Lookalike.csv",
    index=False,
)

print("Lookalike model completed and results saved to 'Chinmay_Khanapurkar_Lookalike.csv'.")


Lookalike model completed and results saved to 'Chinmay_Khanapurkar_Lookalike.csv'.
