In [61]:
# Task 2: Lookalike Model
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [62]:
# Load the three input tables used by all tasks.
# DATA_DIR defaults to the Colab upload folder; change this one value to run
# the notebook elsewhere instead of editing every read_csv call.
DATA_DIR = '/content'
customers = pd.read_csv(os.path.join(DATA_DIR, 'Customers.csv'))
products = pd.read_csv(os.path.join(DATA_DIR, 'Products.csv'))
transactions = pd.read_csv(os.path.join(DATA_DIR, 'Transactions.csv'))

In [63]:
def _most_frequent(values):
    """Return the most frequent non-null value in ``values``.

    Ties are resolved by taking the first entry of ``Series.mode()``.
    Returns NaN when every value is missing, because ``mode()`` is then
    empty and indexing it with ``[0]`` would raise.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float('nan')


# Build one row per customer: total spend, total quantity and the category
# the customer bought most often.
# NOTE(review): 'Category' is read from `transactions` here, while `products`
# is loaded but never joined in the visible cells -- confirm that
# `transactions` really carries a 'Category' column at this point.
customer_features = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Category': _most_frequent
}).reset_index()

In [64]:
# One-hot encode the favourite category. drop_first=True omits the first
# category level, so that level is represented by all dummy columns being 0.
# The guard keeps the cell idempotent: it overwrites `customer_features`, so a
# second run would otherwise fail because 'Category' has already been replaced.
if 'Category' in customer_features.columns:
    customer_features = pd.get_dummies(customer_features, columns=['Category'], drop_first=True)

In [65]:
# Standardise every feature column; CustomerID is an identifier, not a
# feature, so it is removed before fitting the scaler.
feature_columns = customer_features.drop('CustomerID', axis=1)
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(feature_columns)

In [66]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=customer_features_scaled)

In [69]:
# For each of the first N_TARGET_CUSTOMERS rows of `customer_features`, collect
# the TOP_K most similar *other* customers as (CustomerID, score) tuples.
N_TARGET_CUSTOMERS = 20
TOP_K = 3

customer_ids = customer_features['CustomerID'].tolist()
lookalike_results = {}
# Bound the loop by the number of rows so a small dataset does not raise.
# NOTE(review): rows come from `customer_features`, i.e. customers present in
# the aggregated data -- confirm this matches the intended "first 20" set.
for i in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    # Exclude the customer by position instead of assuming it sorts first:
    # an identical feature vector (or float rounding) can put another row
    # ahead of it, which would drop a real match and keep the customer itself.
    candidates = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:TOP_K]
    lookalike_results[customer_ids[i]] = [
        (customer_ids[j], score) for j, score in top_matches
    ]

In [70]:
# Flatten the {customer: [(lookalike_id, score), ...]} mapping into one wide
# row per customer holding its three lookalikes and their scores.
lookalike_rows = []
for source_id, matches in lookalike_results.items():
    first, second, third = matches[0], matches[1], matches[2]
    lookalike_rows.append({
        'CustomerID': source_id,
        'LookalikeCustomerID1': first[0], 'Score1': first[1],
        'LookalikeCustomerID2': second[0], 'Score2': second[1],
        'LookalikeCustomerID3': third[0], 'Score3': third[1],
    })
lookalike_df = pd.DataFrame(lookalike_rows)

In [71]:
# Write the lookalike table to CSV without the index column.
# OUTPUT_DIR is created if missing; change this one value to redirect the
# output instead of editing both calls.
OUTPUT_DIR = '/mnt/data'
os.makedirs(OUTPUT_DIR, exist_ok=True)
lookalike_df.to_csv(os.path.join(OUTPUT_DIR, 'Lookalike.csv'), index=False)