In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the three raw input tables from the working directory, in the order
# customers -> products -> transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [3]:
# Attach customer attributes, then product attributes, to every transaction.
# Both joins use pandas' default inner join, so transactions without a matching
# customer or product row are dropped. A `Price` column exists on both the
# transaction side and in Products, so the second join emits it as
# `Price_x` (transaction side) and `Price_y` (product side).
df = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [8]:
# Sanity-check the merged schema. The overlapping `Price` column shows up
# suffixed as `Price_x` / `Price_y`, which the feature cell below relies on.
print(df.columns)

Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')


In [9]:
# Collapse the merged transaction rows into one feature row per customer.
# Named aggregation keeps the output column names identical to the inputs.
customer_features = (
    df.groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),  # total spend across all transactions
        Quantity=('Quantity', 'sum'),      # total units purchased
        Price_x=('Price_x', 'mean'),       # mean of the transaction-side price
        Region=('Region', 'first'),        # first non-null region seen for the customer
    )
    .reset_index()
)

In [10]:
# One-hot encode the categorical Region column into Region_<value> indicator
# columns so every remaining feature is numeric. Note this rebinds
# `customer_features`, so the cell cannot be re-run without re-running the
# aggregation cell first (Region no longer exists after encoding).
customer_features = pd.get_dummies(
    data=customer_features,
    columns=['Region'],
)

In [11]:
# Standardise every feature column (the CustomerID key is excluded) so that
# large-magnitude columns such as TotalValue do not dominate the similarity.
# The fitted scaler is kept in `scaler` for later inspection or reuse.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    customer_features.drop('CustomerID', axis=1)
)

In [12]:
# Pairwise cosine similarity between all customers: entry [i, j] compares the
# scaled feature rows of customer i and customer j (row order follows
# `customer_features`).
similarity_matrix = cosine_similarity(X=scaled_features)

In [13]:
# Build {customer_id: [(lookalike_id, score), ...]} for the first
# N_TARGET_CUSTOMERS rows of `customer_features`, keeping the TOP_K most
# similar *other* customers for each one.
N_TARGET_CUSTOMERS = 20  # how many leading customers get recommendations
TOP_K = 3                # lookalikes reported per customer

customer_ids = customer_features['CustomerID'].to_numpy()

lookalike_dict = {}
# Clamp the range so a dataset with fewer customers than N_TARGET_CUSTOMERS
# does not raise IndexError.
for i in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    # Drop the customer itself by position. Relying on "self sorts first" is
    # wrong when another customer ties at the top score (identical feature
    # vectors) or when the row is all zeros (self-similarity is then 0).
    candidates = [
        (j, score)
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # sorted() is stable, so equal scores keep their original row order.
    best_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:TOP_K]
    # float(...) turns the NumPy scalar into a built-in float so the list is
    # written to CSV as 0.97 rather than `np.float64(0.97)` under NumPy >= 2.
    lookalike_dict[customer_ids[i]] = [
        (customer_ids[j], round(float(score), 2)) for j, score in best_matches
    ]


In [14]:
# Flatten the lookalike map into a two-column table (one row per target
# customer; the Lookalikes cell holds the list of (id, score) tuples) and
# write it next to the notebook without the index column.
lookalike_df = pd.DataFrame(
    data=[*lookalike_dict.items()],
    columns=['CustomerID', 'Lookalikes'],
)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model generated successfully.")

Lookalike model generated successfully.
