In [14]:
import csv

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

## Load datasets

In [15]:
# Read the three input tables, in the order customers -> products -> transactions.
# The paths are absolute (rooted at '/'), so the CSVs must sit at the filesystem root.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('/Customers.csv', '/Products.csv', '/Transactions.csv')
)

## Merge datasets

In [16]:
# Build one row per transaction enriched with customer and product attributes.
# Both joins use pandas' default `how='inner'`, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped from the result.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

## Feature engineering

In [17]:
# Collapse the transaction-level table to one profile row per customer:
#   TotalValue - sum of TotalValue over the customer's transactions
#   Quantity   - sum of Quantity over the customer's transactions
#   ProductID  - first mode of ProductID, i.e. the product appearing in the most
#                transaction rows (row count, not summed Quantity)
#   Region     - Region taken from the customer's first row
# reset_index() turns the CustomerID group key back into the leading column.
customer_profiles = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        ProductID=('ProductID', lambda product_ids: product_ids.mode()[0]),
        Region=('Region', 'first'),
    )
    .reset_index()
)

## Encode categorical variables

In [18]:
# One-hot encode the two categorical profile columns; every other column
# (CustomerID, TotalValue, Quantity) is passed through unchanged.
customer_profiles_encoded = pd.get_dummies(
    data=customer_profiles,
    columns=['Region', 'ProductID'],
)

## Scale numerical data

In [19]:
# Standardise every feature column. `iloc[:, 1:]` skips the first column, which is
# expected to be the CustomerID identifier and must not be fed to the scaler.
# Note that the one-hot indicator columns are standardised along with the numeric totals.
scaler = StandardScaler().fit(customer_profiles_encoded.iloc[:, 1:])
customer_profiles_scaled = scaler.transform(customer_profiles_encoded.iloc[:, 1:])

## Compute similarity matrix

In [20]:
# Pairwise cosine similarity between all scaled customer profiles.
# Entry [a, b] compares the profile in row a with the profile in row b.
similarities = cosine_similarity(X=customer_profiles_scaled)

## Generate lookalike recommendations for the first 20 customers

In [21]:
# How many customers to produce recommendations for, and how many lookalikes each.
N_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

# Positional list of IDs: row/column k of `similarities` belongs to customer_ids[k].
# Using a list (instead of `Series[j]`) makes the lookup positional regardless of
# what index labels `customer_profiles` carries.
customer_ids = customer_profiles['CustomerID'].tolist()

# Maps each target CustomerID to a list of (lookalike CustomerID, similarity score)
# pairs, ordered from most to least similar.
lookalike_results = {}
for i, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position. Relying on "self sorts first"
    # breaks when another customer has an identical profile (tie at 1.0) or when
    # floating-point rounding leaves the self-similarity marginally below a
    # duplicate's score: the customer would then be listed as their own lookalike.
    candidates = [(j, score) for j, score in enumerate(similarities[i]) if j != i]
    # Stable descending sort: ties keep their original (row-order) ranking.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    # Store scores as built-in floats so downstream serialisation is not affected
    # by NumPy's scalar repr.
    lookalike_results[customer_id] = [
        (customer_ids[j], float(score)) for j, score in candidates[:TOP_N_LOOKALIKES]
    ]

## Save lookalike results to CSV

In [22]:
def _as_builtin(value):
    """Return the built-in Python equivalent of a NumPy scalar; pass other values through."""
    return value.item() if hasattr(value, 'item') else value


# Write one row per target customer: the customer ID, then the list of
# (lookalike ID, score) pairs rendered as a Python-literal string.
with open('FirstName_LastName_Lookalike.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['cust_id', 'lookalikes'])
    for cust_id, lookalikes in lookalike_results.items():
        # csv.writer stringifies the list with str(), which uses each element's repr.
        # Under NumPy >= 2 a NumPy scalar's repr is e.g. "np.float64(0.93)", which
        # would land verbatim in the file, so convert to built-in types first.
        plain_pairs = [
            (_as_builtin(other_id), _as_builtin(score)) for other_id, score in lookalikes
        ]
        writer.writerow([cust_id, plain_pairs])

In [23]:

# Confirmation message for the reader. The filename is a separate literal here,
# so it must be kept in sync with the path opened when the CSV is written.
print("Lookalike model results saved to 'FirstName_LastName_Lookalike.csv'")

Lookalike model results saved to 'FirstName_LastName_Lookalike.csv'
