# TASK - 2 : Lookalike Model

In [15]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Directory that holds the three input CSV files. Kept in one place so the
# notebook can be pointed at another location by changing a single value.
DATA_DIR = "/content"

# Raw inputs: customer profiles, the product catalogue and the transaction log.
customer = pd.read_csv(f"{DATA_DIR}/Customers.csv")
product = pd.read_csv(f"{DATA_DIR}/Products.csv")
transaction = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [3]:
# One row per transaction, enriched with the matching product and customer columns.
# `transaction` is intentionally NOT rebound to the merged frame: overwriting it
# made this cell non-idempotent, because a second run would merge the product
# columns in again and the duplicated columns would come back suffixed, so later
# lookups such as data['ProductName'] would fail.
data = (
    transaction
    .merge(product, on="ProductID")
    .merge(customer, on="CustomerID")
)

## Feature Engineering

In [5]:
# Tenure in whole days between each customer's signup date and the moment this
# cell runs, so the absolute values shift depending on the day of execution.
signup_dates = pd.to_datetime(customer['SignupDate'])
customer['SignupDays'] = (pd.to_datetime('today') - signup_dates).dt.days

# Profile columns carried forward into the per-customer feature table.
profile_columns = ['CustomerID', 'Region', 'SignupDays']
customer_features = customer[profile_columns]

In [7]:
# All per-customer aggregates share the same grouping key.
by_customer = data.groupby('CustomerID')

# Total monetary value of each customer's transactions.
spend = by_customer['TotalValue'].sum().rename('TotalSpend').reset_index()

# Number of transactions recorded for each customer.
purchase_freq = by_customer['TransactionID'].count().rename('PurchaseFrequency').reset_index()

# Customer x product table counting how many transaction rows pair the two.
prod_pref = pd.crosstab(index=data['CustomerID'], columns=data['ProductName'])

In [8]:
# Start from the profile columns and left-join each purchase-derived table in turn,
# so customers missing from a table stay in the result with NaN in its columns.
customer_data = (
    customer_features
    .merge(spend, on='CustomerID', how='left')
    .merge(purchase_freq, on='CustomerID', how='left')
    .merge(prod_pref, on='CustomerID', how='left')
)

In [9]:
# Preview the first five rows of the merged feature table.
customer_data.head(n=5)

Unnamed: 0,CustomerID,Region,SignupDays,TotalSpend,PurchaseFrequency,ActiveWear Biography,ActiveWear Cookbook,ActiveWear Cookware Set,ActiveWear Headphones,ActiveWear Jacket,...,SoundWave Textbook,TechPro Cookbook,TechPro Headphones,TechPro Novel,TechPro Rug,TechPro Running Shoes,TechPro Smartwatch,TechPro T-Shirt,TechPro Textbook,TechPro Vase
0,C0001,South America,932,3354.52,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,C0002,Asia,1079,1862.74,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,C0003,South America,326,2725.38,4.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C0004,South America,841,5354.88,8.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0
4,C0005,Asia,896,2034.24,3.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [10]:
# Per-column count of missing values in the merged feature table.
customer_data.isna().sum()

Unnamed: 0,0
CustomerID,0
Region,0
SignupDays,0
TotalSpend,1
PurchaseFrequency,1
...,...
TechPro Running Shoes,1
TechPro Smartwatch,1
TechPro T-Shirt,1
TechPro Textbook,1


In [11]:
# A customer with no transactions comes out of the left merges with NaN in every
# purchase-derived column; treat that as zero spend / zero purchases.
# Only numeric columns are filled, so a missing CustomerID or Region would stay
# visible instead of being silently replaced by the number 0, and the name is
# rebound rather than mutating the frame with `inplace=True`.
numeric_columns = customer_data.select_dtypes(include='number').columns
customer_data = customer_data.fillna({column: 0 for column in numeric_columns})

In [13]:
# Re-check the per-column missing-value counts now that the gaps have been filled.
customer_data.isna().sum()

Unnamed: 0,0
CustomerID,0
Region,0
SignupDays,0
TotalSpend,0
PurchaseFrequency,0
...,...
TechPro Running Shoes,0
TechPro Smartwatch,0
TechPro T-Shirt,0
TechPro Textbook,0


In [14]:
# Normalization: rescale the numeric features with min-max scaling before computing similarity

In [16]:
# CustomerID and Region hold text, so they are left out of the matrix that gets
# scaled; every remaining column is min-max scaled.
# NOTE(review): as a consequence Region never influences the similarity scores —
# confirm that dropping it (rather than encoding it) is intended.
feature_matrix = customer_data.drop(columns=['CustomerID', 'Region'])
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(feature_matrix)

## Building the model

In [17]:
# Pairwise cosine similarity between all rows of the scaled feature matrix:
# entry [i, j] compares the customer in row i with the customer in row j.
similarity_mat = cosine_similarity(X=normalized_data)

In [20]:
def _closest_customers(row_position, customer_id):
  """Return the three (CustomerID, Similarity) pairs ranked highest for `customer_id`.

  `row_position` is the customer's row in `similarity_mat`, which follows the
  row order of `customer_data`.
  """
  scores = pd.DataFrame({
      'CustomerID': customer_data['CustomerID'],
      'Similarity': similarity_mat[row_position],
  })
  # Drop the customer's own row so nobody is reported as their own lookalike.
  others = scores.loc[scores['CustomerID'] != customer_id]
  best = others.sort_values(by='Similarity', ascending=False).iloc[:3]
  return list(zip(best['CustomerID'], best['Similarity']))


# Map every CustomerID to its three most similar other customers.
lookalike = {
  customer_id: _closest_customers(row_position, customer_id)
  for row_position, customer_id in enumerate(customer_data['CustomerID'])
}

In [21]:
# Keep only the lookalikes of the first 20 customers, in table order.
first_20_ids = customer_data['CustomerID'].iloc[:20]
final_result = {cust_id: lookalike[cust_id] for cust_id in first_20_ids}

## Saving the lookalike results


In [25]:
import csv

In [26]:
# Write one row per customer: the CustomerID followed by its list of
# (lookalike CustomerID, similarity) pairs, rendered as text by the csv writer.
with open('Lookalike.csv', mode='w', newline='') as out_file:
  csv_out = csv.writer(out_file)
  csv_out.writerow(['CustomerID', 'Lookalikes'])
  csv_out.writerows(
      [cust_id, matches] for cust_id, matches in final_result.items()
  )

In [27]:
# Completion message for the reader (typo "saveed" corrected).
print("The lookalike model has been successfully built and saved in 'Lookalike.csv'.")

The lookalike model has been successfully built and saveed in 'Lookalike.csv'.
