In [2]:
import os
from pathlib import Path

import pandas as pd

In [11]:
# Directory holding the three input CSVs. It defaults to the original download
# location, so behaviour is unchanged on the authoring machine; set the
# LOOKALIKE_DATA_DIR environment variable to run the notebook elsewhere
# without editing this cell.
DATA_DIR = Path(os.environ.get("LOOKALIKE_DATA_DIR", r"C:\Users\Aniket\Downloads"))

# Raw inputs, one DataFrame per CSV file.
customer = pd.read_csv(DATA_DIR / "Customers.csv")
products = pd.read_csv(DATA_DIR / "Products.csv")
transactions = pd.read_csv(DATA_DIR / "Transactions.csv")

# Attach product attributes to every transaction; the left join keeps
# transactions even when their ProductID has no match in `products`.
transactions_products = pd.merge(
    transactions,
    products,
    how='left',
    on="ProductID",
)

# Attach customer attributes the same way, keyed on CustomerID.
full_data = pd.merge(
    transactions_products,
    customer,
    how='left',
    on='CustomerID',
)

# Aggregate the transaction history into one row per customer.
# The output columns keep their source names, so "TransactionID" holds the
# transaction count while "TotalValue" and "Quantity" hold per-customer sums.
customer_agg = full_data.groupby("CustomerID").agg({
    "TotalValue": "sum",       # total spend
    "TransactionID": "count",  # number of transactions
    "Quantity": "sum",         # total quantity purchased
})


def _most_frequent(values):
    """Return the most common non-null value in ``values``, or NaN if none exists.

    ``Series.mode()`` ignores nulls, so a group whose values are all null
    yields an empty result. Indexing that with ``[0]`` raised; such groups can
    occur here because the left merge leaves ``Category`` null for
    transactions whose ProductID is not present in ``products``.
    """
    modes = values.mode()
    # On ties mode() returns several values; the first one is kept, exactly
    # as the previous `mode()[0]` did.
    return modes.iat[0] if not modes.empty else float("nan")


most_freq_category = full_data.groupby("CustomerID")["Category"].agg(_most_frequent)

# Assemble the per-customer feature table: aggregates, favourite category and
# the customer master-data columns, all aligned on the CustomerID index.
category_feature = most_freq_category.rename("MostFrequentCategory")
customer_profile = customer.set_index("CustomerID")

final_data = (
    customer_agg
    .join(category_feature)
    .join(customer_profile)
    # Tenure is measured against the moment this cell runs, so the value
    # (a Timedelta including time of day) differs from run to run.
    .assign(
        SignupTenure=lambda frame: pd.to_datetime("today") - pd.to_datetime(frame["SignupDate"])
    )
)

# Persist the table; CustomerID is the index and is written as the first column.
final_data.to_csv("fullData.csv", index=True)
                             

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Feature engineering: one row per customer with the three behavioural
# features fed to the similarity model. Output columns reuse the source
# column names, so "ProductID" holds the number of distinct products bought.
customer_features = (
    full_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),    # total spend
        Quantity=('Quantity', 'sum'),        # total units purchased
        ProductID=('ProductID', 'nunique'),  # unique products purchased
    )
    .reset_index()
)

In [10]:
# Standardise the numeric features so that no single feature dominates the
# similarity purely because of its scale.
feature_columns = ['TotalValue', 'Quantity', 'ProductID']
scaler = StandardScaler()
scaled_feature = scaler.fit_transform(customer_features.loc[:, feature_columns])

# Pairwise cosine similarity between all customers: row i / column j compares
# the customers at positions i and j of `customer_features`.
similarities = cosine_similarity(scaled_feature)

# Number of lookalikes reported per customer.
TOP_N_LOOKALIKES = 3

# Plain Python list of IDs in the same positional order as the rows/columns of
# `similarities`. Reading the column directly (instead of `.loc[i]` on whole
# rows) avoids building a mixed-dtype row Series per lookup, which is slow and
# can upcast numeric IDs to float.
customer_ids = customer_features['CustomerID'].tolist()

lookalike_results = {}

for idx, customer_id in enumerate(customer_ids):
    # Exclude the customer itself by position. Dropping "the first entry after
    # sorting" is not safe: a customer whose scaled feature vector is all
    # zeros has self-similarity 0, and ties or rounding around 1.0 can place
    # another customer ahead of the self-match.
    candidate_scores = [
        # float() keeps NumPy scalar reprs such as "np.float64(0.98)" out of
        # the exported CSV.
        (other_idx, float(score))
        for other_idx, score in enumerate(similarities[idx])
        if other_idx != idx
    ]
    # Stable descending sort: equal scores keep their original customer order.
    candidate_scores.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_results[customer_id] = [
        (customer_ids[other_idx], score)
        for other_idx, score in candidate_scores[:TOP_N_LOOKALIKES]
    ]

# Convert to a DataFrame: one row per customer with its list of
# (lookalike CustomerID, similarity score) pairs, then export.
lookalike_df = pd.DataFrame({
    'CustomerID': customer_ids,
    'Lookalikes': [lookalike_results[cust] for cust in customer_ids]
})
lookalike_df.to_csv('Lookalike.csv', index=False)