In [23]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from google.colab import drive
# Make Google Drive visible under /content/drive so the CSVs below can be read.
drive.mount('/content/drive')

# Load the three source tables in one pass; the generator is consumed in order,
# so the files are still read as Customers, Products, then Transactions.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/Data Science Project/Customers.csv',
        '/content/drive/MyDrive/Data Science Project/Products.csv',
        '/content/drive/MyDrive/Data Science Project/Transactions.csv',
    )
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# Join every transaction with its customer row and its product row
# (inner joins: rows without a match on either key are dropped).
data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

# One row per customer: summed TotalValue, summed Quantity, and the most
# frequent Category (ties resolve to the first mode pandas returns).
customer_features = data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Category': lambda x: x.mode()[0]
}).reset_index()

# One-hot encode the dominant category and place it next to the numeric totals.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
category_encoded = encoder.fit_transform(customer_features[['Category']])
numeric_features = customer_features[['TotalValue', 'Quantity']].values
features = pd.DataFrame(
    data=np.hstack([numeric_features, category_encoded]),
    columns=['TotalValue', 'Quantity'] + list(encoder.get_feature_names_out(['Category']))
)

# Standardise every column so no single feature dominates the cosine similarity.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Pairwise customer-to-customer similarity; row i / column j follow the row
# order of customer_features.
similarity_matrix = cosine_similarity(scaled_features)

# Customers to produce lookalikes for (C0001..C0020) and how many to keep each.
# Built once as a set instead of rebuilding a list on every loop iteration.
TARGET_CUSTOMER_IDS = {f'C{str(i).zfill(4)}' for i in range(1, 21)}
TOP_N = 3

recommendations = {}
customer_ids = customer_features['CustomerID']
for idx, customer_id in enumerate(customer_ids):
    if customer_id not in TARGET_CUSTOMER_IDS:
        continue
    scores = similarity_matrix[idx]
    # Exclude the customer by index rather than by assuming they sort first:
    # the sort is stable, so an earlier customer with a tied top score (e.g.
    # identical features) would otherwise push "self" into the results and
    # drop a genuine neighbour.
    ranked = sorted(
        (other for other in range(len(scores)) if other != idx),
        key=lambda other: scores[other],
        reverse=True,
    )
    recommendations[customer_id] = [
        (customer_ids.iloc[other], round(scores[other], 2)) for other in ranked[:TOP_N]
    ]


In [25]:
# Flatten {customer: [(lookalike, score), ...]} into one record per pair,
# keeping the dictionary's iteration order and each customer's ranking order.
recommendation_list = [
    {
        'CustomerID': source_id,
        'RecommendedCustomerID': lookalike_id,
        'SimilarityScore': score,
    }
    for source_id, lookalikes in recommendations.items()
    for lookalike_id, score in lookalikes
]

# Write the long-format table to the working directory without the index column.
recommendation_df = pd.DataFrame(recommendation_list)
recommendation_df.to_csv('Kanishkar_V_Lookalike.csv', index=False)
print("\nProcessed data saved as 'Kanishkar_V_Lookalike.csv'.")


Processed data saved as 'Kanishkar_V_Lookalike.csv'.
