<a href="https://colab.research.google.com/github/Thammisetty-Sirisha/Zeotap-Task2/blob/main/Zeotap_Task2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the three input tables (customers, products, transactions) from the mounted Drive folder.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/Customers.csv',
        '/content/drive/MyDrive/Products.csv',
        '/content/drive/MyDrive/Transactions.csv',
    )
)

In [None]:
# Parse the transaction timestamps in place so the column holds datetimes rather than raw CSV values.
transactions_df['TransactionDate'] = pd.to_datetime(transactions_df['TransactionDate'])

# Enrich each transaction with its product attributes and the buyer's region.
# Both are left joins, so every transaction row is kept even when a lookup finds no match.
customer_regions = customers_df[['CustomerID', 'Region']]
merged_data = (
    transactions_df
    .merge(products_df, how='left', on='ProductID')
    .merge(customer_regions, how='left', on='CustomerID')
)

In [None]:
# One row per customer with three aggregate purchase features.
customer_features = merged_data.groupby('CustomerID').agg(
    TotalSpending=('TotalValue', 'sum'),        # sum of TotalValue over the customer's transactions
    NumTransactions=('TransactionID', 'count'),  # how many transactions the customer made
    Quantity=('Quantity', 'sum'),                # total units purchased
)

In [None]:
# Per-customer quantity purchased in each product category; customer/category
# combinations with no purchases are filled with 0.
category_pivot = pd.pivot_table(
    merged_data, values='Quantity', index='CustomerID', columns='Category', aggfunc='sum', fill_value=0
)

# `customer_features` is extended in place, so a second run of this cell would try to
# join columns that are already present and fail with an overlapping-columns ValueError.
# Dropping any previously joined category columns first keeps the cell safe to re-run;
# on a first run the drop is a no-op.
customer_features = (
    customer_features
    .drop(columns=category_pivot.columns, errors='ignore')
    .join(category_pivot)
)

In [None]:
# One-hot encode each customer's region, indexed by CustomerID so it aligns with
# the feature table's index.
region_encoded = pd.get_dummies(customers_df.set_index('CustomerID')['Region'])

# `customer_features` is extended in place, so a second run of this cell would try to
# join region columns that are already present and fail with an overlapping-columns
# ValueError. Dropping any previously joined region columns first keeps the cell safe
# to re-run; on a first run the drop is a no-op.
customer_features = (
    customer_features
    .drop(columns=region_encoded.columns, errors='ignore')
    .join(region_encoded)
)

In [None]:
# Standardise every feature column so that no single feature dominates the
# similarity computation purely because of its scale.
scaler = StandardScaler()
scaler.fit(customer_features)
scaled_features = scaler.transform(customer_features)

In [None]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# entry [i, j] compares row i with row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [None]:
# Row position -> CustomerID lookup, in the same order as the feature table's index.
customer_ids = customer_features.index.to_list()

# Filled in later: CustomerID -> list of (lookalike CustomerID, similarity score).
recommendations = dict()

In [None]:
# For every customer, keep the three most similar *other* customers.
top_n = 3
for i, customer_id in enumerate(customer_ids):
    # Exclude the customer's own entry by position instead of assuming it sorts first:
    # another customer with an identical feature vector (or floating-point error on the
    # diagonal) can tie with or outrank the self-similarity, which would otherwise list
    # the customer as their own lookalike and drop a genuine neighbour.
    # Scores are converted to builtin floats so that stringifying the result later does
    # not embed NumPy scalar reprs such as "np.float64(0.9)".
    candidate_scores = [
        (j, float(score))
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # sorted() is stable, so equally similar customers keep their original index order.
    top_matches = sorted(candidate_scores, key=lambda pair: pair[1], reverse=True)[:top_n]
    recommendations[customer_id] = [
        (customer_ids[j], round(score, 4)) for j, score in top_matches
    ]

In [None]:
# Flatten the recommendations mapping into a two-column table: the customer, and the
# string form of that customer's list of (lookalike id, score) pairs.
lookalike_customer_ids = list(recommendations)
lookalike_strings = list(map(str, recommendations.values()))
recommendations_df = pd.DataFrame(
    {'CustomerID': lookalike_customer_ids, 'Recommendations': lookalike_strings}
)

In [None]:
# Write the lookalike table next to the notebook's working directory (no index column).
output_path = Path('Sirisha_Thammisetty_Lookalike.csv')
recommendations_df.to_csv(output_path, index=False)

# Report where the file actually landed rather than a hard-coded directory: the path is
# relative, so its real location depends on the current working directory.
print(f"Lookalike Model Completed. Recommendations saved to '{output_path.resolve()}'.")

Lookalike Model Completed. Recommendations saved to '/content/Sirisha_Thammisetty_Lookalike.csv'.
