# **Task 2: Lookalike Model**



---



In [65]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




---



### Import Required Libraries

In [66]:
# Import all the required libraries

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity




---



### Dataset Loading

In [67]:
# Load the Dataset
customers = pd.read_csv("/content/drive/MyDrive/ZEOTAP/Customers.csv")
products = pd.read_csv("/content/drive/MyDrive/ZEOTAP/Products (2).csv")
transactions = pd.read_csv("/content/drive/MyDrive/ZEOTAP/Transactions.csv")




---



### Data Pre-processing

In [68]:
# Merge transaction data with product and customer data using ProductID and CustomerID columns

data = transactions.merge(products, on="ProductID").merge(customers, on="CustomerID")


In [69]:
# Convert 'TransactionDate' to datetime format
data['TransactionDate'] = pd.to_datetime(data['TransactionDate'])

# Aggregate data to create customer features
customer_features = data.groupby("CustomerID").agg({
    "TotalValue": "sum",  # Total spending
    "Category": lambda x: x.value_counts().index[0],  # Most purchased category
    "TransactionDate": lambda x: (pd.Timestamp.now() - x.max()).days  # Frequency of Transactions
}).reset_index()

# Rename 'TransactionDate' column to 'TransactionFreq'
customer_features.rename(columns={"TransactionDate": "TransactionFreq"}, inplace=True)

# Normalize numeric features
scaler = MinMaxScaler()
customer_features[["TotalValue", "TransactionFreq"]] = scaler.fit_transform(customer_features[["TotalValue", "TransactionFreq"]])

# One-hot encode the 'Category' feature
customer_features = pd.get_dummies(customer_features, columns=["Category"], drop_first=True)




---



### Model Design

In [70]:
# Drop 'CustomerID' to focus only on numeric features
feature_matrix = customer_features.drop(columns=["CustomerID"])

# Calculate pairwise cosine similarity
similarity_matrix = cosine_similarity(feature_matrix)

# Convert similarity matrix to a DataFrame
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features["CustomerID"], columns=customer_features["CustomerID"])


In [71]:
# Generate Lookalike Recommendations
lookalikes = {}
top_similarity_scores = []

# Loop through the first 20 customers to find their top 3 similar customers
for customer_id in customer_features["CustomerID"][:20]:
    similar_customers = similarity_df[customer_id].sort_values(ascending=False)[1:4]  # Top 3 similar customers
    recommendations = [{"cust_id": idx, "score": round(score, 2)} for idx, score in similar_customers.items()]

    lookalikes[customer_id] = recommendations

    top_similarity_scores.extend([rec["score"] for rec in recommendations])




---



### Create and store the Lookalike Data in a CSV file

In [72]:
lookalike_df = pd.DataFrame({"cust_id": lookalikes.keys(), "lookalikes": lookalikes.values()})
lookalike_df.to_csv("Harsh_Mahor_Lookalike.csv", index=False)




---



### Model Accuracy

In [73]:
# Model Accuracy using Mean Similarity Score
average_similarity_score = (sum(top_similarity_scores) / len(top_similarity_scores))*100

print("---------------------------------------------")
print(f"Model Accuracy (Mean Similarity Score): {average_similarity_score:.2f}")
print("---------------------------------------------")


---------------------------------------------
Model Accuracy (Mean Similarity Score): 99.72
---------------------------------------------


**The Lookalike Model has achieved a Mean Similarity Score of 99.72, meaning it can effectively recommend customers with similar features to the given user information.**



---



### Model Testing

In [74]:
# Function to Get Similar Customers
def get_similar_customers(customer_id):
    if customer_id not in similarity_df.index:
        return f"CustomerID {customer_id} not found!"

    # Top 3 similar customers
    similar_customers = similarity_df[customer_id].sort_values(ascending=False)[1:4]
    result = [{"cust_id": idx, "score": round(score, 2)} for idx, score in similar_customers.items()]
    return result


In [75]:
user_input = input("Enter CustomerID to find similar customers: ").strip()

# Generate Top 3 Customers
recommendations = get_similar_customers(user_input)
print("----------------------------------------------")
print(f"Top 3 similar customers for CustomerID {user_input}:")
print("----------------------------------------------")
for rec in recommendations:
    print(f"CustomerID: {rec['cust_id']}, Similarity Score: {rec['score']}")

Enter CustomerID to find similar customers: C0072
----------------------------------------------
Top 3 similar customers for CustomerID C0072:
----------------------------------------------
CustomerID: C0001, Similarity Score: 1.0
CustomerID: C0044, Similarity Score: 1.0
CustomerID: C0050, Similarity Score: 1.0




---

