In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Configuration: the one place to repoint the notebook at another data folder
# or to change how many lookalikes are produced.
DATA_DIR = r"C:\Users\Harshitha\Pictures\Screenshots\Data science Assignment"
N_TARGET_CUSTOMERS = 20  # leading customers (in CustomerID order) that get lookalikes
TOP_N_LOOKALIKES = 3     # lookalikes reported per target customer

# Load datasets
customers = pd.read_csv(rf"{DATA_DIR}\Customers.csv")
transactions = pd.read_csv(rf"{DATA_DIR}\Transactions.csv")
products = pd.read_csv(rf"{DATA_DIR}\Products.csv")

# Merge datasets. Transactions is the left table, so every transaction row is
# kept and a customer without any transaction never appears in full_data.
transactions_customers = transactions.merge(customers, on='CustomerID', how='left')
full_data = transactions_customers.merge(products, on='ProductID', how='left')

# Aggregate one row of purchase-behaviour features per customer. groupby sorts
# its keys by default, so the rows come out ordered by CustomerID.
customer_features = full_data.groupby('CustomerID').agg(
    TotalTransactions=('TransactionID', 'count'),
    TotalQuantity=('Quantity', 'sum'),
    TotalSpent=('TotalValue', 'sum'),
    DistinctProducts=('ProductID', 'nunique'),
    AvgSpentPerTransaction=('TotalValue', 'mean')
).reset_index()

# Standardise the features so that large-valued columns (e.g. TotalSpent) do
# not dominate the similarity. The identifier is dropped by name rather than by
# position so the selection keeps working if the column order ever changes.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_features.drop(columns='CustomerID'))

# Pairwise cosine similarity between all customers (rows of scaled_features)
similarity_matrix = cosine_similarity(scaled_features)

# Label both axes with CustomerID so scores can be looked up by customer
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID'],
)

# Find the top lookalikes for each target customer.
# Targets are the first N_TARGET_CUSTOMERS ids present in the similarity matrix,
# i.e. C0001 to C0020 only if each of those customers has at least one
# transaction.
lookalike_results = {}

for customer_id in similarity_df.index[:N_TARGET_CUSTOMERS]:
    # Remove the target by label before ranking. Relying on "self is always the
    # first row after sorting" breaks when another customer ties with (or, via
    # floating-point round-off, exceeds) the self-similarity.
    similar_customers = (
        similarity_df[customer_id]
        .drop(customer_id)
        .nlargest(TOP_N_LOOKALIKES)
    )
    # Store built-in floats: the list is stringified below, and NumPy >= 2
    # renders its own scalars as "np.float64(...)" inside a list repr.
    lookalike_results[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

# Save results to Lookalike.csv: one row per target customer, with the
# (CustomerID, score) pairs serialised as a string in the "Lookalikes" column.
lookalike_data = {
    "CustomerID": list(lookalike_results.keys()),
    "Lookalikes": [str(pairs) for pairs in lookalike_results.values()]
}
lookalike_df = pd.DataFrame(lookalike_data)
output_path = rf"{DATA_DIR}\Harshitha N Lookalike.csv"
lookalike_df.to_csv(output_path, index=False)

print(f"Lookalike results saved to {output_path}")


Lookalike results saved to C:\Users\Harshitha\Pictures\Screenshots\Data science Assignment\Harshitha N Lookalike.csv
