# Step 1: Data Preparation
Load the datasets and preprocess them.

In [1]:
import pandas as pd

# Read the three source tables. The date columns are converted to datetime
# right after loading, so each frame is fully prepared in a single expression.
customers_df = pd.read_csv('Customers.csv').assign(
    SignupDate=lambda frame: pd.to_datetime(frame['SignupDate'])
)
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv').assign(
    TransactionDate=lambda frame: pd.to_datetime(frame['TransactionDate'])
)

# Enrich every transaction with its customer's profile first, then with the
# details of the purchased product (both are default inner joins).
transactions_with_customers = pd.merge(transactions_df, customers_df, on='CustomerID')
merged_df = pd.merge(transactions_with_customers, products_df, on='ProductID')

# Step 2: Feature Engineering
Create features based on customer profiles and transaction history.

In [5]:
# Per-product price lookup taken directly from the product table. Nothing is
# averaged here; the variable name is kept for compatibility with other cells.
# NOTE(review): presumably one row per ProductID in products_df - confirm,
# otherwise the left merge below would duplicate transaction rows.
avg_price_per_product = products_df[['ProductID', 'Price']]

# Attach the product price to every transaction row under the plain column
# name 'Price'. Any 'Price' column already present on merged_df (for example
# from a previous run of this cell) is dropped first: merging on top of an
# existing 'Price' would rename both sides to suffixed 'Price_x'/'Price_y'
# and the aggregation below would no longer find 'Price'.
merged_df = (
    merged_df
    .drop(columns='Price', errors='ignore')
    .merge(avg_price_per_product, on='ProductID', how='left')
)

# Per-customer features: total spend, total units bought, and the mean product
# price across the customer's transaction rows.
customer_features = (
    merged_df
    .groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',
        'Quantity': 'sum',
        'Price': 'mean'
    })
    .reset_index()
)

# Add the customer profile columns. This is an inner join, so only customers
# that appear in both tables are kept.
customer_features = customer_features.merge(customers_df, on='CustomerID')

# Display the first few rows of the customer features
print(customer_features.head())

  CustomerID  TotalValue  Quantity       Price        CustomerName  \
0      C0001     3354.52        12  278.334000    Lawrence Carroll   
1      C0002     1862.74        10  208.920000      Elizabeth Lutz   
2      C0003     2725.38        14  195.707500      Michael Rivera   
3      C0004     5354.88        23  240.636250  Kathleen Rodriguez   
4      C0005     2034.24         7  291.603333         Laura Weber   

          Region SignupDate  
0  South America 2022-07-10  
1           Asia 2022-02-13  
2  South America 2024-03-07  
3  South America 2022-10-09  
4           Asia 2022-08-15  


# Step 3: Similarity Calculation
Standardize the customer features, then use cosine similarity to calculate pairwise similarity scores between customers.

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Per-customer numeric columns that feed the similarity computation.
features = ['TotalValue', 'Quantity', 'Price']
X = customer_features.loc[:, features]

# Standardize each feature column. Fitting and transforming are written as two
# explicit steps on the same data, which is what fit_transform does in one call.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Pairwise cosine similarity between the scaled rows: entry [i, j] compares
# row i and row j of customer_features.
similarity_matrix = cosine_similarity(X_scaled)

# Step 4: Recommendation
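Recommend the top 3 most similar customers (excluding the customer itself) for each target customer, and save the results for the first 20 customers to `Lookalike.csv`.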

In [9]:
# Column layout of the output table: the target customer followed by its three
# closest lookalikes, each paired with its similarity score.
lookalike_columns = ['CustomerID', 'Lookalike1', 'Score1', 'Lookalike2', 'Score2', 'Lookalike3', 'Score3']

# Customer IDs as a positional array. Row i of similarity_matrix corresponds to
# row i of customer_features, so positions can be used to look IDs up directly.
customer_ids = customer_features['CustomerID'].to_numpy()

# Get top 3 similar customers for each target customer
recommendations = []

for idx, customer_id in enumerate(customer_ids):
    similarity_scores = similarity_matrix[idx]

    # Rank all customers by descending similarity (computed once per target),
    # then remove the target itself by position. Simply skipping the first
    # ranked entry is not safe: when another customer ties with the target's
    # self-similarity, the target is not guaranteed to be ranked first and
    # could end up listed as its own lookalike.
    ranked_positions = similarity_scores.argsort()[::-1]
    top_positions = ranked_positions[ranked_positions != idx][:3]

    record = {'CustomerID': customer_id}
    for rank, position in enumerate(top_positions, start=1):
        record[f'Lookalike{rank}'] = customer_ids[position]
        record[f'Score{rank}'] = similarity_scores[position]
    recommendations.append(record)

# Passing the column list keeps the layout stable even when there are fewer
# than three other customers (missing slots become NaN) or no rows at all.
lookalike_df = pd.DataFrame(recommendations, columns=lookalike_columns)

# Filter for the first 20 customers (in customers_df row order)
first_20_customer_ids = customers_df['CustomerID'].iloc[:20]
lookalike_df = lookalike_df[lookalike_df['CustomerID'].isin(first_20_customer_ids)]

# Save to CSV
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike.csv generated successfully.")

Lookalike.csv generated successfully.
