In [4]:
import pandas as pd
customers_data = pd.read_csv("Customers.csv")
transactions_data = pd.read_csv("Transactions.csv")
products_data = pd.read_csv('Products.csv')

In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
customers_data['Region'] = le.fit_transform(customers_data['Region'])
customers_data['ID'] = le.fit_transform(customers_data['CustomerID'])

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
import json


def create_customer_features(customers_data, transactions_data, products_data):
    # Initialize LabelEncoders
    customer_encoder = LabelEncoder()
    product_encoder = LabelEncoder()
    
    # Fit and transform CustomerID for customers_data
    customers_data['ID'] = customer_encoder.fit_transform(customers_data['CustomerID'])
    
    # Map CustomerID in transactions_data to encoded values, with -1 for unseen labels
    customer_mapping = dict(zip(customers_data['CustomerID'], customers_data['ID']))
    transactions_data['CustomerID'] = transactions_data['CustomerID'].map(customer_mapping).fillna(-1).astype(int)
    
    # Fit and transform ProductID for products_data
    products_data['ProductID'] = product_encoder.fit_transform(products_data['ProductID'])
    
    # Map ProductID in transactions_data to encoded values, with -1 for unseen labels
    product_mapping = dict(zip(products_data['ProductID'], range(len(products_data))))
    transactions_data['ProductID'] = transactions_data['ProductID'].map(product_mapping).fillna(-1).astype(int)
    
    # Create a copy of input dataframes
    customers = customers_data.copy()
    transactions = transactions_data.copy()
    products = products_data.copy()
    
    # Create customer transaction features
    customer_transactions = transactions.groupby('CustomerID').agg({
        'TransactionID': 'count',
        'TotalValue': ['sum', 'mean']
    })
    customer_transactions.columns = ['transaction_count', 'total_spend', 'avg_spend']
    
    # Create category preference features
    category_data = transactions.merge(products, on='ProductID', how='left')
    category_pivot = pd.pivot_table(
        category_data,
        values='TotalValue',
        index='CustomerID',
        columns='Category',
        aggfunc='sum'
    )
    
    # Normalize category preferences
    category_pivot = category_pivot.div(category_pivot.sum(axis=1), axis=0).fillna(0)
    
    # Combine all features
    customer_features = pd.merge(
        customers[['ID', 'Region']], 
        customer_transactions, 
        left_on='ID',
        right_index=True,
        how='left'
    ).fillna(0)
    
    customer_features = pd.merge(
        customer_features,
        category_pivot,
        left_on='ID',
        right_index=True,
        how='left'
    ).fillna(0)
    
    return customer_features



def generate_recommendations(customer_features, top_n=3):
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics.pairwise import cosine_similarity
    import pandas as pd
    import json

    # Scale features
    scaler = StandardScaler()
    feature_matrix = scaler.fit_transform(customer_features.iloc[:, 1:])
    
    # Calculate similarity
    similarity_matrix = cosine_similarity(feature_matrix)
    
    recommendations = {}
    # Get recommendations for first 20 customers
    for idx in range(min(20, len(customer_features))):
        customer_id = int(customer_features['ID'].iloc[idx])  # Convert to native Python int
        sim_scores = list(enumerate(similarity_matrix[idx]))
        sim_scores = [(i, score) for i, score in sim_scores if i != idx]
        sim_scores.sort(key=lambda x: x[1], reverse=True)
        
        top_similar = sim_scores[:top_n]
        rec_list = []
        for similar_idx, score in top_similar:
            similar_customer_id = int(customer_features['ID'].iloc[similar_idx])  # Convert to native Python int
            rec_list.append([similar_customer_id, round(float(score), 4)])  # Ensure score is float
        
        recommendations[customer_id] = rec_list
    
    # Save to CSV
    result_df = pd.DataFrame({
        'CustomerID': list(recommendations.keys()),
        'Recommendations': [json.dumps(v) for v in recommendations.values()]
    })
    result_df.to_csv('Lookalike.csv', index=False)
    
    return recommendations



In [9]:
customer_features = create_customer_features(customers_data.head(20), transactions_data.head(20), products_data.head(20))
recommendations = generate_recommendations(customer_features, top_n=3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  customers_data['ID'] = customer_encoder.fit_transform(customers_data['CustomerID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  transactions_data['CustomerID'] = transactions_data['CustomerID'].map(customer_mapping).fillna(-1).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  products_d

In [8]:
for customer_id, rec_list in recommendations.items():
    customer_name = customers_data[customers_data['ID'] == customer_id]['CustomerName'].values[0]
    print(f"Top 3 recommendations for {customer_name}:")
    for rec in rec_list:
        similar_name = customers_data[customers_data['ID'] == rec[0]]['CustomerName'].values[0]
        print(f"    - {similar_name} (Similarity: {rec[1]})")

Top 3 recommendations for Lawrence Carroll:
    - Michael Rivera (Similarity: 1.0)
    - Kathleen Rodriguez (Similarity: 1.0)
    - Brittany Palmer (Similarity: 1.0)
Top 3 recommendations for Elizabeth Lutz:
    - Laura Weber (Similarity: 1.0)
    - Paul Graves (Similarity: 1.0)
    - Joy Clark (Similarity: 0.967)
Top 3 recommendations for Michael Rivera:
    - Lawrence Carroll (Similarity: 1.0)
    - Kathleen Rodriguez (Similarity: 1.0)
    - Brittany Palmer (Similarity: 1.0)
Top 3 recommendations for Kathleen Rodriguez:
    - Lawrence Carroll (Similarity: 1.0)
    - Michael Rivera (Similarity: 1.0)
    - Brittany Palmer (Similarity: 1.0)
Top 3 recommendations for Laura Weber:
    - Elizabeth Lutz (Similarity: 1.0)
    - Paul Graves (Similarity: 1.0)
    - Joy Clark (Similarity: 0.967)
Top 3 recommendations for Brittany Palmer:
    - Lawrence Carroll (Similarity: 1.0)
    - Michael Rivera (Similarity: 1.0)
    - Kathleen Rodriguez (Similarity: 1.0)
Top 3 recommendations for Paul Grave