# In [9]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Load datasets
#customers_file_path = '/mnt/data/Customers.csv'
#products_file_path = '/mnt/data/Products.csv'

# Fail with an actionable message when scikit-learn is not installed.
# NOTE(review): the `from sklearn...` imports near the top of this file execute
# before this guard, so a missing install would already fail there; consider
# moving this check above those imports.
try:
    import sklearn  # noqa: F401  (imported only to probe availability)
except ModuleNotFoundError as exc:
    # Chain explicitly so the traceback shows the failed module lookup as the cause.
    raise ImportError("Required package 'scikit-learn' is not installed. Please install it using 'pip install scikit-learn'.") from exc





# Read both input tables from the current working directory
# (customers first, then products).
customers_data, products_data = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Products.csv')
)

# Sample Transaction History Data (Simulated for Lookalike Model)
# For this example, customers_data is given a 'TransactionHistory' column of
# comma-separated product categories a customer interacted with. The values are
# simulated: a fixed 20-entry pattern is cycled over the rows of customers_data.
_history_pattern = [
    "Books, Electronics",
    "Clothing, Electronics",
    "Home Decor, Books",
    "Books, Electronics, Clothing",
    "Books, Home Decor",
    "Electronics",
    "Clothing",
    "Books, Clothing",
    "Electronics, Home Decor",
    "Clothing, Electronics",
    "Books",
    "Home Decor",
    "Electronics, Books",
    "Clothing, Books",
    "Home Decor",
    "Electronics, Clothing",
    "Books, Home Decor, Electronics",
    "Books, Clothing",
    "Electronics, Home Decor",
    "Clothing, Books",
]

# Repeat the pattern just enough times to cover every row, then trim to the
# exact row count. A hard-coded repeat factor of 10 only fits a table of exactly
# 200 customers and makes the column assignment raise ValueError otherwise.
_n_customers = len(customers_data)
_n_repeats = -(-_n_customers // len(_history_pattern))  # ceiling division
customers_data['TransactionHistory'] = (_history_pattern * _n_repeats)[:_n_customers]

# Vectorize TransactionHistory for similarity computation
def _split_categories(history):
    """Split a comma-separated history string into stripped, non-empty category names."""
    return [category.strip() for category in history.split(',') if category.strip()]


# Tokenize on commas so that a multi-word category such as "Home Decor" stays a
# single feature. The default word-level token pattern would split it into
# "home" and "decor", counting that one category as two features.
# token_pattern=None signals that the default pattern is intentionally unused.
vectorizer = TfidfVectorizer(tokenizer=_split_categories, token_pattern=None)
transaction_matrix = vectorizer.fit_transform(customers_data['TransactionHistory'])

# Compute pairwise similarity scores: entry [i, j] compares the histories of the
# customers in rows i and j of customers_data.
similarity_matrix = cosine_similarity(transaction_matrix)

# Create a lookalike recommendation function
def get_top_lookalikes(customer_id, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Reads the module-level ``customers_data`` frame and ``similarity_matrix``,
    whose rows/columns are in the same order as the rows of ``customers_data``.

    Args:
        customer_id: Value to look up in the ``CustomerID`` column. If it
            occurs more than once, the first occurrence is used.
        top_n: Maximum number of lookalikes to return; values <= 0 yield an
            empty list.

    Returns:
        A list of ``(CustomerID, similarity_score)`` tuples sorted by
        descending score. Ties keep the row order of ``customers_data``. The
        queried customer is never included.

    Raises:
        IndexError: If ``customer_id`` is not present in ``customers_data``.
    """
    is_match = (customers_data['CustomerID'] == customer_id).to_numpy()
    if not is_match.any():
        raise IndexError(f"CustomerID {customer_id!r} not found in customers_data")
    # Use the row *position* (argmax gives the first True), not the index label:
    # similarity_matrix is positional and the frame's index need not be 0..n-1.
    customer_pos = int(is_match.argmax())

    scores = similarity_matrix[customer_pos]
    # Drop the customer by position rather than assuming they sort first:
    # other customers with an identical history tie at the top score.
    candidates = (pos for pos in range(len(scores)) if pos != customer_pos)
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)

    customer_ids = customers_data['CustomerID']
    return [(customer_ids.iloc[pos], scores[pos]) for pos in ranked[:max(top_n, 0)]]

def _to_builtin(value):
    """Convert a NumPy scalar to its plain Python equivalent; pass other values through."""
    return value.item() if isinstance(value, np.generic) else value


# Generate Lookalike Recommendations for the first 20 customers
lookalike_map = {
    customer_id: get_top_lookalikes(customer_id)
    for customer_id in customers_data['CustomerID'].iloc[:20]
}

# Convert the lookalike map to a CSV file.
# Each recommendation list is stored as its string form. NumPy scalars are
# converted to built-in types first, because under NumPy >= 2 their repr is
# e.g. "np.float64(0.83)", which would end up verbatim in the CSV.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map),
    'Recommendations': [
        str([(_to_builtin(match_id), _to_builtin(score)) for match_id, score in recommendations])
        for recommendations in lookalike_map.values()
    ],
})

lookalike_csv_path = 'Lookalike.csv'

lookalike_df.to_csv(lookalike_csv_path, index=False)

print(f"Lookalike recommendations saved to {os.path.abspath(lookalike_csv_path)}")


# Output: Lookalike recommendations saved to C:\Users\WINDOWS\Downloads\Lookalike.csv
