In [None]:
### Generating the Lookalike model

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Input locations (Google Drive mount)
CUSTOMERS_CSV = "/content/drive/MyDrive/Data Science assignment /Datasets/Customers.csv"
TRANSACTIONS_CSV = "/content/drive/MyDrive/Data Science assignment /Datasets/Transactions.csv"

# Read both raw tables
customers = pd.read_csv(CUSTOMERS_CSV)
transactions = pd.read_csv(TRANSACTIONS_CSV)


def _join_product_ids(product_ids):
    """Return one customer's ProductIDs as a single space-separated string."""
    return ' '.join(str(pid) for pid in product_ids)


# One row per CustomerID whose 'ProductID' column holds every purchased
# ProductID (repeats kept) joined by spaces.
customer_transaction_data = (
    transactions
    .groupby('CustomerID')['ProductID']
    .agg(_join_product_ids)
    .reset_index()
)

# Left-join onto the customer table so customers without any transaction are
# kept; their missing 'ProductID' becomes an empty string.
# NOTE(review): fillna('') is applied to every column of the merged frame, not
# only 'ProductID' - confirm that blanking other missing fields is intended.
customer_profiles = pd.merge(
    customers,
    customer_transaction_data,
    on='CustomerID',
    how='left',
).fillna('')

# TF-IDF Vectorization on ProductID
# Each customer's space-joined ProductIDs form one "document". Tokenize on
# whitespace only and keep case so that every ProductID is exactly one token:
# the default settings lowercase the text, split on punctuation and drop
# one-character tokens, which can merge or lose IDs.
tfidf = TfidfVectorizer(token_pattern=r'\S+', lowercase=False)
tfidf_matrix = tfidf.fit_transform(customer_profiles['ProductID'])

# Compute cosine similarity
# Square matrix: entry [i, j] is the similarity between the TF-IDF rows of
# customer i and customer j (rows follow the order of customer_profiles).
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Prepare Lookalike CSV
TOP_N = 3  # number of lookalikes reported per customer

# Positional list of IDs; row i of cosine_sim_matrix belongs to customer_ids[i].
customer_ids = customer_profiles['CustomerID'].tolist()
lookalike_data = []

for i, customer_id in enumerate(customer_ids):
    # Similarity of customer i against every customer (including itself)
    scores = cosine_sim_matrix[i]

    # Rank every OTHER customer by descending similarity.
    # The customer itself is excluded by position rather than by assuming it
    # sorts first: a customer with an identical score and a lower index, or a
    # customer with no transactions (empty product string), would otherwise
    # push "self" out of slot 0 and into the reported lookalikes.
    # sorted() is stable, so ties keep their original row order.
    ranked = sorted(
        (j for j in range(len(customer_ids)) if j != i),
        key=lambda j: scores[j],
        reverse=True,
    )
    top_similar = ranked[:TOP_N]  # fewer than TOP_N if there are few customers

    # Append to lookalike_data
    lookalike_data.append({
        'CustomerID': customer_id,
        'SimilarCustomers': [customer_ids[j] for j in top_similar],
        'SimilarityScores': [scores[j] for j in top_similar]
    })

# Convert to DataFrame
# Columns are named explicitly so the frame has the expected schema even when
# lookalike_data is empty (otherwise the column lookups below raise KeyError).
lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'SimilarCustomers', 'SimilarityScores'],
)

# Save to CSV
# Flatten the per-customer lists into ", "-separated strings. IDs are passed
# through str() like the scores are, so non-string CustomerIDs do not break
# the join.
lookalike_df['SimilarCustomers'] = lookalike_df['SimilarCustomers'].apply(lambda ids: ', '.join(map(str, ids)))
lookalike_df['SimilarityScores'] = lookalike_df['SimilarityScores'].apply(lambda vals: ', '.join(map(str, vals)))

LOOKALIKE_CSV = '/content/drive/MyDrive/Data Science assignment /Python files/Chinmay_Khanapurkar_Lookalike.csv'
lookalike_df.to_csv(LOOKALIKE_CSV, index=False)
print("Lookalike CSV has been generated!")


Lookalike CSV has been generated!
