In [13]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [14]:
# Load the three input tables from CSV files in the current working
# directory; the right-hand side yields them in the same order as the names
# on the left.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [15]:
def build_lookalike_model(top_n=3, n_customers=20, output_path="Pranav_Dhobi_Lookalike.csv"):
    """Score customer similarity from purchase quantities and save the top matches.

    Reads the module-level ``transactions``, ``customers`` and ``products``
    frames, builds a customer x product matrix of ``Quantity`` values,
    standardizes each product column, and computes the cosine similarity
    between every pair of customers. For the first ``n_customers`` customers
    (in matrix index order) the ``top_n`` most similar *other* customers are
    written to ``output_path``.

    Args:
        top_n: Number of lookalikes to keep per customer. Defaults to 3.
        n_customers: Number of leading customers (by sorted matrix index) to
            report on. Defaults to 20.
        output_path: Destination of the CSV file. Defaults to
            ``"Pranav_Dhobi_Lookalike.csv"``.

    Returns:
        pandas.DataFrame: The frame that was written to ``output_path``, with
        columns ``CustomerID`` and ``SimilarCustomers`` (the latter being the
        string form of a list of ``(customer_id, score)`` tuples).
    """
    # Inner joins: transactions whose customer or product is missing from the
    # lookup tables are dropped here.
    merged_data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

    # Customer x product matrix of quantities; pairs never purchased become 0.
    # NOTE(review): 'mean' is pandas' default and is what the original code
    # used implicitly, so repeat purchases of a product are averaged rather
    # than summed - confirm this is intended; aggfunc='sum' would give total
    # units bought.
    customer_product_matrix = merged_data.pivot_table(
        index='CustomerID',
        columns='ProductID',
        values='Quantity',
        aggfunc='mean',
        fill_value=0,
    )

    # Standardize every product column, then compare customers pairwise.
    standardized_matrix = StandardScaler().fit_transform(customer_product_matrix)
    similarity_df = pd.DataFrame(
        cosine_similarity(standardized_matrix),
        index=customer_product_matrix.index,
        columns=customer_product_matrix.index,
    )

    lookalikes = {}
    for customer_id in similarity_df.index[:n_customers]:
        # Remove the customer's own entry by label instead of assuming it
        # sorts into position 0: another customer with an identical purchase
        # vector also scores 1.0, and tie order is not guaranteed, so a
        # positional [1:] slice could keep the customer as their own lookalike.
        closest = similarity_df[customer_id].drop(labels=customer_id).nlargest(top_n)
        # Plain Python floats keep the CSV text free of NumPy scalar reprs
        # such as "np.float64(0.42)".
        lookalikes[customer_id] = [
            (other_id, float(score)) for other_id, score in closest.items()
        ]

    # Save lookalike results to CSV
    lookalike_df = pd.DataFrame({
        "CustomerID": list(lookalikes.keys()),
        "SimilarCustomers": [str(matches) for matches in lookalikes.values()],
    })
    lookalike_df.to_csv(output_path, index=False)
    print(f"Lookalike model CSV has been saved as '{output_path}'.")
    return lookalike_df

In [16]:
# Build the lookalike model and write its CSV, but only when this code runs as
# the main module (not when it is imported from elsewhere).
if __name__ == "__main__":
    build_lookalike_model()

Lookalike model CSV has been saved as 'Pranav_Dhobi_Lookalike.csv'.
