In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Load datasets
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configurable data directory so the notebook runs elsewhere.
customers = pd.read_csv("/Users/kartikaursang/Downloads/Customers.csv")
products = pd.read_csv("/Users/kartikaursang/Downloads/Products.csv")
transactions = pd.read_csv("/Users/kartikaursang/Downloads/Transactions.csv")

# Merge the data (inner joins: transactions without a matching customer or
# product row are dropped).
merged = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")

# Create a pivot table to get a customer-product matrix:
# one row per CustomerID, one column per ProductID, cells = total Quantity.
customer_product_matrix = merged.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

# Normalize the data (per-product standardisation)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_product_matrix)

# Tunable parameters of the lookalike search
N_LOOKALIKES = 3          # lookalikes reported per customer
N_TARGET_CUSTOMERS = 20   # how many leading rows of the matrix to report on

# Create the Lookalike model using Nearest Neighbors
model = NearestNeighbors(n_neighbors=N_LOOKALIKES, metric='cosine')
model.fit(scaled_data)

# Calling kneighbors() without a query returns the neighbours of every fitted
# row and does NOT count a row as its own neighbour. Querying with the
# training rows themselves (as before) returned each customer as its own
# closest match at distance 0, leaving only two real lookalikes.
distances, indices = model.kneighbors()

# Get top 3 similar customers for the first 20 customers.
# Only customers that appear in the matrix (i.e. have at least one merged
# transaction) can be reported; cap the count so a small dataset cannot
# raise an IndexError.
customer_ids = customer_product_matrix.index
n_targets = min(N_TARGET_CUSTOMERS, len(customer_ids))

lookalike_results = {}
for idx in range(n_targets):
    # The model reports cosine *distance*; convert it to a similarity
    # (1 - distance) so that a higher SimilarityScore means "more alike".
    lookalike_results[customer_ids[idx]] = [
        (customer_ids[neighbor_idx], 1.0 - distance)
        for neighbor_idx, distance in zip(indices[idx], distances[idx])
    ]

# Flatten the lookalike results for creating the DataFrame
flattened_results = [
    [customer_id, lookalike_id, score]
    for customer_id, similar_customers in lookalike_results.items()
    for lookalike_id, score in similar_customers
]

# Create DataFrame from the flattened results
lookalike_df = pd.DataFrame(flattened_results, columns=["CustomerID", "Lookalike_CustomerID", "SimilarityScore"])

# Save to CSV
lookalike_df.to_csv("Kartik_Aursang_Lookalike.csv", index=False)


In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Load datasets
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configurable data directory so the notebook runs elsewhere.
customers = pd.read_csv("/Users/kartikaursang/Downloads/Customers.csv")
products = pd.read_csv("/Users/kartikaursang/Downloads/Products.csv")
transactions = pd.read_csv("/Users/kartikaursang/Downloads/Transactions.csv")

# Check if data is loaded correctly
print("Customers Data Loaded:")
print(customers.head())

print("\nProducts Data Loaded:")
print(products.head())

print("\nTransactions Data Loaded:")
print(transactions.head())

# Merge the data (inner joins: transactions without a matching customer or
# product row are dropped).
merged = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")
print("\nMerged Data Sample:")
print(merged.head())

# Create a pivot table to get a customer-product matrix:
# one row per CustomerID, one column per ProductID, cells = total Quantity.
customer_product_matrix = merged.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)
print("\nCustomer-Product Matrix:")
print(customer_product_matrix.head())

# Normalize the data (per-product standardisation)
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_product_matrix)
print("\nScaled Data (First 5 Rows):")
print(scaled_data[:5])

# Tunable parameters of the lookalike search
N_LOOKALIKES = 3          # lookalikes reported per customer
N_TARGET_CUSTOMERS = 20   # how many leading rows of the matrix to report on

# Create the Lookalike model using Nearest Neighbors
model = NearestNeighbors(n_neighbors=N_LOOKALIKES, metric='cosine')
model.fit(scaled_data)
print("\nModel Trained!")

# Calling kneighbors() without a query returns the neighbours of every fitted
# row and does NOT count a row as its own neighbour. Querying with the
# training rows themselves (as before) returned each customer as its own
# closest match at distance 0, leaving only two real lookalikes.
distances, indices = model.kneighbors()

# Get top 3 similar customers for the first 20 customers.
# Only customers that appear in the matrix (i.e. have at least one merged
# transaction) can be reported; cap the count so a small dataset cannot
# raise an IndexError.
customer_ids = customer_product_matrix.index
n_targets = min(N_TARGET_CUSTOMERS, len(customer_ids))

lookalike_results = {}
for idx in range(n_targets):
    # The model reports cosine *distance*; convert it to a similarity
    # (1 - distance) so that a higher SimilarityScore means "more alike".
    lookalike_results[customer_ids[idx]] = [
        (customer_ids[neighbor_idx], 1.0 - distance)
        for neighbor_idx, distance in zip(indices[idx], distances[idx])
    ]

# Print some of the lookalike results for debugging
print("\nLookalike Results (Sample):")
for customer_id, similar_customers in list(lookalike_results.items())[:3]:  # Show first 3 results
    print(f"{customer_id}: {similar_customers}")

# Flatten the lookalike results for creating the DataFrame
flattened_results = [
    [customer_id, lookalike_id, score]
    for customer_id, similar_customers in lookalike_results.items()
    for lookalike_id, score in similar_customers
]

# Create DataFrame from the flattened results
lookalike_df = pd.DataFrame(flattened_results, columns=["CustomerID", "Lookalike_CustomerID", "SimilarityScore"])

# Print the first few rows of the final DataFrame
print("\nFinal Lookalike DataFrame (Sample):")
print(lookalike_df.head())

# Save to CSV
lookalike_df.to_csv("Kartik_Aursang_Lookalike.csv", index=False)
print("\nLookalike Data Saved to CSV!")


Customers Data Loaded:
  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15

Products Data Loaded:
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31

Transactions Data Loaded:
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2     