In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the customer and product tables from the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')  # loaded here but not used in this cell

# NOTE(review): both features below are randomly generated placeholders, not
# values derived from real customer data. A seeded generator is used so that
# re-running the cell reproduces the same features (and the same CSV).
SEED = 42
rng = np.random.default_rng(SEED)
customers['TotalSpend'] = rng.uniform(100, 1000, size=len(customers))
customers['NumTransactions'] = rng.integers(1, 50, size=len(customers))

# Standardize the features, then compute the pairwise cosine similarity
# between all customers (one row/column per customer, in file order).
features = ['TotalSpend', 'NumTransactions']
scaler = StandardScaler()
customers_scaled = scaler.fit_transform(customers[features])

similarity_matrix = cosine_similarity(customers_scaled)

lookalike_map = {}
customer_ids = customers['CustomerID'].values
# Native Python copies of the IDs so the stringified output below does not
# contain NumPy scalar reprs.
native_ids = customer_ids.tolist()

N_TARGETS = 20  # number of leading customers to build lookalikes for
TOP_K = 3       # number of lookalikes kept per customer

# Cap the loop at the number of customers actually present so a short
# Customers.csv does not raise IndexError.
for customer_index in range(min(N_TARGETS, len(native_ids))):
    # Pair every other customer's position with its similarity score,
    # dropping the customer's own entry before ranking.
    candidates = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[customer_index])
        if idx != customer_index
    ]
    # Highest similarity first; sorted() is stable, so ties keep file order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # float() turns np.float64 into a plain float: under NumPy >= 2 the repr of
    # a NumPy scalar is "np.float64(...)", which would leak into the CSV text.
    top_3 = [(native_ids[idx], float(score)) for idx, score in candidates[:TOP_K]]
    lookalike_map[native_ids[customer_index]] = top_3

# One row per target customer; the lookalike list is stored as its string form.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map.keys()),
    'Lookalikes': [str(value) for value in lookalike_map.values()]
})
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model successfully generated Lookalike.csv")

Lookalike model successfully generated Lookalike.csv


In [1]:
import pandas as pd
import numpy as np

# Hard-coded (customer ID, top-3 lookalikes) pairs; each lookalike is a
# (customer ID, similarity score) tuple.
lookalikes_info = [
    ("C0001", [('C0039', 0.9999167561486281), ('C0079', 0.9998833522681517), ('C0112', 0.9993111429853242)]),
    ("C0002", [('C0069', 0.999988272366038), ('C0086', 0.999786687703147), ('C0007', 0.9997317539690463)]),
    ("C0003", [('C0030', 0.9988622731718326), ('C0085', 0.9967326430114567), ('C0077', 0.996113184971047)]),
    ("C0004", [('C0150', 0.9996625437744724), ('C0044', 0.9995630874203332), ('C0040', 0.9995287882888566)]),
    ("C0005", [('C0172', 0.9986891025131007), ('C0043', 0.9985908861198628), ('C0194', 0.998398940519275)]),
    ("C0006", [('C0119', 0.9995012848277881), ('C0153', 0.9991617032444308), ('C0020', 0.9984123214802487)]),
    ("C0007", [('C0128', 0.9998882377431547), ('C0002', 0.9997317539690463), ('C0069', 0.999607860842188)]),
    ("C0008", [('C0096', 0.9999840117132255), ('C0164', 0.9995834792320268), ('C0098', 0.9947705379811482)]),
    ("C0009", [('C0089', 0.9999909185374387), ('C0176', 0.999952882539442), ('C0016', 0.9997141939188285)]),
    ("C0010", [('C0185', 0.9999991948252303), ('C0095', 0.9998696270368288), ('C0182', 0.9989237726394478)]),
    ("C0011", [('C0047', 0.9997351834217839), ('C0086', 0.9996935256242412), ('C0069', 0.999177125053689)]),
    ("C0012", [('C0179', 0.9999961009502929), ('C0077', 0.9997397752453223), ('C0083', 0.9996274882846105)]),
    ("C0013", [('C0132', 0.9995315723397454), ('C0193', 0.9985458685219484), ('C0131', 0.9971831337047142)]),
    ("C0014", [('C0066', 0.9999965645903015), ('C0160', 0.9999674290301424), ('C0056', 0.9942920011469802)]),
    ("C0015", [('C0144', 0.9999601277679376), ('C0177', 0.9998844737375936), ('C0149', 0.9986993942654837)]),
    ("C0016", [('C0089', 0.9998070003703114), ('C0009', 0.9997141939188285), ('C0176', 0.9994350191152582)]),
    ("C0017", [('C0018', 0.9999634950063011), ('C0093', 0.999788963339052), ('C0141', 0.9995851864458839)]),
    ("C0018", [('C0017', 0.9999634950063011), ('C0154', 0.9997827614596271), ('C0093', 0.9995769332776513)]),
    ("C0019", [('C0022', 0.9975429868199469), ('C0064', 0.9975235642629079), ('C0161', 0.9961599691854324)]),
    ("C0020", [('C0146', 0.9999772230525935), ('C0153', 0.9998812886912325), ('C0110', 0.9994779038846305)]),
]

# Split the pairs into two parallel columns, preserving the order above.
lookalike_data = {
    "CustomerID": [customer for customer, _ in lookalikes_info],
    "Lookalikes": [matches for _, matches in lookalikes_info],
}

# Build the table, write it to CSV without the index, and show the first rows.
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)
print(lookalike_df.head())

  CustomerID                                         Lookalikes
0      C0001  [(C0039, 0.9999167561486281), (C0079, 0.999883...
1      C0002  [(C0069, 0.999988272366038), (C0086, 0.9997866...
2      C0003  [(C0030, 0.9988622731718326), (C0085, 0.996732...
3      C0004  [(C0150, 0.9996625437744724), (C0044, 0.999563...
4      C0005  [(C0172, 0.9986891025131007), (C0043, 0.998590...
