In [1]:
import pandas as pd
import numpy as np
import datetime
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [2]:
customer_data=pd.read_excel("UserDate.xlsx")
customer_data.columns = customer_data.columns.str.strip()
customer_data

Unnamed: 0,userid,Name,LastName,PhoneNumber,totaldeliverdorders,totalcanceld,closedbysystem,closedbysupport,totalordersincash,wallet,online,foodstuff,normal,package,grocery,pharmacy,tips,price,C To U,U To C
0,9e1253b4-8411-446d-b03f-b143840c4885,مصطفى,,966531821397,222,0,30,14,9,29,228,2,0,264,0,0,185,167.00,4.990826,5.0
1,fec169bc-8b89-46bb-888e-073c0d21382e,ostorat,tatower,966503413206,198,0,49,44,231,57,8,13,3,280,0,0,160,2395.00,4.893048,5.0
2,42f0925a-056f-4b7f-bbd2-bd401a931856,mar,,966553975785,185,0,13,9,21,0,186,0,57,150,0,0,47,327.60,4.988764,5.0
3,3e2a1a17-175d-4e05-ba23-9553db3c1eca,rasha,,966591362223,184,0,17,9,8,145,57,0,0,210,0,0,269,125.90,4.950000,5.0
4,36b06224-eb6f-4f83-917f-6c7ff0f8b441,أم,خالد,966502990514,182,0,47,10,5,0,235,13,224,0,1,2,0,10268.50,4.954545,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84150,20586357-896b-49d1-aa95-4df6518bb0cb,عبير,الحربي,966568208913,2,0,0,0,0,0,2,1,1,0,0,0,0,68.00,5.000000,5.0
84151,2058d995-a141-4db1-83e4-9bc04d0d5706,حنان,المنصوري,966542992706,2,0,0,0,2,0,0,1,0,0,1,0,0,219.00,5.000000,5.0
84152,205c9fbc-c761-4678-8c95-25544dc82f92,omar,,966561114380,2,0,1,0,2,0,1,0,3,0,0,0,0,188.00,5.000000,5.0
84153,06fefcb7-3db1-4492-9cf9-3af37b88f0ec,ساره,,966550125352,2,0,5,0,0,0,7,0,3,4,0,0,4,63.00,5.000000,5.0


In [3]:
X = customer_data[['totaldeliverdorders', 'totalcanceld', 'closedbysystem', 'closedbysupport', 'totalordersincash', 'wallet', 'online', 'foodstuff', 'normal', 'package', 'grocery', 'pharmacy', 'tips']]

In [4]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
silhouette_scores = []
for n_clusters in range(2, 11):
    kmeans_model = KMeans(n_clusters=n_clusters,random_state=42)
    kmeans_model.fit(X_scaled)
    labels = kmeans_model.labels_
silhouette_scores.append(silhouette_score(X_scaled, labels))

In [6]:
preprocessor = ColumnTransformer(transformers=[('num', StandardScaler(), ['totaldeliverdorders', 'totalcanceld', 'closedbysystem', 'closedbysupport', 'totalordersincash', 'wallet', 'online', 'foodstuff', 'normal', 'package', 'grocery', 'pharmacy', 'tips', ])])

In [7]:
kmeans_model = KMeans(n_clusters=3, random_state=42)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('kmeans_model', kmeans_model)
])

In [8]:
pipeline.fit(customer_data)
customer_data['customer_segment'] = pipeline.predict(customer_data)

In [9]:
segment_labels = {0: 'High Value',
                  1: 'Mid Value',
                  2: 'Low Value'}
customer_data['customer_segment'] = customer_data['customer_segment'].map(segment_labels)

In [10]:
customer_data

Unnamed: 0,userid,Name,LastName,PhoneNumber,totaldeliverdorders,totalcanceld,closedbysystem,closedbysupport,totalordersincash,wallet,...,foodstuff,normal,package,grocery,pharmacy,tips,price,C To U,U To C,customer_segment
0,9e1253b4-8411-446d-b03f-b143840c4885,مصطفى,,966531821397,222,0,30,14,9,29,...,2,0,264,0,0,185,167.00,4.990826,5.0,High Value
1,fec169bc-8b89-46bb-888e-073c0d21382e,ostorat,tatower,966503413206,198,0,49,44,231,57,...,13,3,280,0,0,160,2395.00,4.893048,5.0,High Value
2,42f0925a-056f-4b7f-bbd2-bd401a931856,mar,,966553975785,185,0,13,9,21,0,...,0,57,150,0,0,47,327.60,4.988764,5.0,High Value
3,3e2a1a17-175d-4e05-ba23-9553db3c1eca,rasha,,966591362223,184,0,17,9,8,145,...,0,0,210,0,0,269,125.90,4.950000,5.0,High Value
4,36b06224-eb6f-4f83-917f-6c7ff0f8b441,أم,خالد,966502990514,182,0,47,10,5,0,...,13,224,0,1,2,0,10268.50,4.954545,5.0,High Value
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84150,20586357-896b-49d1-aa95-4df6518bb0cb,عبير,الحربي,966568208913,2,0,0,0,0,0,...,1,1,0,0,0,0,68.00,5.000000,5.0,Mid Value
84151,2058d995-a141-4db1-83e4-9bc04d0d5706,حنان,المنصوري,966542992706,2,0,0,0,2,0,...,1,0,0,1,0,0,219.00,5.000000,5.0,Mid Value
84152,205c9fbc-c761-4678-8c95-25544dc82f92,omar,,966561114380,2,0,1,0,2,0,...,0,3,0,0,0,0,188.00,5.000000,5.0,Mid Value
84153,06fefcb7-3db1-4492-9cf9-3af37b88f0ec,ساره,,966550125352,2,0,5,0,0,0,...,0,3,4,0,0,4,63.00,5.000000,5.0,Mid Value


In [15]:
customer_data.to_excel('FinalRuselt.xlsx', index=False)