# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
os.environ['OMP_NUM_THREADS'] = '1'

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the customer records from the local CSV export and print the first
# rows as a quick check that the file parsed.
DATA_PATH = r'D:\5th Sem Material\customer_segmentation_data.csv'
df = pd.read_csv(DATA_PATH)

print(df.head())

# Integer codes for each categorical column, applied with Series.map.
# NOTE(review): labels absent from a table are presumably turned into NaN by
# map rather than rejected -- confirm the CSV only holds the labels listed.
CATEGORY_CODES = {
    'Gender': {'Male': 0, 'Female': 1},
    'Marital Status': {'Single': 0, 'Married': 1},
    'Employment Status': {'Employed': 0, 'Unemployed': 1, 'Self-employed': 2},
    'Membership Level': {'Basic': 0, 'Silver': 1, 'Premium': 2},
}
for column, codes in CATEGORY_CODES.items():
    df[column] = df[column].map(codes)

# Remove the customer identifier and the education column, which is not
# encoded above.
df.drop(columns=['CustomerID', 'Education Level'], inplace=True)

# Clustering uses only income and spending score; both are standardised so
# neither column dominates the distance computation because of its scale.
FEATURE_COLUMNS = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[FEATURE_COLUMNS]
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Elbow method: fit K-Means for k = 1..10 on the scaled features and collect
# each model's inertia_ so the curve can be inspected for a bend.
# NOTE(review): n_init is not set, so the number of restarts follows the
# installed scikit-learn default -- confirm results are stable across versions.
cluster_counts = range(1, 11)
wcss = [
    KMeans(n_clusters=k, init='k-means++', random_state=42).fit(X_scaled).inertia_
    for k in cluster_counts
]

plt.plot(cluster_counts, wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Final model: four clusters on the scaled features. The fitted labels are
# stored on the frame so later steps can group and plot by cluster.
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=42)
kmeans.fit(X_scaled)
df['Cluster'] = kmeans.labels_

# Scatter plot of the two clustering features in their original (unscaled)
# units, coloured by assigned cluster.
plt.figure(figsize=(10, 6))
segment_axes = sns.scatterplot(
    data=df,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='viridis',
    s=100,
)
segment_axes.set_title('Customer Segments')
segment_axes.set_xlabel('Annual Income (k$)')
segment_axes.set_ylabel('Spending Score (1-100)')
plt.show()

# Per-cluster mean of every numeric column, used to characterise each segment.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently and raises TypeError if any is still present.
cluster_summary = df.groupby('Cluster').mean(numeric_only=True)
print(cluster_summary)

def marketing_strategy(cluster):
    """Return the marketing recommendation for a cluster label.

    Labels 0 through 3 each have a fixed recommendation string; any other
    value yields None.
    """
    strategies = (
        (0, "Premium offers and loyalty programs"),
        (1, "Discounts and budget-friendly options"),
        (2, "Family-oriented promotions"),
        (3, "Standard campaigns for moderate spenders"),
    )
    # Compare with == instead of using a dict lookup so that any value equal
    # to a label (e.g. a NumPy integer) matches, and unhashable or unrelated
    # values simply fall through to None.
    for label, strategy in strategies:
        if cluster == label:
            return strategy
    return None

# Attach the recommendation for each customer's cluster, then show it next to
# the two features the clustering was based on.
df['Marketing Strategy'] = df['Cluster'].map(marketing_strategy)

report_columns = [
    'Annual Income (k$)',
    'Spending Score (1-100)',
    'Cluster',
    'Marketing Strategy',
]
print(df[report_columns])

from sklearn.metrics import silhouette_score

# Silhouette score of the stored cluster labels, computed on the same scaled
# features the model was fitted on.
cluster_labels = df['Cluster']
score = silhouette_score(X_scaled, cluster_labels)
print(f'Silhouette Score: {score}')

# Interactive prediction: read one customer's income and spending score,
# scale them with the fitted scaler, and report the strategy for the cluster
# the fitted K-Means model assigns.
annual_income = float(input("Enter Annual Income (k$): "))
spending_score = int(input("Enter Spending Score (1-100): "))
# The prompt promises a 1-100 score; reject anything else instead of letting
# the model classify a point outside the advertised range.
if not 1 <= spending_score <= 100:
    raise ValueError(
        f"Spending Score must be between 1 and 100, got {spending_score}"
    )

# The scaler was fitted on a DataFrame, so the new sample is passed with the
# same column names; a bare ndarray makes scikit-learn warn that the input
# has no valid feature names.
new_data = pd.DataFrame(
    [[annual_income, spending_score]],
    columns=['Annual Income (k$)', 'Spending Score (1-100)'],
)
new_data_scaled = scaler.transform(new_data)
predicted_cluster = kmeans.predict(new_data_scaled)[0]

suggested_strategy = marketing_strategy(predicted_cluster)

print(f"Based on your income and spending, we recommend: {suggested_strategy}")