# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Load dataset
data = pd.read_csv('customer_data.csv')  # Replace with actual dataset link

# Data Exploration
# Show the first rows, the column dtypes / non-null counts, and summary statistics.
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()
print(data.describe())

# Data Cleaning
# Both calls mutate `data` in place: first drop every row that has at least
# one missing value, then drop rows that exactly duplicate an earlier row.
data.dropna(inplace=True)
data.drop_duplicates(inplace=True)

# Feature Engineering
# Cluster on the two purchase columns (example features). They are standardized
# first so that the column with the larger numeric range does not dominate the
# distance computation inside K-Means.
features = data[['purchase_value', 'purchase_frequency']]
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Customer Segmentation using K-Means
# random_state pins the centroid initialization so cluster labels are
# reproducible between runs; n_init is given explicitly because its default
# differs across scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # Choose number of clusters
data['cluster'] = kmeans.fit_predict(features_scaled)

# Visualization
# Scatter plot of the two purchase columns, with points colored by the
# 'cluster' column. sns.scatterplot draws on the current figure and returns
# the Axes it used, which is then labelled directly.
plt.figure(figsize=(10, 6))
ax = sns.scatterplot(
    data=data,
    x='purchase_value',
    y='purchase_frequency',
    hue='cluster',
    palette='viridis',
)
ax.set_title('Customer Segmentation')
ax.set_xlabel('Purchase Value')
ax.set_ylabel('Purchase Frequency')
ax.legend()
plt.show()

# Insights
# Print summary statistics for the rows assigned to each cluster. The cluster
# count is read from the fitted model rather than repeated as a literal, so
# changing n_clusters where `kmeans` is built cannot leave this loop out of sync.
for cluster in range(kmeans.n_clusters):
    print(f"Cluster {cluster} characteristics:")
    print(data[data['cluster'] == cluster].describe())
