In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

def customer_segmentation(df):
    """Cluster businesses into High / Medium / Low value segments.

    Transactions are aggregated per 'ANONYMIZED BUSINESS' into total
    quantity, total value and transaction count, min-max scaled, and
    clustered with KMeans (3 clusters, fixed random seed).

    Args:
        df: Transaction-level DataFrame containing the columns
            'ANONYMIZED BUSINESS', 'QUANTITY', 'Total_Value' and 'DATE'.

    Returns:
        A DataFrame with one row per business and the columns
        'ANONYMIZED BUSINESS', 'QUANTITY', 'Total_Value', 'Frequency'
        and 'Segment' (one of 'High Value', 'Medium Value', 'Low Value').
    """
    feature_cols = ['QUANTITY', 'Total_Value', 'Frequency']
    tier_names = ['High Value', 'Medium Value', 'Low Value']

    # Aggregate metrics per business; counting DATE gives the number of
    # transactions (rows with a non-null DATE).
    business_metrics = df.groupby('ANONYMIZED BUSINESS').agg({
        'QUANTITY': 'sum',
        'Total_Value': 'sum',
        'DATE': 'count'  # Frequency of transactions
    }).rename(columns={'DATE': 'Frequency'}).reset_index()

    # Normalize metrics so no single feature dominates the distance measure.
    scaler = MinMaxScaler()
    metrics_scaled = scaler.fit_transform(business_metrics[feature_cols])

    # Clustering with KMeans. n_init is passed explicitly to keep the
    # historical default of 10 restarts and avoid the default-change warning.
    kmeans = KMeans(n_clusters=len(tier_names), n_init=10, random_state=42)
    cluster_ids = kmeans.fit_predict(metrics_scaled)

    # KMeans cluster ids carry no ordering, so rank the clusters by their
    # centroid on the (scaled) Total_Value axis and assign the tier names
    # from highest to lowest instead of assuming id 0 is the best cluster.
    value_axis = feature_cols.index('Total_Value')
    ranked_clusters = kmeans.cluster_centers_[:, value_axis].argsort()[::-1]
    segment_labels = {
        int(cluster_id): tier
        for cluster_id, tier in zip(ranked_clusters, tier_names)
    }

    business_metrics['Segment'] = [segment_labels[int(cid)] for cid in cluster_ids]

    return business_metrics

# Perform segmentation
df = pd.read_csv('../cleaned_data.csv')

# Ensure Total_Value is correctly calculated if missing
if 'Total_Value' not in df.columns:
    df['Total_Value'] = df['QUANTITY'] * df['UNIT PRICE']

# Drop segment columns left over from a previous run. The output below is
# written back to the same CSV, so a re-run would otherwise find an existing
# 'Segment' column and the merge would emit 'Segment_x' / 'Segment_y'
# instead of a single fresh 'Segment'. errors='ignore' makes this a no-op
# when none of the columns are present.
df = df.drop(columns=['Segment', 'Segment_x', 'Segment_y'], errors='ignore')

# Perform segmentation and attach each business's segment to its transactions
segmented_customers = customer_segmentation(df)
df_with_segments = df.merge(segmented_customers[['ANONYMIZED BUSINESS', 'Segment']],
                            on='ANONYMIZED BUSINESS',
                            how='left')

# Save the updated DataFrame with segmentation (overwrites the input file)
df_with_segments.to_csv('../cleaned_data.csv', index=False)

# Print a preview
print("\n******Cleaned Data with Segments*******\n")
print(df_with_segments.head(50))



  super()._check_params_vs_input(X, default_n_init=10)



******Cleaned Data with Segments*******

                   DATE ANONYMIZED CATEGORY ANONYMIZED PRODUCT  \
0   2024-08-18 21:32:00        Category-106       Product-21f4   
1   2024-08-18 21:32:00        Category-120       Product-4156   
2   2024-08-18 21:32:00        Category-121       Product-49bd   
3   2024-08-18 21:32:00         Category-76       Product-61dd   
4   2024-08-18 21:32:00        Category-119       Product-66e0   
5   2024-08-18 21:32:00         Category-76       Product-6e9c   
6   2024-08-18 21:32:00        Category-120       Product-7864   
7   2024-08-18 21:32:00        Category-119       Product-7940   
8   2024-08-18 21:32:00         Category-96       Product-87b2   
9   2024-08-18 21:32:00        Category-106       Product-c14c   
10  2024-08-06 19:36:00        Category-100       Product-3cc2   
11  2024-08-06 19:36:00         Category-85       Product-5ab4   
12  2024-06-23 19:37:00        Category-120       Product-14f3   
13  2024-06-23 19:37:00         Ca