# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from google.colab import files
import io
import warnings

# Mute every warning category so library notices do not clutter the cell output.
warnings.simplefilter('ignore')

# --- 1. DYNAMIC FILE UPLOAD ---
# Greet the user and ask for the CSV in one call, then hand over to the upload
# helper; `uploaded` is later indexed by file name to get the raw file bytes.
print(
    "--- Welcome to the Customer Segmentation Program ---",
    "Please upload your customer CSV file (e.g., 'Mall_Customers.csv')",
    sep="\n",
)
uploaded = files.upload()

# One colour per cluster; the number of colours fixes the number of clusters.
CLUSTER_COLORS = ['green', 'red', 'cyan', 'blue', 'magenta']
N_CLUSTERS = len(CLUSTER_COLORS)


def _profile_by_cluster(centers, overall_mean):
    """Return ``{cluster_index: profile line}`` derived from centroid positions.

    K-Means numbers its clusters arbitrarily, so a fixed index -> profile table
    is only valid for one particular dataset and library version. Instead, the
    cluster whose centroid lies closest to the overall mean is called
    "Standard", and every other cluster is named after the quadrant (above or
    below the mean income / mean score) its centroid falls in. This is a
    heuristic: two clusters in the same quadrant receive the same profile.

    Args:
        centers: Array of shape (n_clusters, 2) with (income, score) centroids.
        overall_mean: Length-2 array with the mean income and mean score of
            the training data.

    Returns:
        dict mapping each 0-based cluster index to the line to print for it.
    """
    quadrant_profiles = {
        # (income above mean, score above mean) -> profile line
        (True, True): ">>> (Profile: Target - High income, high spending)",
        (False, True): ">>> (Profile: Careless - Low income, high spending)",
        (True, False): ">>> (Profile: Careful - High income, low spending)",
        (False, False): ">>> (Profile: Sensible - Low income, low spending)",
    }
    offsets = centers - overall_mean
    central_idx = int((offsets ** 2).sum(axis=1).argmin())

    profiles = {}
    for idx, (income_offset, score_offset) in enumerate(offsets):
        if idx == central_idx:
            profiles[idx] = ">>> (Profile: Standard - Average income, average spending)"
        else:
            quadrant = (bool(income_offset > 0), bool(score_offset > 0))
            profiles[idx] = quadrant_profiles[quadrant]
    return profiles


# Check if any file was uploaded
if not uploaded:
    print("\nError: No file was uploaded. Please try again.")
else:
    # Take the *first* uploaded file, whatever it is called. This keeps working
    # even if Colab renames it to something like "Mall_Customers (7).csv".
    file_name = next(iter(uploaded))

    print(f"\nSuccessfully loaded '{file_name}'!")

    # --- 2. DATA PREPARATION ---
    # Reset X explicitly: at module level `locals()` is the global namespace, so
    # an X left over from an earlier run of this cell must not be mistaken for
    # the result of this run.
    X = None
    try:
        # Parse the uploaded bytes; reading is inside the try so a malformed
        # or empty CSV is reported instead of raising an unhandled traceback.
        data = pd.read_csv(io.BytesIO(uploaded[file_name]))

        # Shorter column names for the two clustering features.
        # NOTE: This still assumes the *column names* inside the file match.
        data.rename(columns={'Annual Income (k$)': 'Income', 'Spending Score (1-100)': 'Score'}, inplace=True)
        features = data[['Income', 'Score']]
    except KeyError as e:
        print(f"Error: Missing expected column in the CSV file: {e}. Please ensure 'Annual Income (k$)' and 'Spending Score (1-100)' columns exist.")
    except Exception as e:
        print(f"An unexpected error occurred during data preparation: {e}")
    else:
        # K-Means cannot fit missing or non-numeric values: coerce anything
        # unparseable to NaN and drop those rows.
        features = features.apply(pd.to_numeric, errors='coerce').dropna()
        dropped_rows = len(data) - len(features)
        if dropped_rows:
            print(f"Note: Ignoring {dropped_rows} row(s) with missing or non-numeric income/score values.")

        if len(features) < N_CLUSTERS:
            print(f"Error: Need at least {N_CLUSTERS} rows with numeric income and score values, but found {len(features)}.")
        else:
            X = features
            print("Data prepared. Starting model training...")

    # Only proceed with clustering if data preparation succeeded in this run
    if X is not None:
        # --- 3. MODEL TRAINING ---
        print("Training the K-Means clustering model...")

        # n_init is pinned because its default differs between scikit-learn
        # releases; random_state keeps the run reproducible.
        kmeans_model = KMeans(n_clusters=N_CLUSTERS, init='k-means++', n_init=10, random_state=42)

        # 'y_kmeans' holds the 0-based cluster index (0 .. N_CLUSTERS - 1) of
        # every row of X.
        y_kmeans = kmeans_model.fit_predict(X)

        # Attach the assignments to the main DataFrame, aligned on X's index so
        # rows that were dropped above end up with a missing value.
        data['Cluster'] = pd.Series(y_kmeans, index=X.index)

        print("Model training complete.")

        # --- 4. VISUALIZATION ---
        print("Generating cluster visualization...")

        plt.figure(figsize=(10, 6))

        # One scatter series per cluster; the legend numbers clusters from 1.
        for cluster_idx, color in enumerate(CLUSTER_COLORS):
            members = X[y_kmeans == cluster_idx]
            plt.scatter(members['Income'], members['Score'], s=100, c=color, label=f'Cluster {cluster_idx + 1}')

        # Plot the centroids (the center of each cluster)
        plt.scatter(kmeans_model.cluster_centers_[:, 0], kmeans_model.cluster_centers_[:, 1], s=300, c='yellow', label='Centroids', edgecolors='black')

        plt.title('Customer Segments')
        plt.xlabel('Annual Income (k$)')
        plt.ylabel('Spending Score (1-100)')
        plt.legend()
        plt.grid(True)
        plt.show()

        # --- 5. INTERACTIVE PROMPT ---
        print("\n--- Interactive Customer Prediction ---")
        print("Type 'exit' at any time to quit.")

        # Profile text per cluster, derived from where each centroid sits
        # relative to the average customer rather than from its arbitrary index.
        profiles = _profile_by_cluster(kmeans_model.cluster_centers_, X.mean().to_numpy())

        while True:
            try:
                # Get user input
                income_input = input("\nEnter customer's Annual Income (k$): ").strip()
                if income_input.lower() == 'exit':
                    break

                score_input = input("Enter customer's Spending Score (1-100): ").strip()
                if score_input.lower() == 'exit':
                    break

                # Convert input to numbers
                new_income = float(income_input)
                new_score = float(score_input)

                # Predict with the same column names the model was fitted on.
                new_customer_data = pd.DataFrame([[new_income, new_score]], columns=X.columns)
                predicted_cluster = int(kmeans_model.predict(new_customer_data)[0])

                # Shown 1-based so the number matches the plot legend.
                print(f"\n>>> This customer belongs to: Cluster {predicted_cluster + 1}")
                print(profiles[predicted_cluster])

            except ValueError:
                print("Invalid input. Please enter numbers only (or 'exit').")
            except EOFError:
                # Input stream closed: leave the loop instead of retrying forever.
                break
            except Exception as e:
                print(f"An error occurred: {e}")

        print("\nThank you for using the segmentation program. Goodbye!")