In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import DataLoader, TensorDataset


In [2]:
data = pd.read_csv("customers.csv")

data.head()


Unnamed: 0,ID,Gender,Ever_Married,Age,Graduated,Profession,Work_Experience,Family_Size,Spending_Score,Var_1,Segmentation
0,1,Female,Yes,22,No,Lawyer,1,2,High,Cat_1,D
1,2,Female,Yes,24,Yes,Manager,10,2,Low,Cat_1,A
2,3,Female,Yes,26,Yes,Lawyer,8,3,Average,Cat_3,D
3,4,Male,Yes,32,Yes,Manager,13,3,High,Cat_2,B
4,5,Male,Yes,29,Yes,Artist,15,4,High,Cat_4,A


In [3]:
# Drop ID column
data.drop("ID", axis=1, inplace=True)

# Fill missing values
data.fillna(method='ffill', inplace=True)

# Separate features and target
y = data["Segmentation"]
X = data.drop("Segmentation", axis=1)

# Convert categorical â†’ numeric
X = pd.get_dummies(X)

# Encode labels
le = LabelEncoder()
y = le.fit_transform(y)

# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)


  data.fillna(method='ffill', inplace=True)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Train size:", X_train.shape)
print("Test size :", X_test.shape)


Train size: (960, 22)
Test size : (240, 22)


In [5]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


In [6]:
class PeopleClassifier(nn.Module):
    def __init__(self, input_size):
        super(PeopleClassifier, self).__init__()

        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),

            nn.Linear(64, 32),
            nn.ReLU(),

            nn.Linear(32, 4)
        )

    def forward(self, x):
        return self.net(x)


In [7]:
input_size = X_train.shape[1]

model = PeopleClassifier(input_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [8]:
def train_model(model, train_loader, criterion, optimizer, epochs):

    for epoch in range(epochs):
        total_loss = 0

        for xb, yb in train_loader:

            optimizer.zero_grad()

            outputs = model(xb)
            loss = criterion(outputs, yb)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs} Loss: {total_loss:.4f}")


In [9]:
train_model(model, train_loader, criterion, optimizer, epochs=30)


Epoch 1/30 Loss: 20.8334
Epoch 2/30 Loss: 20.6452
Epoch 3/30 Loss: 20.5023
Epoch 4/30 Loss: 20.3868
Epoch 5/30 Loss: 20.2684
Epoch 6/30 Loss: 20.1562
Epoch 7/30 Loss: 20.0332
Epoch 8/30 Loss: 19.9205
Epoch 9/30 Loss: 19.7861
Epoch 10/30 Loss: 19.6708
Epoch 11/30 Loss: 19.5459
Epoch 12/30 Loss: 19.4116
Epoch 13/30 Loss: 19.2885
Epoch 14/30 Loss: 19.1779
Epoch 15/30 Loss: 19.0505
Epoch 16/30 Loss: 18.9267
Epoch 17/30 Loss: 18.7818
Epoch 18/30 Loss: 18.6608
Epoch 19/30 Loss: 18.5200
Epoch 20/30 Loss: 18.3985
Epoch 21/30 Loss: 18.2520
Epoch 22/30 Loss: 18.1605
Epoch 23/30 Loss: 18.0070
Epoch 24/30 Loss: 17.8789
Epoch 25/30 Loss: 17.7411
Epoch 26/30 Loss: 17.6063
Epoch 27/30 Loss: 17.4634
Epoch 28/30 Loss: 17.3336
Epoch 29/30 Loss: 17.2557
Epoch 30/30 Loss: 17.1606


In [10]:
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)

cm = confusion_matrix(y_test, predicted)
report = classification_report(y_test, predicted)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", report)


Confusion Matrix:
 [[17 10 13 20]
 [12 16 17 14]
 [12 15 18 18]
 [14  9 16 19]]

Classification Report:
               precision    recall  f1-score   support

           0       0.31      0.28      0.30        60
           1       0.32      0.27      0.29        59
           2       0.28      0.29      0.28        63
           3       0.27      0.33      0.29        58

    accuracy                           0.29       240
   macro avg       0.29      0.29      0.29       240
weighted avg       0.29      0.29      0.29       240



In [11]:
sample = X_test[0].unsqueeze(0)

with torch.no_grad():
    pred = model(sample)
    _, cls = torch.max(pred, 1)

print("Predicted Segment:", le.inverse_transform(cls.numpy()))


Predicted Segment: ['B']
