This Spine model is based on the basic brain model. This works similar to the brain and spine of the human body. In the brain and spine, the spine is necessary for the responding to the reflex actions. In the similar way a separate column of the predicted values, with 80% accuracy and 20% false randomized values, is added to the third hidden layer of the brain. This helps in increasing the accuracy of the model and optimizing the model's output speed.

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [497]:
train_df = pd.read_csv("C:\\Users\\Mokshda Sharma\\Desktop\\My Projects\\Biomimicry\\train.csv")
train_df.head()

Unnamed: 0,ID,Gender,Ever_Married,Age,Graduated,Profession,Work_Experience,Spending_Score,Family_Size,Var_1,Segmentation
0,462809,Male,No,22,No,Healthcare,1.0,Low,4.0,Cat_4,D
1,462643,Female,Yes,38,Yes,Engineer,,Average,3.0,Cat_4,A
2,466315,Female,Yes,67,Yes,Engineer,1.0,Low,1.0,Cat_6,B
3,461735,Male,Yes,67,Yes,Lawyer,0.0,High,2.0,Cat_6,B
4,462669,Female,Yes,40,Yes,Entertainment,,High,6.0,Cat_6,A


In [499]:
# Separating Numerical and categorical cols
numerical_cols = train_df.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = train_df.select_dtypes(include=['object', 'category']).columns.tolist()

In [500]:
print(numerical_cols)
print(categorical_cols)

['ID', 'Age', 'Work_Experience', 'Family_Size']
['Gender', 'Ever_Married', 'Graduated', 'Profession', 'Spending_Score', 'Var_1', 'Segmentation']


In [501]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8068 entries, 0 to 8067
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ID               8068 non-null   int64  
 1   Gender           8068 non-null   object 
 2   Ever_Married     7928 non-null   object 
 3   Age              8068 non-null   int64  
 4   Graduated        7990 non-null   object 
 5   Profession       7944 non-null   object 
 6   Work_Experience  7239 non-null   float64
 7   Spending_Score   8068 non-null   object 
 8   Family_Size      7733 non-null   float64
 9   Var_1            7992 non-null   object 
 10  Segmentation     8068 non-null   object 
dtypes: float64(2), int64(2), object(7)
memory usage: 693.5+ KB


In [502]:
train_df['Work_Experience'].fillna(train_df['Work_Experience'].mean(), inplace=True)
train_df['Family_Size'].fillna(train_df['Family_Size'].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Work_Experience'].fillna(train_df['Work_Experience'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Family_Size'].fillna(train_df['Family_Size'].mean(), inplace=True)


In [503]:
# encoding the catagorical cols
label_encoders = {}
label_encoder = LabelEncoder()
for col in categorical_cols:
    train_df[col] = label_encoder.fit_transform(train_df[col])

In [504]:
scaler = StandardScaler()
numeric_cols = ['Age', 'Work_Experience', 'Spending_Score', 'Family_Size']
train_df[numeric_cols] = scaler.fit_transform(train_df[numeric_cols])

In [505]:
X = train_df.drop(['ID', 'Segmentation'], axis=1)  
y = train_df['Segmentation']

In [506]:
# creating a new coln of output vlaues with 80% accurate data 
Z = y.copy()  

# Randomly select 20% of the indices from 'Z' without replacement
n_samples = len(Z)

# For the randomly selected indices, replace their values in 'Z'
# with random integers between 0 and 3 (inclusive)
random_indices = np.random.choice(n_samples, int(0.2 * n_samples), replace=False)
Z.iloc[random_indices] = np.random.randint(0, 4, size=len(random_indices))

# Add the modified 'Z' as a new column in the DataFrame 'X'
X['Z'] = Z 

In [507]:
X.head()

Unnamed: 0,Gender,Ever_Married,Age,Graduated,Profession,Work_Experience,Spending_Score,Family_Size,Var_1,Z
0,1,0,-1.284623,0,5,-0.508763,0.754462,0.767001,3,2
1,0,1,-0.327151,1,2,0.0,-1.603013,0.099972,3,0
2,0,1,1.408268,1,2,-0.508763,0.754462,-1.234085,5,2
3,1,1,1.408268,1,7,-0.818671,-0.424275,-0.567056,5,1
4,0,1,-0.207467,1,3,0.0,-0.424275,2.101059,5,0


In [508]:
# splitting the dataset 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [509]:
# Convert the data into a PyTorch tensor, excluding the 'Z' column.
X_train_tensor = torch.tensor(X_train.drop('Z', axis=1).values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
Z_train_tensor = torch.tensor(X_train['Z'].values, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test.drop('Z', axis=1).values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [511]:

class CustomerSegmentationModel(nn.Module):
    def __init__(self, input_dim):
        super(CustomerSegmentationModel, self).__init__()
        
        # Corrected layer sizes
        self.layer1 = nn.Linear(input_dim, 256)     # Input layer
        self.layer2 = nn.Linear(256, 128)           # First hidden layer
        self.layer3 = nn.Linear(128, 64)            # Second hidden layer
        
        # After concatenating Z (1 feature), input to layer4 becomes 65
        self.layer4 = nn.Linear(64 + 1, 32)         # Third hidden layer
        self.layer5 = nn.Linear(32, 16)             # Fourth hidden layer
        self.layer6 = nn.Linear(16, 8)              # Fifth hidden layer
        self.output_layer = nn.Linear(8, 4)         # Output layer (4 classes)
        
        self.relu = nn.ReLU()                       # ReLU activation function
    
    def forward(self, x, z=None):
        # Pass input through the first layers
        x = self.relu(self.layer1(x))  # First hidden layer
        x = self.relu(self.layer2(x))  # Second hidden layer
        x = self.relu(self.layer3(x))  # Third hidden layer
        
        # Concatenate Z if it is provided (during training)
        if z is not None:
            x = torch.cat((x, z.unsqueeze(1)), dim=1)  # Concatenate along features axis (dim=1)
        
        # Pass through remaining layers
        x = self.relu(self.layer4(x))  # Fourth hidden layer (with Z)
        x = self.relu(self.layer5(x))  # Fifth hidden layer
        x = self.relu(self.layer6(x))  # Sixth hidden layer
        
        # Output layer to get final 4-class predictions
        x = self.output_layer(x)
        return x


In [512]:
# Initialize the model, optimizer, and loss function
input_dim = X_train_tensor.shape[1]  # Number of features in X_train
model = CustomerSegmentationModel(input_dim=input_dim)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    outputs = model(X_train_tensor, Z_train_tensor)  # Pass both X and Z (train data)
    
    # Compute loss
    loss = criterion(outputs, y_train_tensor)
    
    # Backward pass and optimization
    loss.backward()
    optimizer.step()
    
    # Print loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [10/50], Loss: 1.3904
Epoch [20/50], Loss: 1.3573
Epoch [30/50], Loss: 1.2719
Epoch [40/50], Loss: 1.1855
Epoch [50/50], Loss: 1.1272


In [516]:
# Evaluation loop
model.eval()
with torch.no_grad():
    # For evaluation, create dummy Z tensor filled with zeros (if needed)
    Z_test_tensor = torch.zeros(X_test_tensor.size(0), dtype=torch.float32)
    
    # Forward pass for evaluation
    test_outputs = model(X_test_tensor, Z_test_tensor)
    
    # Get predictions
    _, predictions = torch.max(test_outputs, 1)
    
    # Compute accuracy
    accuracy = (predictions == y_test_tensor).float().mean()
    print(f"Test Accuracy: {accuracy:.4f}")


Test Accuracy: 0.4802
