# Vertebral column classification 

# Import Libraries 

In [44]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load Data 

In [45]:
df = pd.read_csv("/kaggle/input/vertebralcolumndataset/Dataset_spine.csv")
df.head()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis,Class_att
0,63.027817,22.552586,39.609117,40.475232,98.672917,-0.2544,Abnormal
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,Abnormal
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,Abnormal
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,Abnormal
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,Abnormal


In [46]:
df.shape

(310, 7)

In [47]:
df.isnull().sum()

pelvic incidence               0
 pelvic tilt                   0
 lumbar lordosis angle         0
 sacral slope                  0
 pelvic radius                 0
 grade of spondylolisthesis    0
Class_att                      0
dtype: int64

In [48]:
df.size

2170

In [49]:
df.dtypes

pelvic incidence               float64
 pelvic tilt                   float64
 lumbar lordosis angle         float64
 sacral slope                  float64
 pelvic radius                 float64
 grade of spondylolisthesis    float64
Class_att                       object
dtype: object

In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   pelvic incidence             310 non-null    float64
 1    pelvic tilt                 310 non-null    float64
 2    lumbar lordosis angle       310 non-null    float64
 3    sacral slope                310 non-null    float64
 4    pelvic radius               310 non-null    float64
 5    grade of spondylolisthesis  310 non-null    float64
 6   Class_att                    310 non-null    object 
dtypes: float64(6), object(1)
memory usage: 17.1+ KB


In [51]:
df.describe()

Unnamed: 0,pelvic incidence,pelvic tilt,lumbar lordosis angle,sacral slope,pelvic radius,grade of spondylolisthesis
count,310.0,310.0,310.0,310.0,310.0,310.0
mean,60.496653,17.542822,51.93093,42.953831,117.920655,26.296694
std,17.23652,10.00833,18.554064,13.423102,13.317377,37.559027
min,26.147921,-6.554948,14.0,13.366931,70.082575,-11.058179
25%,46.430294,10.667069,37.0,33.347122,110.709196,1.603727
50%,58.691038,16.357689,49.562398,42.404912,118.268178,11.767934
75%,72.877696,22.120395,63.0,52.695888,125.467674,41.287352
max,129.834041,49.431864,125.742385,121.429566,163.071041,418.543082


In [52]:
df.columns

Index(['pelvic incidence', ' pelvic tilt', ' lumbar lordosis angle',
       ' sacral slope', ' pelvic radius', ' grade of spondylolisthesis',
       'Class_att'],
      dtype='object')

In [53]:
df["Class_att"].value_counts()

Class_att
Abnormal    210
Normal      100
Name: count, dtype: int64

# Data Preprocessing and Splitting

In [54]:
X = df.drop("Class_att",axis=1)
y = df["Class_att"]

In [55]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,stratify=y,random_state=42)

In [56]:
scaler = StandardScaler()

In [57]:
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Implementation 

In [58]:
# Initialize classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train on scaled data
clf.fit(X_train_scaled, y_train)

# Predict on test data
y_pred = clf.predict(X_test_scaled)

# Evaluation

In [59]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)

# Print metrics
print("=== Evaluation Metrics ===")
print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

=== Evaluation Metrics ===
Accuracy:  0.8065
Precision: 0.8065
Recall:    0.8065
F1-score:  0.8065

Confusion Matrix:
 [[36  6]
 [ 6 14]]

Classification Report:
               precision    recall  f1-score   support

    Abnormal       0.86      0.86      0.86        42
      Normal       0.70      0.70      0.70        20

    accuracy                           0.81        62
   macro avg       0.78      0.78      0.78        62
weighted avg       0.81      0.81      0.81        62

