In [99]:
import numpy as np
import pandas as pd

# This is for transformations and preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# This is for models
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# This is for metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

# Steps

1. Import Data
2. Feature Generation
3. Build Model
4. Evaluate

In [47]:
# Import and setup data

df = pd.read_csv("../Data/Dataset_spine.csv")
df.rename(columns={'Col1': 'pelvic_incidence', 'Col2':'pelvic_tilt','Col3':'lumbar_lordosis_angle','Col4':'sacral_slope','Col5':'pelvic_radius','Col6':'degree_spondylolisthesis','Col7':'pelvic_slope','Col8':'direct_tilt','Col9':'thoracic_slope','Col10':'cervical_tilt','Col11':'sacrum_angle','Col12':'scoliosis_slope'}, inplace=True)

# Pull base features
X = np.array(df.iloc[:,:12])
y = np.array([1 if i=='Abnormal' else 0 for i in df.iloc[:,12]])

# Transformations
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Shuffle data
X_f, y_f = shuffle(X_scaled, y)

In [51]:
# Build Model

X_train, X_test, y_train, y_test = train_test_split(X_f, y_f, test_size=.25)

# Chose our model
model = GaussianNB()
model.fit(X_train, y_train)
# We have now 'trained' our model

y_est = model.predict(X_test)

In [102]:
# Evaluate our model

def evaluate(y_estimate, y_true):
    acc = accuracy_score(y_estimate, y_true)
    prec = precision_score(y_estimate, y_true)
    rec = recall_score(y_estimate, y_true)
    f1 = f1_score(y_estimate, y_true)

    print(f"Accuracy: {round(acc*100,3)}%")
    print(f"Precision: {round(prec*100,3)}%")
    print(f"Recall: {round(rec*100,3)}%")
    print(f"F1: {round(f1*100,3)}%")
    
    print("Confusion Matrix")
    print(confusion_matrix(y_estimate, y_true))

In [103]:
# Lets run a few examples

# Build Model with no scaling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_est = model.predict(X_test)
print("Basic X - no scaling")
evaluate(y_est, y_test)
print("\n")

# Build Model with scaling
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=.25, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_est = model.predict(X_test)
print("Basic X - with scaling")
evaluate(y_est, y_test)
print("\n")

# Build Model with less features
X_train, X_test, y_train, y_test = train_test_split(X[:,:10], y, test_size=.25, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_est = model.predict(X_test)
print("Basic X - with selective features")
evaluate(y_est, y_test)
print("\n")

# Build Model with SVM
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)
model = SVC(kernel='rbf')
model.fit(X_train, y_train)
y_est = model.predict(X_test)
print("Basic X - with SVM rbf")
evaluate(y_est, y_test)
print("\n")

# Build Model with SVM
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)
model = SVC(kernel='linear')
model.fit(X_train, y_train)
y_est = model.predict(X_test)
print("Basic X - with SVM linear")
evaluate(y_est, y_test)
print("\n")

Basic X - no scaling
Accuracy: 79.487%
Precision: 75.439%
Recall: 95.556%
F1: 84.314%
Confusion Matrix
[[19 14]
 [ 2 43]]


Basic X - with scaling
Accuracy: 79.487%
Precision: 75.439%
Recall: 95.556%
F1: 84.314%
Confusion Matrix
[[19 14]
 [ 2 43]]


Basic X - with selective features
Accuracy: 79.487%
Precision: 75.439%
Recall: 95.556%
F1: 84.314%
Confusion Matrix
[[19 14]
 [ 2 43]]


Basic X - with SVM rbf
Accuracy: 82.051%
Precision: 85.965%
Recall: 89.091%
F1: 87.5%
Confusion Matrix
[[15  8]
 [ 6 49]]


Basic X - with SVM linear
Accuracy: 85.897%
Precision: 92.982%
Recall: 88.333%
F1: 90.598%
Confusion Matrix
[[14  4]
 [ 7 53]]


