In [12]:
# Import packages
import pandas as pd 
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from parse import preprocess

In [13]:
df = preprocess("rawfile_blood.csv")


####################################################################
Number of Rows of Dataframe:
1123
Number of Columns of Dataframe:
59

####################################################################
Threshold for number of NULLs in a column: 0.1095
Number of Columns before Parsing for Too Many NULLs in a column:
59
Number of Columns after Parsing for Too Many NULLs in a column:
51

Columns Removed:
B1_b5
B4_a1
B4_a3
B4_a4
B4_a6
B4_b1
B4_b3
B5_a1

####################################################################
Number of Rows before Parsing NULLs in data:
1123
Number of Rows after Parsing NULLs in data:
1007

####################################################################
Number of Rows after removing dropping A1_2, B1_b4, B2_c3, B4_b2 for inconsistent data types:
1007


In [14]:
# Initialise counters for each condition
frail = 0
frail_mci = 0
mci = 0
prefrail_mci = 0
prefrail = 0
robust = 0

In [15]:
# Count rows of data for each condition
for i in range(0, len(df)):
	if df.at[i, 'condition'] == 'frail':
		frail += 1
	elif df.at[i, 'condition'] == 'frail_mci':
		frail_mci += 1
	elif df.at[i, 'condition'] == 'mci':
		mci += 1
	elif df.at[i, 'condition'] == 'prefrail_mci':
		prefrail_mci += 1
	elif df.at[i, 'condition'] == 'prefrail':
		prefrail += 1
	elif df.at[i, 'condition'] == 'robust':
		robust += 1
        
# Display number of rows (frequency) for each condition (label)
print("\n####################################################################")
print("Labels with frequencies:")
print("Frail:", frail)
print("Frail + MCI:", frail_mci)
print("MCI:", mci)
print("Prefrail + MCI:", prefrail_mci)
print("Prefrail:", prefrail)
print("Robust:", robust)


####################################################################
Labels with frequencies:
Frail: 7
Frail + MCI: 76
MCI: 133
Prefrail + MCI: 231
Prefrail: 221
Robust: 339


In [16]:
# Specify features and labels
y = df['condition']
x = df.drop(['mtag', 'condition'], axis=1)

# Display features and labels
print(x, y)

# Display shape of features and labels
print("\nShape of Features:")
print(x.shape)
print("\nShape of Labels:")
print(y.shape)

      A1_1  A2_1  A3_1  B1_a  B1_a1  B1_a2  B1_a3  B1_a4  B1_a5  B1_a6  ...  \
0      196    24  46.5   121   3.93   0.37     95     31    324   13.3  ...   
1      200    23  55.6   142   4.82   0.42     87     30    346   12.8  ...   
2      441    20  76.8   105   4.54   0.41     90     30    330   14.0  ...   
3      265    16  47.2   122   4.53   0.39     86     27    313   14.9  ...   
4      425    14  31.3   124   4.44   0.38     85     28    329   12.6  ...   
...    ...   ...   ...   ...    ...    ...    ...    ...    ...    ...  ...   
1002   220    19  67.5   138   4.66   0.42     91     30    325   14.1  ...   
1003   334    18  51.0   139   4.63   0.42     91     30    330   15.6  ...   
1004   418    17  61.0   122   4.18   0.38     90     29    324   13.5  ...   
1005   393    18  43.1   136   4.57   0.43     94     30    316   12.5  ...   
1006   371    24  55.9   127   4.41   0.40     90     29    320   13.8  ...   

      B2_d6  B2_d7  B2_d8  B2_d9   B3  B4_a2  B4_a5

In [17]:
# Data Dictionary:
# frail -> 0
# frail_mci -> 1
# mci -> 2
# prefrail_mci -> 3
# prefrail -> 4
# robust -> 5

# Conduct label mapping for conditions
label_mapping = {
    'frail' : 0,
    'frail_mci' : 1,
    'mci' : 2,
    'prefrail_mci' : 3,
    'prefrail' : 4,
    'robust' : 5
}

y = y.map(label_mapping)
y = np.array(y)

# Display label
print(y)

# Display shape of label
print(y.shape)

[0 0 0 ... 5 5 5]
(1007,)


In [18]:
# Conduct train-test split on dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Display x_train, x_test, y_train, y_test
print("\nX Train:")
print(x_train)
print("\nX Test:")
print(x_test)
print("\nY Train:")
print(y_train)
print("\nY Test:")
print(y_test)

# Display shape of train and test sets
print("\nShape of X Train:")
print(x_train.shape)
print("\nShape of X Test:")
print(x_test.shape)
print("\nShape of Y Train:")
print(y_train.shape)
print("\nShape of Y Test:")
print(y_test.shape)


X Train:
     A1_1  A2_1   A3_1  B1_a  B1_a1  B1_a2  B1_a3  B1_a4  B1_a5  B1_a6  ...  \
709   229    20   66.2   162   5.31   0.48     90     31    339   13.4  ...   
841   280    14   50.5   136   4.88   0.43     89     28    316   13.7  ...   
428   753    15   79.3   130   4.15   0.40     97     31    323   13.4  ...   
279   416    24   51.8   148   4.83   0.45     94     30    317   15.1  ...   
78    386    30   27.0   125   4.36   0.39     89     29    322   15.5  ...   
..    ...   ...    ...   ...    ...    ...    ...    ...    ...    ...  ...   
979   410    15  118.4   140   4.80   0.43     90     29    324   13.2  ...   
848   295    17   62.2   136   4.96   0.45     90     30    330   14.4  ...   
786   555    12   37.7   155   4.99   0.47     94     31    330   13.5  ...   
189   621    18   44.3   132   4.05   0.39     95     31    330   12.5  ...   
615   342     7   58.9   137   4.42   0.42     95     31    326   13.0  ...   

     B2_d6  B2_d7  B2_d8  B2_d9    B3  B4

In [19]:
# Create linear SVM model
model = svm.SVC(kernel='linear')

# Train the model
model.fit(x_train, y_train)

# Make predictions
predictions = model.predict(x_test)

# Measure accuracy
acc = accuracy_score(y_test, predictions)

# Print predictions, actual, and accuracy score
print("Predictions:", predictions)
print("Actual:", y_test)
print("Accuracy:", acc)

Predictions: [4 3 2 5 4 5 3 3 3 3 5 5 3 5 5 4 3 3 5 5 5 4 3 5 4 3 5 5 3 1 3 4 3 0 3 5 5
 5 5 3 3 5 3 5 3 4 5 1 5 5 3 3 5 5 3 5 5 3 5 3 5 5 5 3 5 5 3 5 5 5 0 3 3 5
 5 3 4 5 5 2 1 2 5 5 4 3 3 4 2 3 5 3 5 5 2 4 5 5 3 3 2 5 5 5 3 4 5 3 3 4 5
 5 5 5 3 5 5 3 3 4 4 5 5 5 4 3 5 3 5 5 3 5 4 3 2 5 5 5 3 5 2 5 3 5 5 3 3 3
 3 3 5 2 5 5 5 4 5 5 5 5 5 3 3 4 5 2 4 5 3 5 5 5 5 5 5 5 3 3 5 4 5 2 4 5 5
 2 5 3 4 2 5 5 5 5 3 5 5 5 4 5 5 5]
Actual: [5 2 5 5 4 4 2 4 5 3 2 5 3 5 3 4 3 3 4 5 2 2 1 3 5 4 5 4 1 5 3 5 2 3 3 4 5
 4 2 5 3 5 2 5 3 5 5 3 5 4 5 2 5 3 1 4 0 3 5 5 3 4 3 2 5 3 5 3 4 5 3 4 1 4
 3 3 5 4 5 4 1 3 5 2 4 1 3 5 1 4 5 3 2 3 5 3 5 5 4 5 5 5 3 5 4 5 5 3 1 5 5
 4 4 2 3 3 3 3 3 2 4 3 5 5 3 2 3 3 5 5 3 5 1 4 4 2 5 5 2 5 3 4 4 5 4 4 2 1
 5 5 3 4 4 5 3 3 2 5 5 2 2 3 1 4 2 3 3 2 0 4 5 2 4 4 5 4 1 3 5 5 4 3 2 2 5
 2 5 3 4 3 3 3 5 3 4 3 2 5 4 5 5 4]
Accuracy: 0.36633663366336633


In [20]:
# Create RBF SVM model
model = svm.SVC(kernel='rbf')

# Train the model
model.fit(x_train, y_train)

# Make predictions
predictions = model.predict(x_test)

# Measure accuracy
acc = accuracy_score(y_test, predictions)

# Print predictions, actual, and accuracy score
print("Predictions:", predictions)
print("Actual:", y_test)
print("Accuracy:", acc)

Predictions: [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 4 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5]
Actual: [5 2 5 5 4 4 2 4 5 3 2 5 3 5 3 4 3 3 4 5 2 2 1 3 5 4 5 4 1 5 3 5 2 3 3 4 5
 4 2 5 3 5 2 5 3 5 5 3 5 4 5 2 5 3 1 4 0 3 5 5 3 4 3 2 5 3 5 3 4 5 3 4 1 4
 3 3 5 4 5 4 1 3 5 2 4 1 3 5 1 4 5 3 2 3 5 3 5 5 4 5 5 5 3 5 4 5 5 3 1 5 5
 4 4 2 3 3 3 3 3 2 4 3 5 5 3 2 3 3 5 5 3 5 1 4 4 2 5 5 2 5 3 4 4 5 4 4 2 1
 5 5 3 4 4 5 3 3 2 5 5 2 2 3 1 4 2 3 3 2 0 4 5 2 4 4 5 4 1 3 5 5 4 3 2 2 5
 2 5 3 4 3 3 3 5 3 4 3 2 5 4 5 5 4]
Accuracy: 0.33663366336633666


In [21]:
# Create Sigmoid SVM model
model = svm.SVC(kernel='sigmoid')

# Train the model
model.fit(x_train, y_train)

# Make predictions
predictions = model.predict(x_test)

# Measure accuracy
acc = accuracy_score(y_test, predictions)

# Print predictions, actual, and accuracy score
print("Predictions:", predictions)
print("Actual:", y_test)
print("Accuracy:", acc)

Predictions: [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5]
Actual: [5 2 5 5 4 4 2 4 5 3 2 5 3 5 3 4 3 3 4 5 2 2 1 3 5 4 5 4 1 5 3 5 2 3 3 4 5
 4 2 5 3 5 2 5 3 5 5 3 5 4 5 2 5 3 1 4 0 3 5 5 3 4 3 2 5 3 5 3 4 5 3 4 1 4
 3 3 5 4 5 4 1 3 5 2 4 1 3 5 1 4 5 3 2 3 5 3 5 5 4 5 5 5 3 5 4 5 5 3 1 5 5
 4 4 2 3 3 3 3 3 2 4 3 5 5 3 2 3 3 5 5 3 5 1 4 4 2 5 5 2 5 3 4 4 5 4 4 2 1
 5 5 3 4 4 5 3 3 2 5 5 2 2 3 1 4 2 3 3 2 0 4 5 2 4 4 5 4 1 3 5 5 4 3 2 2 5
 2 5 3 4 3 3 3 5 3 4 3 2 5 4 5 5 4]
Accuracy: 0.31683168316831684


In [22]:
# Create Polynomial SVM model
model = svm.SVC(kernel='poly')

# Train the model
model.fit(x_train, y_train)

# Make predictions
predictions = model.predict(x_test)

# Measure accuracy
acc = accuracy_score(y_test, predictions)

# Print predictions, actual, and accuracy score
print("Predictions:", predictions)
print("Actual:", y_test)
print("Accuracy:", acc)

Predictions: [5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5]
Actual: [5 2 5 5 4 4 2 4 5 3 2 5 3 5 3 4 3 3 4 5 2 2 1 3 5 4 5 4 1 5 3 5 2 3 3 4 5
 4 2 5 3 5 2 5 3 5 5 3 5 4 5 2 5 3 1 4 0 3 5 5 3 4 3 2 5 3 5 3 4 5 3 4 1 4
 3 3 5 4 5 4 1 3 5 2 4 1 3 5 1 4 5 3 2 3 5 3 5 5 4 5 5 5 3 5 4 5 5 3 1 5 5
 4 4 2 3 3 3 3 3 2 4 3 5 5 3 2 3 3 5 5 3 5 1 4 4 2 5 5 2 5 3 4 4 5 4 4 2 1
 5 5 3 4 4 5 3 3 2 5 5 2 2 3 1 4 2 3 3 2 0 4 5 2 4 4 5 4 1 3 5 5 4 3 2 2 5
 2 5 3 4 3 3 3 5 3 4 3 2 5 4 5 5 4]
Accuracy: 0.3217821782178218
