In [1]:
import os
import pandas as pd 
import os 
from skimage.transform import resize 
from skimage.io import imread 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.svm import SVC 
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report

In [2]:
data = "../../Data/AugmentedAlzheimerDataset"
categories = os.listdir(data)

In [3]:
categories

['VeryMildDemented', 'ModerateDemented', 'MildDemented', 'NonDemented']

In [4]:
for category in categories:
    subFolder = os.path.join(data,category)
    print(f"for the category: \t{category} \t the number of images = {len(os.listdir(subFolder))}")

for the category: 	VeryMildDemented 	 the number of images = 8960
for the category: 	ModerateDemented 	 the number of images = 6464
for the category: 	MildDemented 	 the number of images = 8960
for the category: 	NonDemented 	 the number of images = 9600


In [5]:
HEIGHT, WIDTH = 244, 244

flattenedData = []
targeLabel = []

for i in categories:    
    print(f'loading... category : {i}') 
    path=os.path.join(data,i) 
    images = os.listdir(path)
    for j in range(100): 
        img_array=imread(os.path.join(path,images[j])) 
        img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
        flattenedData.append(img_resized.flatten()) 
        targeLabel.append(categories.index(i)) 
    print(f'loaded category:{i} successfully') 
flat_data=np.array(flattenedData) 
target=np.array(targeLabel)



loading... category : VeryMildDemented
loaded category:VeryMildDemented successfully
loading... category : ModerateDemented
loaded category:ModerateDemented successfully
loading... category : MildDemented
loaded category:MildDemented successfully
loading... category : NonDemented
loaded category:NonDemented successfully


In [6]:
df=pd.DataFrame(flat_data)  
df['Target']=target 
df.shape

(400, 178609)

In [7]:
# Features
X=df.iloc[:,:-1] 
# Target Label 
y=df.iloc[:,-1]

In [8]:
y.value_counts()

Target
0    100
1    100
2    100
3    100
Name: count, dtype: int64

In [9]:
# Splitting the data into training and testing sets 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=3) 


In [10]:
y_test.value_counts()

Target
1    24
3    20
0    20
2    16
Name: count, dtype: int64

Using Grid Search to get the best parameters

In [11]:
# Usinng GridSearchCV, attempting to find best parameters using ovr strategy and 100 images from each category 
paramGrid = {
    'C': [0.1, 1, 10, 100],  # Regularization strength
    'kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'gamma': ['scale', 'auto']  # Kernel coefficient
}

# Creating a support vector classifier 
svmModel=SVC(decision_function_shape="ovr") 

# Creating a model using GridSearchCV with the parameters grid 
grid_search = GridSearchCV(svmModel, paramGrid, scoring='accuracy', verbose = 1, cv=2)
grid_search.fit(X_train, y_train)


Fitting 2 folds for each of 24 candidates, totalling 48 fits


In [12]:
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

Best Parameters: {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}


In [13]:
# Training the model using the training data 
predictions = grid_search.predict(X_test)
accuracy = accuracy_score(predictions,y_test)
accuracy

0.4625

In [14]:
print(classification_report(y_test,predictions, target_names=categories))

                  precision    recall  f1-score   support

VeryMildDemented       0.33      0.40      0.36        20
ModerateDemented       0.70      0.67      0.68        24
    MildDemented       0.12      0.06      0.08        16
     NonDemented       0.48      0.60      0.53        20

        accuracy                           0.46        80
       macro avg       0.41      0.43      0.42        80
    weighted avg       0.44      0.46      0.45        80



From the above tests, the best parameters are when we use the kernel: "rbf", C = 100 and "gamma: 'scale'

---

Creating a dataframe to conduct OVR one by one and seeing how that turns out
## SVM model 1: classifying nonDemented vs the rest

In [15]:
HEIGHT, WIDTH = 244, 244

flattenedData = []
targeLabel = []

for i in categories:    
    print(f'loading... category : {i}') 
    path=os.path.join(data,i) 
    images = os.listdir(path)
    if i == "NonDemented": 
        for j in range(1000): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(0) 
    else:
        for j in range(333): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(1) 
    print(f'loaded category:{i} successfully') 
flat_data=np.array(flattenedData) 
target=np.array(targeLabel)

loading... category : VeryMildDemented
loaded category:VeryMildDemented successfully
loading... category : ModerateDemented
loaded category:ModerateDemented successfully
loading... category : MildDemented
loaded category:MildDemented successfully
loading... category : NonDemented
loaded category:NonDemented successfully


In [16]:
df=pd.DataFrame(flat_data)  
df['Target']=target 
print(f"shape is: {df.shape}")

# Features
X=df.iloc[:,:-1] 
# Target Label 
y=df.iloc[:,-1]
print(y.value_counts())


shape is: (1999, 178609)
Target
0    1000
1     999
Name: count, dtype: int64


In [17]:
# Splitting the data into training and testing sets 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=3) 

In [18]:
# Creating a support vector classifier 
svmModel=SVC(decision_function_shape="ovo", kernel="rbf", C=100, gamma="scale") 

# train 
svmModel.fit(X_train,y_train)


In [19]:
# Training the model using the training data 
predictions = svmModel.predict(X_test)
accuracy = accuracy_score(predictions,y_test)
accuracy

0.775

In [20]:
print(classification_report(y_test,predictions, target_names=["NonDemented", "Rest"]))

              precision    recall  f1-score   support

 NonDemented       0.73      0.84      0.78       194
        Rest       0.83      0.71      0.77       206

    accuracy                           0.78       400
   macro avg       0.78      0.78      0.77       400
weighted avg       0.78      0.78      0.77       400



---

Creating a dataframe to conduct OVR one by one and seeing how that turns out
## SVM model 2: classifying veryMildDemented vs the rest

In [21]:
HEIGHT, WIDTH = 244, 244

flattenedData = []
targeLabel = []

for i in categories:    
    print(f'loading... category : {i}') 
    path=os.path.join(data,i) 
    images = os.listdir(path)
    if i == "VeryMildDemented": 
        for j in range(1000): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(0) 
    else:
        for j in range(333): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(1) 
    print(f'loaded category:{i} successfully') 
flat_data=np.array(flattenedData) 
target=np.array(targeLabel)

loading... category : VeryMildDemented
loaded category:VeryMildDemented successfully
loading... category : ModerateDemented
loaded category:ModerateDemented successfully
loading... category : MildDemented
loaded category:MildDemented successfully
loading... category : NonDemented
loaded category:NonDemented successfully


In [22]:
df=pd.DataFrame(flat_data)  
df['Target']=target 
print(f"shape is: {df.shape}")

# Features
X=df.iloc[:,:-1] 
# Target Label 
y=df.iloc[:,-1]
print(y.value_counts())


shape is: (1999, 178609)
Target
0    1000
1     999
Name: count, dtype: int64


In [23]:
# Splitting the data into training and testing sets 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=3) 

In [24]:
# Creating a support vector classifier 
svmModel=SVC(decision_function_shape="ovo", kernel="rbf", C=100, gamma="scale") 

# train 
svmModel.fit(X_train,y_train)


In [25]:
# Training the model using the training data 
predictions = svmModel.predict(X_test)
accuracy = accuracy_score(predictions,y_test)
accuracy

0.68

In [26]:
print(classification_report(y_test,predictions, target_names=["VeryMildDemented", "Rest"]))

                  precision    recall  f1-score   support

VeryMildDemented       0.68      0.72      0.70       206
            Rest       0.68      0.63      0.66       194

        accuracy                           0.68       400
       macro avg       0.68      0.68      0.68       400
    weighted avg       0.68      0.68      0.68       400



---

Creating a dataframe to conduct OVR one by one and seeing how that turns out
## SVM model 3: classifying MildDemented vs the rest

In [27]:
HEIGHT, WIDTH = 244, 244

flattenedData = []
targeLabel = []

for i in categories:    
    print(f'loading... category : {i}') 
    path=os.path.join(data,i) 
    images = os.listdir(path)
    if i == "MildDemented": 
        for j in range(1000): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(0) 
    else:
        for j in range(333): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(1) 
    print(f'loaded category:{i} successfully') 
flat_data=np.array(flattenedData) 
target=np.array(targeLabel)

loading... category : VeryMildDemented
loaded category:VeryMildDemented successfully
loading... category : ModerateDemented
loaded category:ModerateDemented successfully
loading... category : MildDemented
loaded category:MildDemented successfully
loading... category : NonDemented
loaded category:NonDemented successfully


In [28]:
df=pd.DataFrame(flat_data)  
df['Target']=target 
print(f"shape is: {df.shape}")

# Features
X=df.iloc[:,:-1] 
# Target Label 
y=df.iloc[:,-1]
print(y.value_counts())


shape is: (1999, 178609)
Target
0    1000
1     999
Name: count, dtype: int64


In [29]:
# Splitting the data into training and testing sets 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=3) 

In [30]:
# Creating a support vector classifier 
svmModel=SVC(decision_function_shape="ovo", kernel="rbf", C=100, gamma="scale") 

# train 
svmModel.fit(X_train,y_train)


In [31]:
# Training the model using the training data 
predictions = svmModel.predict(X_test)
accuracy = accuracy_score(predictions,y_test)
accuracy

0.79

In [32]:
print(classification_report(y_test,predictions, target_names=["MildDemented", "Rest"]))

              precision    recall  f1-score   support

MildDemented       0.79      0.80      0.79       200
        Rest       0.79      0.79      0.79       200

    accuracy                           0.79       400
   macro avg       0.79      0.79      0.79       400
weighted avg       0.79      0.79      0.79       400



---

Creating a dataframe to conduct OVR one by one and seeing how that turns out
## SVM model 4: classifying ModerateDemented vs the rest

In [33]:
HEIGHT, WIDTH = 244, 244

flattenedData = []
targeLabel = []

for i in categories:    
    print(f'loading... category : {i}') 
    path=os.path.join(data,i) 
    images = os.listdir(path)
    if i == "ModerateDemented": 
        for j in range(1000): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(0) 
    else:
        for j in range(333): 
            img_array=imread(os.path.join(path,images[j])) 
            img_resized=resize(img_array,(HEIGHT,WIDTH,3)) 
            flattenedData.append(img_resized.flatten()) 
            targeLabel.append(1) 
    print(f'loaded category:{i} successfully') 
flat_data=np.array(flattenedData) 
target=np.array(targeLabel)

loading... category : VeryMildDemented
loaded category:VeryMildDemented successfully
loading... category : ModerateDemented
loaded category:ModerateDemented successfully
loading... category : MildDemented
loaded category:MildDemented successfully
loading... category : NonDemented
loaded category:NonDemented successfully


In [34]:
df=pd.DataFrame(flat_data)  
df['Target']=target 
print(f"shape is: {df.shape}")

# Features
X=df.iloc[:,:-1] 
# Target Label 
y=df.iloc[:,-1]
print(y.value_counts())


shape is: (1999, 178609)
Target
0    1000
1     999
Name: count, dtype: int64


In [35]:
# Splitting the data into training and testing sets 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=3) 

In [36]:
# Creating a support vector classifier 
svmModel=SVC(decision_function_shape="ovo", kernel="rbf", C=100, gamma="scale") 

# train 
svmModel.fit(X_train,y_train)


In [37]:
# Training the model using the training data 
predictions = svmModel.predict(X_test)
accuracy = accuracy_score(predictions,y_test)
accuracy

0.9125

In [38]:
print(classification_report(y_test,predictions, target_names=["ModerateDemented", "Rest"]))

                  precision    recall  f1-score   support

ModerateDemented       0.88      0.94      0.91       192
            Rest       0.94      0.88      0.91       208

        accuracy                           0.91       400
       macro avg       0.91      0.91      0.91       400
    weighted avg       0.91      0.91      0.91       400

