In [None]:
from sklearn.utils import resample
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss
from sklearn.metrics import f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from sklearn import datasets

In [None]:
# Read the speech-feature table. 'class' is the label column; 'id' is dropped
# together with it so that only feature columns remain in X.
df = pd.read_csv("pd_speech_features.csv")

X = df.drop(['class', 'id'], axis=1).values
y = df.loc[:, 'class'].values

# Fit the min-max scaler on the feature matrix and rescale it in one step.
# The fitted scaler object is kept in `min_max_scaler`.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

# Resample with SMOTE (fixed seed); X and y are rebound to the resampled arrays.
sm = SMOTE(random_state=42)
X, y = sm.fit_resample(X, y)

# Stack features and labels side by side and count the rows labelled 1
# (reported as "PD") versus all remaining rows.
oversampled = np.column_stack((X, y))
label_column = oversampled[:, -1]
num_ones = np.count_nonzero(label_column == 1)
num_rest = oversampled.shape[0] - num_ones
print("number of PD:", num_ones)
print("number of non-PD:", num_rest)

number of PD: 564
number of non-PD: 564


In [None]:
# X and y are the arrays left behind by the preprocessing cell above
# (min-max scaled, then SMOTE-resampled). Wrap them as PyTorch tensors:
# 32-bit floats for the features, 64-bit integers for the labels.
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.int64)

# Define the VAE encoder model
class Encoder(nn.Module):
    """Encoder half of the VAE.

    One ReLU hidden layer feeds two parallel linear heads: ``fc2`` produces
    the latent mean and ``fc3`` the latent log-variance.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        # Remember the layer sizes on the instance.
        self.input_dim, self.hidden_dim, self.latent_dim = input_dim, hidden_dim, latent_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)   # shared hidden layer
        self.fc2 = nn.Linear(hidden_dim, latent_dim)  # head returning mu
        self.fc3 = nn.Linear(hidden_dim, latent_dim)  # head returning log_var

    def forward(self, x):
        """Return the pair ``(mu, log_var)`` computed from ``x``."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden), self.fc3(hidden)

# Define the VAE decoder model
class Decoder(nn.Module):
    """Decoder half of the VAE: latent vector -> ReLU hidden layer -> linear output."""

    def __init__(self, latent_dim, hidden_dim, output_dim):
        super().__init__()
        # Remember the layer sizes on the instance.
        self.latent_dim, self.hidden_dim, self.output_dim = latent_dim, hidden_dim, output_dim
        self.fc1 = nn.Linear(latent_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, z):
        """Return the reconstruction ``x_hat`` for latent input ``z``.

        The final layer is linear; no activation is applied to the output.
        """
        hidden = torch.relu(self.fc1(z))
        return self.fc2(hidden)

# Define the VAE model
class VAE(nn.Module):
    """Variational autoencoder assembled from an encoder and a decoder.

    The encoder is called as ``encoder(x) -> (mu, log_var)`` and the decoder
    as ``decoder(z) -> x_hat``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder

    @staticmethod
    def _sample_latent(mu, log_var):
        """Draw ``z = mu + eps * std`` with ``eps`` standard normal noise."""
        sigma = torch.exp(0.5 * log_var)  # log-variance -> standard deviation
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def forward(self, x):
        """Encode ``x``, sample a latent vector, decode it.

        Returns:
            Tuple ``(x_hat, mu, log_var)``.
        """
        mu, log_var = self.encoder(x)
        reconstruction = self.decoder(self._sample_latent(mu, log_var))
        return reconstruction, mu, log_var

def kl_loss(mu, log_var):
    """Return ``-0.5 * sum(1 + log_var - mu**2 - exp(log_var))``.

    The sum runs over every element of the inputs, so the result is a
    single scalar tensor.
    """
    elementwise = 1 + log_var - mu.pow(2) - log_var.exp()
    return -0.5 * torch.sum(elementwise)

# Hyperparameters of the VAE and of this training run.
INPUT_DIM = X.shape[1]   # taken from the data instead of hard-coding 753
HIDDEN_DIM = 400
LATENT_DIM = 120
N_EPOCHS = 10            # NOTE(review): mini-batches mean fewer optimizer steps per epoch; raise if under-trained
BATCH_SIZE = 32
LEARNING_RATE = 0.001

# Seed torch so weight initialisation, batch shuffling and the sampling noise
# inside VAE.forward are repeatable across runs.
torch.manual_seed(42)

encoder = Encoder(INPUT_DIM, HIDDEN_DIM, LATENT_DIM)
decoder = Decoder(LATENT_DIM, HIDDEN_DIM, INPUT_DIM)

# Define the VAE model
vae = VAE(encoder, decoder)

# Define the optimizer
optimizer = optim.Adam(vae.parameters(), lr=LEARNING_RATE)

# kl_loss() sums over latent dimensions, whereas nn.MSELoss() defaults to a
# mean over features. Summing the squared error puts both terms on the same
# scale; the total is then divided by the batch size to get a per-sample loss.
reconstruction_criterion = nn.MSELoss(reduction='sum')

# Shuffled mini-batches instead of a fixed-order, one-sample-at-a-time loop:
# the rows appended by the oversampling step would otherwise always be
# visited last in every epoch.
train_loader = data.DataLoader(data.TensorDataset(X), batch_size=BATCH_SIZE, shuffle=True)

# Training loop
vae.train()
for epoch in range(N_EPOCHS):
    epoch_loss = 0.0
    for (x,) in train_loader:
        # Pass the batch through the VAE model
        x_hat, mu, log_var = vae(x)

        # Per-sample loss: summed reconstruction error plus summed KL term
        reconstruction_loss = reconstruction_criterion(x_hat, x)
        kl_loss_val = kl_loss(mu, log_var)
        loss = (reconstruction_loss + kl_loss_val) / x.size(0)

        # Update the model parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch figure is a true per-sample mean
        epoch_loss += loss.item() * x.size(0)

    # Report the mean per-sample loss over the whole epoch, not just the
    # loss of whichever sample happened to be processed last.
    print(f'Epoch {epoch}: Loss {epoch_loss / len(X)}')

Epoch 0: Loss 0.015139072202146053
Epoch 1: Loss 0.008036714047193527
Epoch 2: Loss 0.0205801110714674
Epoch 3: Loss 0.008711128495633602
Epoch 4: Loss 0.0054746633395552635
Epoch 5: Loss 0.007398189976811409
Epoch 6: Loss 0.006864612456411123
Epoch 7: Loss 0.0059611680917441845
Epoch 8: Loss 0.005072538275271654
Epoch 9: Loss 0.005561823956668377


In [None]:
# Go back to the original (non-oversampled) rows of the dataframe: X and y
# were overwritten earlier by the resampled arrays / tensors.
y = df.loc[:, 'class'].values
features = df.drop(['class', 'id'], axis=1).values

# Scale with the SAME per-feature scaler that produced the VAE's training
# inputs. A global (X - X.min()) / (X.max() - X.min()) rescaling divides every
# column by the range of the whole matrix, so the encoder would receive inputs
# on a different scale from the one it was trained on.
features_scaled = min_max_scaler.transform(features)

# Convert the data to PyTorch tensors
X = torch.tensor(features_scaled, dtype=torch.float)

# Only the encoder is needed here; run it without building an autograd graph.
vae.eval()
with torch.no_grad():
    mu, log_var = vae.encoder(X)

# From here on X holds the latent means used as classifier features.
X = mu.numpy()
log_var_np = log_var.numpy()

print(X.shape)

(756, 120)


In [None]:
# train_test_split is already imported in the notebook's import cell.
# Hold out 25% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions, which matters here because the two
# classes are not equally frequent.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

print(X_train.shape)
print(X_test.shape)

(567, 120)
(189, 120)


In [None]:
def lr_func(X_train, y_train, X_test, y_test, X, y):
    """Fit a logistic-regression classifier and print its evaluation.

    Prints the hold-out accuracy, the mean accuracy of a 5-fold
    cross-validation and the classification report for the test split.

    Args:
        X_train, y_train: features and labels the model is fitted on.
        X_test, y_test: held-out features and labels used for the reported
            test accuracy and classification report.
        X, y: full feature matrix and labels passed to ``cross_val_score``.

    Returns:
        None. All results are printed.
    """
    # Fit exactly once; the estimator used to be fitted twice on the same
    # data (once chained on the constructor, once on the following line).
    model = LogisticRegression(random_state=42, max_iter=1000)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("--------------------------------------------------------")
    print("Test Accuracy LogisticRegression Model :",  accuracy_score(y_test, y_pred))
    print("--------------------------------------------------------")
    score = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    print("Cross valdation avaerage score", score.mean())
    print("--------------------------------------------------------")
    print(classification_report(y_test, y_pred))


lr_func(X_train, y_train, X_test, y_test, X, y)

--------------------------------------------------------
Test Accuracy LogisticRegression Model : 0.7301587301587301
--------------------------------------------------------
Cross valdation avaerage score 0.7460352039037993
--------------------------------------------------------
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        51
           1       0.73      1.00      0.84       138

    accuracy                           0.73       189
   macro avg       0.37      0.50      0.42       189
weighted avg       0.53      0.73      0.62       189



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
def svm_func_poly(X_train, y_train, X_test, y_test, X, y): #X and y are needed to calculate cross validation score
    """Fit a polynomial-kernel SVC and print its evaluation.

    Prints the hold-out accuracy, the mean accuracy of a 5-fold
    cross-validation and the classification report for the test split.

    Args:
        X_train, y_train: features and labels the model is fitted on.
        X_test, y_test: held-out features and labels used for the reported
            test accuracy and classification report.
        X, y: full feature matrix and labels passed to ``cross_val_score``.

    Returns:
        None. All results are printed.
    """
    # Kernel options to try: 'linear', 'poly', 'rbf'
    model = SVC(kernel='poly', decision_function_shape='ovr')
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("--------------------------------------------------------")
    # Label the output with the model actually evaluated; it used to say
    # "LogisticRegression", copied over from the logistic-regression cell.
    print("Test Accuracy SVC (poly kernel) Model :",  accuracy_score(y_test, y_pred))
    print("--------------------------------------------------------")
    score = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    print("Cross valdation avaerage score", score.mean())
    print("--------------------------------------------------------")
    print(classification_report(y_test, y_pred))

svm_func_poly(X_train, y_train, X_test, y_test, X, y)

--------------------------------------------------------
Test Accuracy LogisticRegression Model : 0.7301587301587301
--------------------------------------------------------
Cross valdation avaerage score 0.7460352039037993
--------------------------------------------------------
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        51
           1       0.73      1.00      0.84       138

    accuracy                           0.73       189
   macro avg       0.37      0.50      0.42       189
weighted avg       0.53      0.73      0.62       189



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
