In [None]:
!pip install tensorflow

In [2]:
import pandas as pd

# Read the dataset CSV into a DataFrame; the following cells refer to it as `df`.
df = pd.read_csv(filepath_or_buffer="/content/parkinsons.csv")

In [19]:
# Dimensions of the loaded DataFrame as (number of rows, number of columns).
df.shape

(195, 24)

# I. Préparez le dataset en sélectionnant les features et la target -> Divisez le dataset en training set et test set -> Appliquez le feature scaling

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Model inputs: every column except 'name' (not used for prediction) and
# 'status' (the label itself).
features = df.drop(columns=['name', 'status'])
# Label the models have to predict.
target = df.loc[:, 'status']

# Hold out 20% of the rows as a test set. Stratifying on the label keeps the
# class proportions identical in both subsets, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
    stratify=target,
)

# Standardize the features. The scaler is fitted on the training rows only;
# that same fitted scaler is then applied to both the training and test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# II. model training

##1. Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Create a seeded logistic-regression classifier and fit it on the standardized
# training data in one step (fit() returns the estimator itself).
log_reg = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Predicted labels for the held-out test rows.
y_pred = log_reg.predict(X_test_scaled)

# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

accuracy

0.9230769230769231

##2. Support Vector Machines (SVM)

In [13]:
from sklearn.svm import SVC

# Create a seeded support-vector classifier with otherwise default settings and
# fit it on the standardized training data (fit() returns the estimator itself).
svm_model = SVC(random_state=42).fit(X_train_scaled, y_train)

# Predicted labels for the held-out test rows.
y_pred_svm = svm_model.predict(X_test_scaled)

# Fraction of test rows whose predicted label equals the true label.
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)

accuracy_svm

0.9230769230769231

##3. Neural Networks model (**deep learning**)

> Add blockquote




In [14]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a fixed seed and an optimisation budget of at
# most 1000 iterations, fitted on the standardized training data
# (fit() returns the estimator itself).
mlp_model = MLPClassifier(max_iter=1000, random_state=42).fit(X_train_scaled, y_train)

# Predicted labels for the held-out test rows.
y_pred_mlp = mlp_model.predict(X_test_scaled)

# Text report of per-class metrics; it is stored here but not displayed by this cell.
classification_rep_mlp = classification_report(y_true=y_test, y_pred=y_pred_mlp)
# Fraction of test rows whose predicted label equals the true label.
accuracy_mlp = accuracy_score(y_true=y_test, y_pred=y_pred_mlp)

accuracy_mlp

0.9487179487179487

*=> The neural network gives a higher accuracy than both the Logistic Regression and SVM models.*

##4. Convolutional Neural Network (CNN) model (**Deep learning**)

In [16]:
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D
from keras.utils import set_random_seed
import numpy as np

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable, in line with random_state=42 used for the split
# and for the scikit-learn models.
set_random_seed(42)

# Conv1D expects input shaped (samples, steps, channels): append a channel axis
# so that every feature becomes one step with a single channel.
X_train_cnn = np.expand_dims(X_train_scaled, axis=2)
X_test_cnn = np.expand_dims(X_test_scaled, axis=2)

# Architecture: one convolution + max-pooling stage, flattened into a small
# dense layer, ending in a single sigmoid unit for the binary label.
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(50, activation='relu'))
cnn_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it can be read back from evaluate().
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently for 100 epochs.
# NOTE: the test set doubles as validation data here. It is only monitored
# (no callbacks, no early stopping), but the val_* curves in `history` are
# therefore not an independent validation signal.
history = cnn_model.fit(X_train_cnn, y_train, epochs=100, batch_size=32, verbose=0, validation_data=(X_test_cnn, y_test))

# evaluate() returns [loss, accuracy]; keep the accuracy on the test set.
cnn_accuracy = cnn_model.evaluate(X_test_cnn, y_test, verbose=0)[1]
cnn_accuracy


0.9230769276618958

##5. Recurrent Neural Network (RNN) model (**`deep learning`**)

---



In [17]:
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import set_random_seed
import numpy as np

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable, in line with random_state=42 used for the split
# and for the scikit-learn models.
set_random_seed(42)

# Recurrent layers expect input shaped (samples, timesteps, features per timestep).
# Expanding on axis=1 yields (samples, 1, n_features): each sample is a SINGLE
# timestep carrying all features, so the layer performs one recurrent step per
# sample. (Expanding on axis=2 instead would feed each feature as its own timestep.)
X_train_rnn = np.expand_dims(X_train_scaled, axis=1)
X_test_rnn = np.expand_dims(X_test_scaled, axis=1)

# Architecture: one SimpleRNN layer with 50 units followed by a single sigmoid
# unit for the binary label.
rnn_model = Sequential()
rnn_model.add(SimpleRNN(50, input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
rnn_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it can be read back from evaluate().
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently for 100 epochs.
# NOTE: the test set doubles as validation data here. It is only monitored
# (no callbacks, no early stopping), but the val_* curves in `history` are
# therefore not an independent validation signal.
history = rnn_model.fit(X_train_rnn, y_train, epochs=100, batch_size=32, verbose=0, validation_data=(X_test_rnn, y_test))

# evaluate() returns [loss, accuracy]; keep the accuracy on the test set.
rnn_accuracy = rnn_model.evaluate(X_test_rnn, y_test, verbose=0)[1]
rnn_accuracy


0.9230769276618958

##6. Long Short-Term Memory (LSTM) network (**Deep Learning**)


In [18]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import set_random_seed
import numpy as np

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable, in line with random_state=42 used for the split
# and for the scikit-learn models.
set_random_seed(42)

# LSTM layers expect input shaped (samples, timesteps, features per timestep).
# Expanding on axis=1 yields (samples, 1, n_features): each sample is a SINGLE
# timestep carrying all features, so the layer performs one recurrent step per
# sample. (Expanding on axis=2 instead would feed each feature as its own timestep.)
X_train_lstm = np.expand_dims(X_train_scaled, axis=1)
X_test_lstm = np.expand_dims(X_test_scaled, axis=1)

# Architecture: one LSTM layer with 50 units followed by a single sigmoid unit
# for the binary label.
lstm_model = Sequential()
lstm_model.add(LSTM(50, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
lstm_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it can be read back from evaluate().
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently for 100 epochs.
# NOTE: the test set doubles as validation data here. It is only monitored
# (no callbacks, no early stopping), but the val_* curves in `history` are
# therefore not an independent validation signal.
history = lstm_model.fit(X_train_lstm, y_train, epochs=100, batch_size=32, verbose=0, validation_data=(X_test_lstm, y_test))

# evaluate() returns [loss, accuracy]; keep the accuracy on the test set.
lstm_accuracy = lstm_model.evaluate(X_test_lstm, y_test, verbose=0)[1]
lstm_accuracy

0.9230769276618958

##7. Notes

Les datasets **X_train_scaled** et **y_train** utilisés pour le training étaient les mêmes pour tous les modèles (simplement remis en forme avec `np.expand_dims` pour les modèles CNN, RNN et LSTM). Cela garantit une comparaison équitable des modèles, car ils ont tous appris à partir des mêmes données.

> Add blockquote

