In [23]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, cross_val_score

In [30]:
# Load the dataset
data = pd.read_csv('Dataset.csv')

# Rows without a label cannot be used for supervised learning: drop them first,
# so that the blanket fill below does not turn a missing 'Disease' into a
# fabricated class 0. Remaining missing values (features only) are set to 0.
data = data.dropna(subset=['Disease']).fillna(0)

# Features and target
X = data.drop(columns=['Disease'])
y = data['Disease']

# Split into training (70%) and testing (30%) sets; fixed seed for reproducibility
# NOTE(review): the split is not stratified — if the classes are imbalanced,
# consider stratify=y (requires at least 2 samples per class); confirm against the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## KNN with Cross Validation

In [16]:
# Build the kNN classifier (k = 5); it is fitted further below
# NOTE(review): kNN is distance-based — if the features are on different scales,
# consider standardising them first; confirm against the dataset
knn = KNeighborsClassifier(n_neighbors=5)

# 5-fold cross-validation on the training split only
cv_scores = cross_val_score(knn, X_train, y_train, cv=5, scoring='accuracy')

# Print the cross-validation results
print('Cross-validation accuracy scores:', cv_scores)
print(f'Mean cross-validation accuracy: {cv_scores.mean():.2f}')
print(f'Standard deviation of cross-validation accuracy: {cv_scores.std():.2f}')

# Fit kNN on the whole training split
knn.fit(X_train, y_train)

# Predict on the held-out test split
y_pred = knn.predict(X_test)

# Compute accuracy, precision, recall (support-weighted average over classes).
# zero_division=0: a class that is never predicted counts as precision 0 rather
# than 1, so the weighted precision is not inflated by classes the model ignores.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)

print('Αποτελέσματα στο Test Set ----')
print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')



Cross-validation accuracy scores: [0.96901408 0.97183099 0.97042254 0.97179126 0.97038082]
Mean cross-validation accuracy: 0.97
Standard deviation of cross-validation accuracy: 0.00
Αποτελέσματα στο Test Set ----
Accuracy: 0.97
Precision: 0.98
Recall: 0.97


## Decision Tree

In [18]:
# ______ Decision Tree: training ______
# Fit a decision tree on the training split (fixed seed for reproducibility)
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

# Predict on the held-out test split
y_pred_dt = decision_tree.predict(X_test)

# 5-fold cross-validation on the training split only
cv_scores = cross_val_score(decision_tree, X_train, y_train, cv=5, scoring='accuracy')

# Print the cross-validation results
print('Cross-validation accuracy scores:', cv_scores)
print(f'Mean cross-validation accuracy: {cv_scores.mean():.2f}')
print(f'Standard deviation of cross-validation accuracy: {cv_scores.std():.2f}')

# Compute accuracy, precision, recall (support-weighted average over classes).
# zero_division=0: a class that is never predicted counts as precision 0 rather
# than 1, so the weighted precision is not inflated by classes the model ignores.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt, average='weighted', zero_division=0)
recall_dt = recall_score(y_test, y_pred_dt, average='weighted', zero_division=0)

print('Decision Tree:')
print(f'Accuracy: {accuracy_dt:.2f}')
print(f'Precision: {precision_dt:.2f}')
print(f'Recall: {recall_dt:.2f}')




Cross-validation accuracy scores: [0.96901408 0.96619718 0.97042254 0.97179126 0.96755994]
Mean cross-validation accuracy: 0.97
Standard deviation of cross-validation accuracy: 0.00
Decision Tree:
Accuracy: 0.97
Precision: 1.00
Recall: 0.97


## Random Forest 

In [19]:
# _____ Random Forest: training _____
# Fit a 100-tree random forest on the training split (fixed seed for reproducibility)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)

# Predict on the held-out test split
y_pred_rf = random_forest.predict(X_test)

# 5-fold cross-validation on the training split only
cv_scores = cross_val_score(random_forest, X_train, y_train, cv=5, scoring='accuracy')

# Print the cross-validation results
print('Cross-validation accuracy scores:', cv_scores)
print(f'Mean cross-validation accuracy: {cv_scores.mean():.2f}')
print(f'Standard deviation of cross-validation accuracy: {cv_scores.std():.2f}')

# Compute accuracy, precision, recall (support-weighted average over classes).
# zero_division=0: a class that is never predicted counts as precision 0 rather
# than 1, so the weighted precision is not inflated by classes the model ignores.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted', zero_division=0)
recall_rf = recall_score(y_test, y_pred_rf, average='weighted', zero_division=0)

print('Random Forest:')
print(f'Accuracy: {accuracy_rf:.2f}')
print(f'Precision: {precision_rf:.2f}')
print(f'Recall: {recall_rf:.2f}')



Cross-validation accuracy scores: [0.96901408 0.97183099 0.97042254 0.97179126 0.97038082]
Mean cross-validation accuracy: 0.97
Standard deviation of cross-validation accuracy: 0.00
Random Forest:
Accuracy: 0.97
Precision: 1.00
Recall: 0.97


## SVM

In [22]:
# Support Vector Machine
# Fit an SVC with default hyper-parameters on the training split
# NOTE(review): SVMs are sensitive to feature scale — consider standardising
# the features first; confirm against the dataset
svm = SVC(random_state=42)
svm.fit(X_train, y_train)

# Predict on the held-out test split
y_pred_svm = svm.predict(X_test)

# 4-fold cross-validation on the training split only
cv_scores = cross_val_score(svm, X_train, y_train, cv=4, scoring='accuracy')

# Print the cross-validation results
print('Cross-validation accuracy scores:', cv_scores)
print(f'Mean cross-validation accuracy: {cv_scores.mean():.2f}')
print(f'Standard deviation of cross-validation accuracy: {cv_scores.std():.2f}')

# Compute accuracy, precision, recall (support-weighted average over classes).
# zero_division=0: a class that is never predicted counts as precision 0 rather
# than 1, so the weighted precision is not inflated by classes the model ignores.
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm, average='weighted', zero_division=0)
recall_svm = recall_score(y_test, y_pred_svm, average='weighted', zero_division=0)

print('Support Vector Machine:')
print(f'Accuracy: {accuracy_svm:.2f}')
print(f'Precision: {precision_svm:.2f}')
print(f'Recall: {recall_svm:.2f}')



Cross-validation accuracy scores: [0.9729425  0.96956032 0.97068771 0.96956032]
Mean cross-validation accuracy: 0.97
Standard deviation of cross-validation accuracy: 0.00
Support Vector Machine:
Accuracy: 0.97
Precision: 0.98
Recall: 0.97


## MLP

In [26]:
# _____ MLP _____
# Multi-layer perceptron with one hidden layer of 100 units, at most 300 iterations
# NOTE(review): MLPs are sensitive to feature scale — consider standardising
# the features first; confirm against the dataset
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42)

# 4-fold cross-validation on the training split only
cv_scores_mlp = cross_val_score(mlp, X_train, y_train, cv=4, scoring='accuracy')

# Print the cross-validation results
print('MLP Cross-validation accuracy scores:', cv_scores_mlp)
print(f'Mean cross-validation accuracy: {cv_scores_mlp.mean():.2f}')
print(f'Standard deviation of cross-validation accuracy: {cv_scores_mlp.std():.2f}')

# Fit the MLP on the whole training split
mlp.fit(X_train, y_train)

# Predict on the held-out test split
y_pred_mlp = mlp.predict(X_test)

# Compute accuracy, precision, recall (support-weighted average over classes).
# zero_division=0: a class that is never predicted counts as precision 0 rather
# than 1, so the weighted precision is not inflated by classes the model ignores.
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
precision_mlp = precision_score(y_test, y_pred_mlp, average='weighted', zero_division=0)
recall_mlp = recall_score(y_test, y_pred_mlp, average='weighted', zero_division=0)

print('---- MLP Evaluation on Test Set ----')
print(f'Accuracy: {accuracy_mlp:.2f}')
print(f'Precision: {precision_mlp:.2f}')
print(f'Recall: {recall_mlp:.2f}')



MLP Cross-validation accuracy scores: [0.9729425  0.96956032 0.97068771 0.96956032]
Mean cross-validation accuracy: 0.97
Standard deviation of cross-validation accuracy: 0.00
---- MLP Evaluation on Test Set ----
Accuracy: 0.97
Precision: 0.99
Recall: 0.97
