In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset into a pandas DataFrame
dataset_path = r'C:\Users\tahsi\OneDrive\Desktop\python_ws\Feature Selection\Recursive Feature Elimination(RFE)\selected_dataset.csv'
df = pd.read_csv(dataset_path)

# Separate the features and target variable
X = df.drop('classification', axis=1)
y = df['classification']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the individual classifiers
mlp_classifier = MLPClassifier(hidden_layer_sizes=(128, 64))
rf_classifier = RandomForestClassifier()

# Perform cross-validation on MLP classifier
mlp_scores = cross_val_score(mlp_classifier, X_train, y_train, cv=5)
print("MLP Cross-Validation Scores:", mlp_scores)
print("MLP Cross-Validation Accuracy:", mlp_scores.mean())

# Perform cross-validation on Random Forest classifier
rf_scores = cross_val_score(rf_classifier, X_train, y_train, cv=5)
print("Random Forest Cross-Validation Scores:", rf_scores)
print("Random Forest Cross-Validation Accuracy:", rf_scores.mean())

# Fit the classifiers on the entire training set
mlp_classifier.fit(X_train, y_train)
rf_classifier.fit(X_train, y_train)

# Make predictions using the individual classifiers
mlp_pred = mlp_classifier.predict(X_test)
rf_pred = rf_classifier.predict(X_test)

# Combine predictions using majority voting
combined_pred = []
for i in range(len(X_test)):
    votes = [mlp_pred[i], rf_pred[i]]  # List of predictions from individual classifiers
    combined_pred.append(max(set(votes), key=votes.count))  # Select the most frequent prediction

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, combined_pred)
precision = precision_score(y_test, combined_pred, average='weighted')
recall = recall_score(y_test, combined_pred, average='weighted')
f1 = f1_score(y_test, combined_pred, average='weighted')
auc = roc_auc_score(pd.get_dummies(y_test).values, pd.get_dummies(combined_pred).values, multi_class='ovr')
v_score = matthews_corrcoef(y_test, combined_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)
print("V Score:", v_score)

# Calculate and visualize the confusion matrix
confusion_matrix_data = confusion_matrix(y_test, combined_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_matrix_data, annot=True, cmap='Blues', fmt='d')
plt.title('Confusion Matrix')
plt.show()


MLP Cross-Validation Scores: [0.59428571 0.53714286 0.60344828 0.60344828 0.55747126]
MLP Cross-Validation Accuracy: 0.5791592775041051
Random Forest Cross-Validation Scores: [0.84       0.86285714 0.8908046  0.85632184 0.7816092 ]
Random Forest Cross-Validation Accuracy: 0.8463185550082102
Accuracy: 0.7568807339449541
Precision: 0.7742207533192774
Recall: 0.7568807339449541
F1 Score: 0.730395751037953
AUC: 0.8105438433728646
V Score: 0.6478523004019726


  _warn_prf(average, modifier, msg_start, len(result))


NameError: name 'confusion_matrix' is not defined