In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv('4-2.csv')

# Separate features and labels
X = data.drop('label', axis=1)
y = data['label']

# Handle missing values by imputing them
imputer = SimpleImputer(strategy='mean')  # You can change the strategy as needed
X = imputer.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the models
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Support Vector Machine': SVC(kernel='linear', random_state=42)
}

# Train and evaluate each model
for name, model in models.items():
    print(f"\nModel: {name}")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nAccuracy Score:")
    print(accuracy_score(y_test, y_pred))




Model: Random Forest
Confusion Matrix:
[[1189    0]
 [  26  755]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1189
           1       1.00      0.97      0.98       781

    accuracy                           0.99      1970
   macro avg       0.99      0.98      0.99      1970
weighted avg       0.99      0.99      0.99      1970


Accuracy Score:
0.9868020304568528

Model: Gradient Boosting
Confusion Matrix:
[[1189    0]
 [  33  748]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99      1189
           1       1.00      0.96      0.98       781

    accuracy                           0.98      1970
   macro avg       0.99      0.98      0.98      1970
weighted avg       0.98      0.98      0.98      1970


Accuracy Score:
0.983248730964467

Model: K-Nearest Neighbors
Confusion Matrix:
[[1174   15]
 [ 418  363]]

Classification 