In [None]:
%pip install pandas numpy scikit-learn

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Compare a set of scikit-learn classifiers with 5-fold cross-validated
# accuracy on the training file, and prepare scaled train/test matrices.

# --- Data loading ------------------------------------------------------------
# Both CSVs are read without a header row. In the training file the last
# column is taken as the class label and all preceding columns as features;
# every column of the test file is taken as a feature.
train_data = pd.read_csv('datasetTV.csv', header=None)
test_data = pd.read_csv('datasetTest.csv', header=None)

X_raw = train_data.iloc[:, :-1].values  # unscaled features, used for CV below
y = train_data.iloc[:, -1].values
X_test = test_data.values

# --- Scaling for downstream use ----------------------------------------------
# Fit on the full training set and apply to the test set. These scaled
# matrices are NOT used for cross-validation (see the CV section) because a
# scaler fitted on all rows has already seen each fold's validation rows.
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)
X_test = scaler.transform(X_test)

# --- Candidate models --------------------------------------------------------
models = {
    'Naive Bayes': GaussianNB(),
    'k-NN': KNeighborsClassifier(n_neighbors=5),
    'Perceptron': Perceptron(max_iter=1000, random_state=42),
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'SVM Linear kernel': SVC(kernel='linear', C=1.0, random_state=42),
    'SVM rbf kernel': SVC(kernel='rbf', C=1.0, random_state=42),
    'SVM poly kernel': SVC(kernel='poly', C=1.0, random_state=42),
    'SVM sigmoid kernel': SVC(kernel='sigmoid', C=1.0, random_state=42),
    'Neural Networks': MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42),
    'Bootstrap & Feature Bagging': RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=42),
    'AdaBoost': AdaBoostClassifier(n_estimators=100, random_state=42),
    'Decision Tree': DecisionTreeClassifier(max_depth=None, random_state=42),
    # NOTE(review): tree depth is capped at floor(sqrt(n_features)); sqrt of
    # the feature count is more commonly used for max_features -- confirm
    # that limiting max_depth this way is intentional.
    'Random Forests': RandomForestClassifier(n_estimators=100, random_state=42, max_depth=int(np.sqrt(X.shape[1]))),
}

# --- Cross-validation --------------------------------------------------------
results = {}
kf = KFold(n_splits=5, shuffle=True, random_state=42)  # same folds for every model

for name, model in models.items():
    # Standardisation is part of the estimator so it is re-fitted on the
    # training portion of each fold only; the validation rows never influence
    # the scaling statistics (no train/validation leakage).
    fold_estimator = make_pipeline(StandardScaler(), model)
    scores = cross_val_score(fold_estimator, X_raw, y, cv=kf, scoring='accuracy')
    results[name] = np.mean(scores)

# Report the mean accuracy across folds for each model.
for method, score in results.items():
    print(f"{method}: {score}")


Note: you may need to restart the kernel to use updated packages.
Naive Bayes: 0.704564491249004
k-NN: 0.8100184741068123
Perceptron: 0.7020479175416354
Logistic Regression: 0.7701007964014742
SVM Linear kernel: 0.765984419995473
SVM rbf kernel: 0.8459337339545447
SVM poly kernel: 0.8334670481857562
SVM sigmoid kernel: 0.783140480405279
Neural Networks: 0.8251170659140954
Bootstrap & Feature Bagging: 0.8117339362276972
AdaBoost: 0.6552666904789006
Decision Tree: 0.6142058947054413
Random Forests: 0.8097904915917956
Logistic Regression (after PCA): 0.7795947471781849
