In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from keras.datasets import cifar10
from sklearn.decomposition import PCA

# Load CIFAR-10. Evaluation below uses a 30% hold-out carved out of the
# training split, so the official test split is deliberately ignored here
# instead of being transformed and then thrown away.
(X_train, y_train), _ = cifar10.load_data()
y_train = y_train.ravel()

# Flatten every image to a 3072-element row and scale pixel values to [0, 1].
X_train = X_train.reshape(-1, 3072).astype("float32") / 255.0

# Split BEFORE fitting PCA so the hold-out rows cannot influence the
# projection (fitting on all rows first would leak evaluation data).
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=42)

# Reduce to 50 principal components for faster training. The projection is
# learned from the training portion only and then applied to the hold-out.
# random_state pins the randomized SVD solver so repeated runs match.
pca = PCA(n_components=50, random_state=42)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Candidate models for the benchmark, keyed by the label shown in the plot.
n_estimators = 200  # number of estimators shared by the three boosting models
RANDOM_STATE = 42  # same seed as the train/test split, for reproducible runs
models = {
    "AdaBoost": AdaBoostClassifier(n_estimators=n_estimators, random_state=RANDOM_STATE),
    "GBT": GradientBoostingClassifier(n_estimators=n_estimators, random_state=RANDOM_STATE),
    "XGBoost": xgb.XGBClassifier(n_estimators=n_estimators, random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "kNN": KNeighborsClassifier(),
    # The default max_iter=100 frequently stops before convergence on this
    # kind of input; allow more iterations so the reported score is that of
    # a converged model.
    "Logistic Regression": LogisticRegression(max_iter=1000),
}

# Fit every model on the training split, recording its hold-out accuracy and
# how long the fit took (in seconds).
scores = {}
training_times = {}

for name, model in models.items():
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock and can jump if the
    # system clock is adjusted during a long fit.
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Scoring happens after the timer stops, so it is not counted as
    # training time.
    score = model.score(X_test, y_test)

    scores[name] = score
    training_times[name] = training_time

# Plotting helper
def plot_scores_and_times(scores, times):
    """Draw a bar chart of model scores, labelling each bar with its fit time.

    Args:
        scores: Mapping of model name to score; one bar is drawn per entry,
            in the mapping's iteration order.
        times: Mapping of model name to a duration in seconds; it is looked
            up for every key of ``scores`` and rendered with two decimals
            above the corresponding bar.
    """
    plt.figure(figsize=(15, 8))

    bar_names = [*scores.keys()]
    bar_heights = [*scores.values()]
    # Build all captions up front so a missing entry in ``times`` fails
    # before anything is drawn.
    captions = [f'{times[bar_name]:.2f}s' for bar_name in bar_names]

    plt.bar(bar_names, bar_heights, color='blue')

    # Place each caption 10 points above the top of its bar, centred.
    for x_pos, (caption, height) in enumerate(zip(captions, bar_heights)):
        plt.annotate(
            caption,
            (x_pos, height),
            textcoords="offset points",
            xytext=(0, 10),
            ha='center',
        )

    plt.xlabel('Model', fontsize=14)
    plt.ylabel('Score', fontsize=14)
    plt.title('Model Performance on CIFAR-10', fontsize=16)
    plt.xticks(rotation=45, fontsize=12)
    plt.yticks(fontsize=12)
    plt.show()

# Render the comparison chart from the collected scores and training times.
plot_scores_and_times(scores=scores, times=training_times)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz