# 1. Import libraries

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_validate, StratifiedKFold, cross_val_predict, train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, make_scorer, precision_score, recall_score, f1_score, accuracy_score
from joblib import dump
import os
import time
#Models
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Tenserflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import Input

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# 2. Load data

In [2]:
data = pd.read_excel('złączone_dane.xlsx')
data = data.drop('image_id',axis=1)
data = data.drop(columns=[col for col in data.columns if any(x in col for x in ['3_p', '4_p', '5_p'])])

# 3. Preprocessing

In [3]:
X = data.drop('label',axis=1)
y = data['label']
le = LabelEncoder()
y_encoded = le.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

cv = StratifiedKFold(n_splits=5,shuffle=True,random_state=42)

# Definition scorrers for multiclasses
scoring = {
    'accuracy': 'accuracy',
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}

# 4. Sklearn models

In [None]:
# Folders
os.makedirs('models', exist_ok=True)
os.makedirs('logs', exist_ok=True)
os.makedirs('reports', exist_ok=True)
# Models
models = {
    'RandomForest': RandomForestClassifier(),
    'LogisticRegression': LogisticRegression(),
    'KNN': KNeighborsClassifier(),
    'SVM': SVC(),
    'NaiveBayes': GaussianNB(),
    'DecisionTree': DecisionTreeClassifier(),
    'MLP': MLPClassifier()
}

for name, model in models.items():
    print(f'\n🔍 Trening modelu: {name}')
    start_time = time.time()

    # Cross-validate + szczegółowe metryki (predykcja foldowa)
    scores = cross_validate(model, X_scaled, y_encoded, cv=cv, scoring=scoring, return_train_score=False)
    y_pred = cross_val_predict(model, X_scaled, y_encoded, cv=cv)

    # Trening końcowy
    model.fit(X_scaled, y_encoded)
    training_time = time.time() - start_time

    # Zapis modelu
    dump(model, f'models/{name}.pkl')

    # Raport klasyfikacji (dla każdej klasy osobno)
    report = classification_report(y_encoded, y_pred, digits=4)

    # Średnie metryki
    avg_acc = np.mean(scores['test_accuracy'])
    avg_prec = np.mean(scores['test_precision'])
    avg_rec = np.mean(scores['test_recall'])
    avg_f1 = np.mean(scores['test_f1'])

    # === Zapis raportu ===
    with open(f'reports/{name}_report.txt', 'w', encoding='utf-8') as f:
        f.write(f"Model: {name}\n\n")
        f.write("=== Klasyfikacja szczegółowa ===\n")
        f.write(report)
        f.write("\n\n=== Średnie metryki z cross-validation ===\n")
        f.write(f"Accuracy: {avg_acc:.4f}\n")
        f.write(f"Precision (macro): {avg_prec:.4f}\n")
        f.write(f"Recall (macro): {avg_rec:.4f}\n")
        f.write(f"F1 Score (macro): {avg_f1:.4f}\n")
        f.write(f"\nCzas treningu: {training_time:.2f} sekund\n")

    # === Zapis logu ===
    with open(f'logs/{name}_log.txt', 'w', encoding='utf-8') as f:
        f.write(f"Model: {name}\n")
        f.write(f"Czas treningu: {training_time:.2f} s\n")
        f.write(f"Parametry: {model.get_params()}\n")
        f.write("\nŚrednie metryki:\n")
        f.write(f"Accuracy: {avg_acc:.4f}\n")
        f.write(f"Precision: {avg_prec:.4f}\n")
        f.write(f"Recall: {avg_rec:.4f}\n")
        f.write(f"F1: {avg_f1:.4f}\n")

    # === Konsola ===
    print('|====================|')
    print(f"Accuracy: {avg_acc:.4f}")
    print(f"Precision: {avg_prec:.4f}")
    print(f"Recall: {avg_rec:.4f}")
    print(f"F1: {avg_f1:.4f}")
    print(f"Czas treningu: {training_time:.2f} s")
    print('|====================|')



🔍 Trening modelu: RandomForest
Accuracy: 0.9901
Precision: 0.9709
Recall: 0.9612
F1: 0.9645
Czas treningu: 209.88 s

🔍 Trening modelu: LogisticRegression


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracy: 0.9745
Precision: 0.9661
Recall: 0.9492
F1: 0.9557
Czas treningu: 24.15 s

🔍 Trening modelu: KNN
Accuracy: 0.9265
Precision: 0.9209
Recall: 0.9008
F1: 0.9074
Czas treningu: 2.68 s

🔍 Trening modelu: SVM
Accuracy: 0.9356
Precision: 0.9345
Recall: 0.9203
F1: 0.9253
Czas treningu: 108.64 s

🔍 Trening modelu: NaiveBayes
Accuracy: 0.7868
Precision: 0.7771
Recall: 0.7880
F1: 0.7747
Czas treningu: 3.75 s

🔍 Trening modelu: DecisionTree
Accuracy: 0.9744
Precision: 0.9228
Recall: 0.9011
F1: 0.9081
Czas treningu: 51.17 s

🔍 Trening modelu: MLP
Accuracy: 0.9888
Precision: 0.9732
Recall: 0.9614
F1: 0.9662
Czas treningu: 121.74 s


# 5. TenserFlow model

In [9]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

acc_tf_list, prec_tf_list, rec_tf_list, f1_tf_list = [], [], [], []

start_time = time.time()

for fold, (train_idx, val_idx) in enumerate(cv.split(X_scaled, y_encoded), 1):
    print(f"\n🔁 Fold {fold}")

    X_train_tf, X_val_tf = X_scaled[train_idx], X_scaled[val_idx]
    y_train_tf, y_val_tf = y_encoded[train_idx], y_encoded[val_idx]

    model_tf = Sequential([
        Input(shape=(X_scaled.shape[1],)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(len(np.unique(y_encoded)), activation='softmax')
    ])

    model_tf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    history = model_tf.fit(
        X_train_tf, y_train_tf,
        validation_data=(X_val_tf, y_val_tf),
        epochs=50,
        callbacks=[early_stop],
        verbose=0
    )

    y_pred_tf = np.argmax(model_tf.predict(X_val_tf), axis=1)

    acc_tf = accuracy_score(y_val_tf, y_pred_tf)
    prec_tf = precision_score(y_val_tf, y_pred_tf, average='macro')
    rec_tf = recall_score(y_val_tf, y_pred_tf, average='macro')
    f1_tf = f1_score(y_val_tf, y_pred_tf, average='macro')

    print(f"Fold {fold} — Acc: {acc_tf:.4f}, Prec: {prec_tf:.4f}, Rec: {rec_tf:.4f}, F1: {f1_tf:.4f}")

    acc_tf_list.append(acc_tf)
    prec_tf_list.append(prec_tf)
    rec_tf_list.append(rec_tf)
    f1_tf_list.append(f1_tf)

# Ewaluacja
y_pred_tf = np.argmax(model_tf.predict(X_val_tf), axis=1)
report_tf = classification_report(y_val_tf, y_pred_tf, digits=4)

training_time_tf = time.time() - start_time
# Metryki
acc_tf = np.mean(acc_tf_list)
prec_tf = np.mean(prec_tf_list)
rec_tf = np.mean(rec_tf_list)
f1_tf = np.mean(f1_tf_list)

# === Zapis raportu ===
with open('reports/NeuralNet_TF_report.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_TF\n\n")
    f.write("=== Klasyfikacja szczegółowa ===\n")
    f.write(report_tf)
    f.write("\n\n=== Średnie metryki z cross-validation ===\n")
    f.write(f"Accuracy: {acc_tf:.4f}\n")
    f.write(f"Precision (macro): {prec_tf:.4f}\n")
    f.write(f"Recall (macro): {rec_tf:.4f}\n")
    f.write(f"F1 Score (macro): {f1_tf:.4f}\n")
    f.write(f"\nCzas treningu: {training_time_tf:.2f} sekund\n")

# === Zapis logu ===
with open('logs/NeuralNet_TF_log.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_TF\n")
    f.write(f"Czas treningu: {training_time_tf:.2f} s\n")
    f.write(f"Epoki: {len(history.history['loss'])}\n")
    f.write(f"Parametry: {model_tf.count_params()} total\n")
    f.write("\nŚrednie metryki:\n")
    f.write(f"Accuracy: {acc_tf:.4f}\n")
    f.write(f"Precision: {prec_tf:.4f}\n")
    f.write(f"Recall: {rec_tf:.4f}\n")
    f.write(f"F1: {f1_tf:.4f}\n")

# === Konsola ===
print('\n🔍 Trening modelu: NeuralNet_TF')
print('|====================|')
print(f"Accuracy: {acc_tf:.4f}")
print(f"Precision: {prec_tf:.4f}")
print(f"Recall: {rec_tf:.4f}")
print(f"F1: {f1_tf:.4f}")
print(f"Czas treningu: {training_time_tf:.2f} s")
print('|====================|')


🔁 Fold 1
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Fold 1 — Acc: 0.9843, Prec: 0.9629, Rec: 0.9621, F1: 0.9615

🔁 Fold 2
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Fold 2 — Acc: 0.9902, Prec: 0.9701, Rec: 0.9693, F1: 0.9690

🔁 Fold 3
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Fold 3 — Acc: 0.9872, Prec: 0.9674, Rec: 0.9719, F1: 0.9693

🔁 Fold 4
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Fold 4 — Acc: 0.9902, Prec: 0.9685, Rec: 0.9657, F1: 0.9658

🔁 Fold 5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Fold 5 — Acc: 0.9808, Prec: 0.9556, Rec: 0.9416, F1: 0.9464
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

🔍 Trening modelu: NeuralNet_TF
Accuracy: 0.9865
Precision: 0.9649
Recall: 0.9621
F1: 0.9624
Czas treningu: 51.10 s


# 6. PyTorch model

In [None]:
class Net(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.net(x)
    
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

acc_list, prec_list, rec_list, f1_list = [], [], [], []
start_time = time.time()

for fold, (train_idx, val_idx) in enumerate(cv.split(X_scaled, y_encoded), 1):
    print(f"\n🔁 Fold {fold}")

    X_train_fold, X_val_fold = X_scaled[train_idx], X_scaled[val_idx]
    y_train_fold, y_val_fold = y_encoded[train_idx], y_encoded[val_idx]

    X_train_tensor = torch.tensor(X_train_fold, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train_fold, dtype=torch.long)
    X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val_fold, dtype=torch.long)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    model = Net(X_scaled.shape[1], len(np.unique(y_encoded)))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0
    best_model_path = f"models/NeuralNet_PT_best_fold{fold}.pt"
    os.makedirs("models", exist_ok=True)

    for epoch in range(50):
        model.train()
        for xb, yb in train_loader:
            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor).item()

        print(f"Fold {fold}, Epoka {epoch+1}, Strata walidacyjna: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_path)
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"⏹️ Wczesne zatrzymanie na epoce {epoch+1}")
                break

    # Ewaluacja
    model.load_state_dict(torch.load(best_model_path))
    model.eval()
    with torch.no_grad():
        y_pred = model(X_val_tensor).argmax(dim=1).numpy()
        y_true = y_val_tensor.numpy()

    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='macro')
    rec = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    print(f"Fold {fold} — Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}, F1: {f1:.4f}")

    acc_list.append(acc)
    prec_list.append(prec)
    rec_list.append(rec)
    f1_list.append(f1)

# Podsumowanie
training_time = time.time() - start_time
acc_mean = np.mean(acc_list)
prec_mean = np.mean(prec_list)
rec_mean = np.mean(rec_list)
f1_mean = np.mean(f1_list)

# === Zapis raportu ===
os.makedirs("reports", exist_ok=True)
with open('reports/NeuralNet_PT_report.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_PT\n\n")
    f.write("=== Średnie metryki z cross-validation ===\n")
    f.write(f"Accuracy: {acc_mean:.4f}\n")
    f.write(f"Precision (macro): {prec_mean:.4f}\n")
    f.write(f"Recall (macro): {rec_mean:.4f}\n")
    f.write(f"F1 Score (macro): {f1_mean:.4f}\n")
    f.write(f"\nCzas treningu: {training_time:.2f} sekund\n")

# === Log ===
os.makedirs("logs", exist_ok=True)
with open('logs/NeuralNet_PT_log.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_PT\n")
    f.write(f"Czas treningu: {training_time:.2f} s\n")
    f.write(f"Parametry: {sum(p.numel() for p in model.parameters())} total\n")
    f.write("\nŚrednie metryki:\n")
    f.write(f"Accuracy: {acc_mean:.4f}\n")
    f.write(f"Precision: {prec_mean:.4f}\n")
    f.write(f"Recall: {rec_mean:.4f}\n")
    f.write(f"F1: {f1_mean:.4f}\n")

# === Konsola ===
print('\n🔍 Trening modelu: NeuralNet_PT (KFold)')
print('|====================|')
print(f"Accuracy: {acc_mean:.4f}")
print(f"Precision: {prec_mean:.4f}")
print(f"Recall: {rec_mean:.4f}")
print(f"F1: {f1_mean:.4f}")
print(f"Czas treningu: {training_time:.2f} s")
print('|====================|')


🔁 Fold 1
Fold 1, Epoka 1, Strata walidacyjna: 0.5046
Fold 1, Epoka 2, Strata walidacyjna: 0.3178
Fold 1, Epoka 3, Strata walidacyjna: 0.2335
Fold 1, Epoka 4, Strata walidacyjna: 0.1986
Fold 1, Epoka 5, Strata walidacyjna: 0.1479
Fold 1, Epoka 6, Strata walidacyjna: 0.1177
Fold 1, Epoka 7, Strata walidacyjna: 0.1108
Fold 1, Epoka 8, Strata walidacyjna: 0.1080
Fold 1, Epoka 9, Strata walidacyjna: 0.0860
Fold 1, Epoka 10, Strata walidacyjna: 0.1262
Fold 1, Epoka 11, Strata walidacyjna: 0.0615
Fold 1, Epoka 12, Strata walidacyjna: 0.0795
Fold 1, Epoka 13, Strata walidacyjna: 0.1181
Fold 1, Epoka 14, Strata walidacyjna: 0.0928
Fold 1, Epoka 15, Strata walidacyjna: 0.0669
Fold 1, Epoka 16, Strata walidacyjna: 0.0595
Fold 1, Epoka 17, Strata walidacyjna: 0.0588
Fold 1, Epoka 18, Strata walidacyjna: 0.0548
Fold 1, Epoka 19, Strata walidacyjna: 0.0807
Fold 1, Epoka 20, Strata walidacyjna: 0.1109
Fold 1, Epoka 21, Strata walidacyjna: 0.0710
Fold 1, Epoka 22, Strata walidacyjna: 0.1045
Fold 1, E