# 1. Import libraries

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_validate, StratifiedKFold, cross_val_predict, train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, make_scorer, precision_score, recall_score, f1_score, accuracy_score
from joblib import dump
import os
import time
#Models
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Tenserflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# 2. Load data

In [34]:
data = pd.read_excel('złączone_dane.xlsx')
data = data.drop('image_id',axis=1)
data = data.drop(columns=[col for col in data.columns if any(x in col for x in ['3_p', '4_p', '5_p'])])

# 3. Preprocessing

In [35]:
X = data.drop('label',axis=1)
y = data['label']
le = LabelEncoder()
y_encoded = le.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

cv = StratifiedKFold(n_splits=5,shuffle=True,random_state=42)

# Definition scorrers for multiclasses
scoring = {
    'accuracy': 'accuracy',
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}

# 4. Sklearn models

In [36]:
# Folders
os.makedirs('models', exist_ok=True)
os.makedirs('logs', exist_ok=True)
os.makedirs('reports', exist_ok=True)
# Models
models = {
    'RandomForest': RandomForestClassifier(),
    'LogisticRegression': LogisticRegression(max_iter=1000),
    'KNN': KNeighborsClassifier(),
    'SVM': SVC(),
    'NaiveBayes': GaussianNB(),
    'DecisionTree': DecisionTreeClassifier(),
    'MLP': MLPClassifier(max_iter=500)
}

for name, model in models.items():
    print(f'\n🔍 Trening modelu: {name}')
    start_time = time.time()

    # Cross-validate + szczegółowe metryki (predykcja foldowa)
    scores = cross_validate(model, X_scaled, y_encoded, cv=cv, scoring=scoring, return_train_score=False)
    y_pred = cross_val_predict(model, X_scaled, y_encoded, cv=cv)

    # Trening końcowy
    model.fit(X_scaled, y_encoded)
    training_time = time.time() - start_time

    # Zapis modelu
    dump(model, f'models/{name}.pkl')

    # Raport klasyfikacji (dla każdej klasy osobno)
    report = classification_report(y_encoded, y_pred, digits=4)

    # Średnie metryki
    avg_acc = np.mean(scores['test_accuracy'])
    avg_prec = np.mean(scores['test_precision'])
    avg_rec = np.mean(scores['test_recall'])
    avg_f1 = np.mean(scores['test_f1'])

    # === Zapis raportu ===
    with open(f'reports/{name}_report.txt', 'w', encoding='utf-8') as f:
        f.write(f"Model: {name}\n\n")
        f.write("=== Klasyfikacja szczegółowa ===\n")
        f.write(report)
        f.write("\n\n=== Średnie metryki z cross-validation ===\n")
        f.write(f"Accuracy: {avg_acc:.4f}\n")
        f.write(f"Precision (macro): {avg_prec:.4f}\n")
        f.write(f"Recall (macro): {avg_rec:.4f}\n")
        f.write(f"F1 Score (macro): {avg_f1:.4f}\n")
        f.write(f"\nCzas treningu: {training_time:.2f} sekund\n")

    # === Zapis logu ===
    with open(f'logs/{name}_log.txt', 'w', encoding='utf-8') as f:
        f.write(f"Model: {name}\n")
        f.write(f"Czas treningu: {training_time:.2f} s\n")
        f.write(f"Parametry: {model.get_params()}\n")
        f.write("\nŚrednie metryki:\n")
        f.write(f"Accuracy: {avg_acc:.4f}\n")
        f.write(f"Precision: {avg_prec:.4f}\n")
        f.write(f"Recall: {avg_rec:.4f}\n")
        f.write(f"F1: {avg_f1:.4f}\n")

    # === Konsola ===
    print('|====================|')
    print(f"Accuracy: {avg_acc:.4f}")
    print(f"Precision: {avg_prec:.4f}")
    print(f"Recall: {avg_rec:.4f}")
    print(f"F1: {avg_f1:.4f}")
    print(f"Czas treningu: {training_time:.2f} s")
    print('|====================|')



🔍 Trening modelu: RandomForest
Accuracy: 0.9900
Precision: 0.9718
Recall: 0.9588
F1: 0.9637
Czas treningu: 211.90 s

🔍 Trening modelu: LogisticRegression
Accuracy: 0.9757
Precision: 0.9639
Recall: 0.9472
F1: 0.9534
Czas treningu: 38.64 s

🔍 Trening modelu: KNN
Accuracy: 0.9265
Precision: 0.9209
Recall: 0.9008
F1: 0.9074
Czas treningu: 2.49 s

🔍 Trening modelu: SVM
Accuracy: 0.9356
Precision: 0.9345
Recall: 0.9203
F1: 0.9253
Czas treningu: 102.29 s

🔍 Trening modelu: NaiveBayes
Accuracy: 0.7868
Precision: 0.7771
Recall: 0.7880
F1: 0.7747
Czas treningu: 3.67 s

🔍 Trening modelu: DecisionTree
Accuracy: 0.9755
Precision: 0.9268
Recall: 0.9044
F1: 0.9121
Czas treningu: 49.77 s

🔍 Trening modelu: MLP
Accuracy: 0.9893
Precision: 0.9744
Recall: 0.9655
F1: 0.9689
Czas treningu: 123.19 s


# 5. TenserFlow model

In [37]:
start_time = time.time()

# Podział do walidacji i raportu
X_train_tf, X_val_tf, y_train_tf, y_val_tf = train_test_split(X_scaled, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

model_tf = Sequential([
    Dense(128, activation='relu', input_shape=(X_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(len(np.unique(y_encoded)), activation='softmax')
])

model_tf.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model_tf.fit(X_train_tf, y_train_tf, validation_data=(X_val_tf, y_val_tf), epochs=50, callbacks=[early_stop], verbose=0)

training_time_tf = time.time() - start_time
model_tf.save("models/NeuralNet_TF.h5")

# Ewaluacja
y_pred_tf = np.argmax(model_tf.predict(X_val_tf), axis=1)
report_tf = classification_report(y_val_tf, y_pred_tf, digits=4)

# Metryki
acc_tf = accuracy_score(y_val_tf, y_pred_tf)
prec_tf = precision_score(y_val_tf, y_pred_tf, average='macro')
rec_tf = recall_score(y_val_tf, y_pred_tf, average='macro')
f1_tf = f1_score(y_val_tf, y_pred_tf, average='macro')

# === Zapis raportu ===
with open('reports/NeuralNet_TF_report.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_TF\n\n")
    f.write("=== Klasyfikacja szczegółowa ===\n")
    f.write(report_tf)
    f.write("\n\n=== Średnie metryki z cross-validation ===\n")
    f.write(f"Accuracy: {acc_tf:.4f}\n")
    f.write(f"Precision (macro): {prec_tf:.4f}\n")
    f.write(f"Recall (macro): {rec_tf:.4f}\n")
    f.write(f"F1 Score (macro): {f1_tf:.4f}\n")
    f.write(f"\nCzas treningu: {training_time_tf:.2f} sekund\n")

# === Zapis logu ===
with open('logs/NeuralNet_TF_log.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_TF\n")
    f.write(f"Czas treningu: {training_time_tf:.2f} s\n")
    f.write(f"Epoki: {len(history.history['loss'])}\n")
    f.write(f"Parametry: {model_tf.count_params()} total\n")
    f.write("\nŚrednie metryki:\n")
    f.write(f"Accuracy: {acc_tf:.4f}\n")
    f.write(f"Precision: {prec_tf:.4f}\n")
    f.write(f"Recall: {rec_tf:.4f}\n")
    f.write(f"F1: {f1_tf:.4f}\n")

# === Konsola ===
print('\n🔍 Trening modelu: NeuralNet_TF')
print('|====================|')
print(f"Accuracy: {acc_tf:.4f}")
print(f"Precision: {prec_tf:.4f}")
print(f"Recall: {rec_tf:.4f}")
print(f"F1: {f1_tf:.4f}")
print(f"Czas treningu: {training_time_tf:.2f} s")
print('|====================|')




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

🔍 Trening modelu: NeuralNet_TF
Accuracy: 0.9915
Precision: 0.9798
Recall: 0.9742
F1: 0.9758
Czas treningu: 12.74 s


# 6. PyTorch model

In [38]:
start_time = time.time()

# Dane
X_train_pt, X_val_pt, y_train_pt, y_val_pt = train_test_split(X_scaled, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

X_train_tensor = torch.tensor(X_train_pt, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_pt, dtype=torch.long)
X_val_tensor = torch.tensor(X_val_pt, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val_pt, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

class Net(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )
    def forward(self, x): return self.net(x)

model_pt = Net(X_scaled.shape[1], len(np.unique(y_encoded)))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_pt.parameters(), lr=0.001)

# Trening
for epoch in range(20):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        out = model_pt(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()

training_time_pt = time.time() - start_time
torch.save(model_pt.state_dict(), "models/NeuralNet_PT.pt")

# Ewaluacja
with torch.no_grad():
    y_pred_pt = model_pt(X_val_tensor).argmax(dim=1).numpy()
    y_true_pt = y_val_tensor.numpy()

report_pt = classification_report(y_true_pt, y_pred_pt, digits=4)

acc_pt = accuracy_score(y_true_pt, y_pred_pt)
prec_pt = precision_score(y_true_pt, y_pred_pt, average='macro')
rec_pt = recall_score(y_true_pt, y_pred_pt, average='macro')
f1_pt = f1_score(y_true_pt, y_pred_pt, average='macro')

# === Zapis raportu ===
with open('reports/NeuralNet_PT_report.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_PT\n\n")
    f.write("=== Klasyfikacja szczegółowa ===\n")
    f.write(report_pt)
    f.write("\n\n=== Średnie metryki z cross-validation ===\n")
    f.write(f"Accuracy: {acc_pt:.4f}\n")
    f.write(f"Precision (macro): {prec_pt:.4f}\n")
    f.write(f"Recall (macro): {rec_pt:.4f}\n")
    f.write(f"F1 Score (macro): {f1_pt:.4f}\n")
    f.write(f"\nCzas treningu: {training_time_pt:.2f} sekund\n")

# === Zapis logu ===
with open('logs/NeuralNet_PT_log.txt', 'w', encoding='utf-8') as f:
    f.write("Model: NeuralNet_PT\n")
    f.write(f"Czas treningu: {training_time_pt:.2f} s\n")
    f.write(f"Epoki: 20\n")
    f.write(f"Parametry: {sum(p.numel() for p in model_pt.parameters())} total\n")
    f.write("\nŚrednie metryki:\n")
    f.write(f"Accuracy: {acc_pt:.4f}\n")
    f.write(f"Precision: {prec_pt:.4f}\n")
    f.write(f"Recall: {rec_pt:.4f}\n")
    f.write(f"F1: {f1_pt:.4f}\n")

# === Konsola ===
print('\n🔍 Trening modelu: NeuralNet_PT')
print('|====================|')
print(f"Accuracy: {acc_pt:.4f}")
print(f"Precision: {prec_pt:.4f}")
print(f"Recall: {rec_pt:.4f}")
print(f"F1: {f1_pt:.4f}")
print(f"Czas treningu: {training_time_pt:.2f} s")
print('|====================|')



🔍 Trening modelu: NeuralNet_PT
Accuracy: 0.9911
Precision: 0.9745
Recall: 0.9641
F1: 0.9656
Czas treningu: 14.91 s
