<a href="https://colab.research.google.com/github/Ponczeks/image-classification-comparison/blob/main/Cifar_10_labels_and_noise.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ETYKIETY**

# Ładowanie danych

In [None]:
import torch
import numpy as np
import random
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pandas as pd

# --- Experiment configuration -------------------------------------------------
N_train   = 50000            # number of training samples kept after shuffling
N_test    = 10000            # number of test samples kept after shuffling
label_noise_ratio = 0.2      # fraction of training labels replaced by a wrong class
num_classes = 10

# Seed every RNG this cell relies on (Python, NumPy, PyTorch).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Per-channel statistics handed to Normalize
# (presumably the CIFAR-10 training-set mean/std; verify if the dataset changes).
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2470, 0.2435, 0.2616)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


train_ds = datasets.CIFAR10(root='./data', train=True,  download=True, transform=transform)
test_ds  = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Materialise a shuffled subset as one tensor plus one label array.
# Each sample is fetched exactly once: indexing the dataset re-runs the transform,
# so reading the image and the label in separate passes doubled the work.
perm_tr    = np.random.permutation(len(train_ds))[:N_train]
train_images, train_labels = zip(*(train_ds[i] for i in perm_tr))
X_train    = torch.stack(train_images)
y_train    = np.array(train_labels)
del train_images, train_labels  # the stacked tensor is the only copy needed from here on

perm_te    = np.random.permutation(len(test_ds))[:N_test]
test_images, test_labels = zip(*(test_ds[i] for i in perm_te))
X_test     = torch.stack(test_images)
y_test     = np.array(test_labels)
del test_images, test_labels

def add_label_noise(y, noise_ratio=label_noise_ratio, num_classes=num_classes):
    """Return a copy of ``y`` in which a fraction of the labels is wrong.

    ``int(noise_ratio * len(y))`` distinct positions are drawn with
    ``np.random.choice`` and each selected label is replaced by a class drawn
    with ``random.choice`` from all classes except the current one, so every
    selected position is guaranteed to change. The input array is not modified.
    """
    corrupted = y.copy()
    total = len(corrupted)
    flip_positions = np.random.choice(total, int(noise_ratio * total), replace=False)
    for pos in flip_positions:
        # Candidate replacements: every class id except the label currently stored.
        alternatives = list(range(num_classes))
        alternatives.remove(corrupted[pos])
        corrupted[pos] = random.choice(alternatives)
    return corrupted


# Keep an untouched copy of the labels next to the corrupted variant so both
# experiments can be run on exactly the same images.
y_train_clean = y_train.copy()
y_train_noisy = add_label_noise(y_train)
batch_size = 128
# Both training loaders wrap the same image tensor; only the label arrays differ.
train_loader_clean = DataLoader(list(zip(X_train, y_train_clean)), batch_size=batch_size, shuffle=True)
train_loader_noisy = DataLoader(list(zip(X_train, y_train_noisy)), batch_size=batch_size, shuffle=True)
# The test loader always carries the original labels and is not shuffled.
test_loader_clean = DataLoader(list(zip(X_test, y_test)), batch_size=batch_size, shuffle=False)

# Sanity report of the dataset sizes behind each loader.
print("Data loading and preparation complete.")
print(f"Clean training data samples: {len(train_loader_clean.dataset)}")
print(f"Noisy training data samples: {len(train_loader_noisy.dataset)}")
print(f"Clean test data samples: {len(test_loader_clean.dataset)}")

Data loading and preparation complete.
Clean training data samples: 50000
Noisy training data samples: 50000
Clean test data samples: 10000


# KNN

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import torch
from sklearn.neighbors import KNeighborsClassifier
# Flatten every image to a single feature vector (one row per sample).
X_train_flat = X_train.view(X_train.size(0), -1).cpu().numpy()
X_test_flat = X_test.view(X_test.size(0), -1).cpu().numpy()

# Standardise features; the scaler is fitted on the training split only and
# then reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

# Reduce to 100 principal components, again fitted on the training split only.
pca = PCA(n_components=100, random_state=SEED)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
# Aliases only: these names point at the same arrays as the originals.
y_train_clean_np = y_train_clean
y_train_noisy_np = y_train_noisy
y_test_np = y_test


def knn_classify_pca(x_train_pca, y_train_np, x_test_pca, k):
    """Fit a k-nearest-neighbours classifier and return its test predictions.

    A fresh ``KNeighborsClassifier(n_neighbors=k)`` is fitted on
    ``(x_train_pca, y_train_np)`` and applied to ``x_test_pca``.
    """
    classifier = KNeighborsClassifier(n_neighbors=k)
    # fit() returns the estimator itself, so the call can be chained.
    return classifier.fit(x_train_pca, y_train_np).predict(x_test_pca)

# Neighbourhood sizes to evaluate; one result row is produced per (k, label set).
ks = [1, 3, 5, 7, 9, 11]
results_knn = []

print("=== kNN Performance on Clean Test Set ===")
print(f"{'Train Data':<15} | {'k':<5} | {'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}")
print("-" * 80)

for k in ks:
    # Train on clean labels, evaluate on the clean test labels.
    # Precision/recall/F1 are macro-averaged over the classes.
    y_pred_clean = knn_classify_pca(X_train_pca, y_train_clean_np, X_test_pca, k)
    acc_clean = accuracy_score(y_test_np, y_pred_clean)
    prec_clean = precision_score(y_test_np, y_pred_clean, average='macro', zero_division=0)
    rec_clean = recall_score(y_test_np, y_pred_clean, average='macro', zero_division=0)
    f1_clean = f1_score(y_test_np, y_pred_clean, average='macro', zero_division=0)
    results_knn.append({
        'Classifier': 'kNN',
        'Training Data': 'Clean',
        'k': k,
        'Accuracy': acc_clean,
        'Precision': prec_clean,
        'Recall': rec_clean,
        'F1-score': f1_clean
    })
    print(f"{'Clean':<15} | {k:<5} | {acc_clean:<10.4f} | {prec_clean:<10.4f} | {rec_clean:<10.4f} | {f1_clean:<10.4f}")


    # Same features, but trained on the corrupted labels; still evaluated
    # against the clean test labels.
    y_pred_noisy = knn_classify_pca(X_train_pca, y_train_noisy_np, X_test_pca, k)
    acc_noisy = accuracy_score(y_test_np, y_pred_noisy)
    prec_noisy = precision_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)
    rec_noisy = recall_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)
    f1_noisy = f1_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)
    results_knn.append({
        'Classifier': 'kNN',
        'Training Data': '20% Label Noise',
        'k': k,
        'Accuracy': acc_noisy,
        'Precision': prec_noisy,
        'Recall': rec_noisy,
        'F1-score': f1_noisy
    })
    print(f"{'20% Label Noise':<15} | {k:<5} | {acc_noisy:<10.4f} | {prec_noisy:<10.4f} | {rec_noisy:<10.4f} | {f1_noisy:<10.4f}")

# Collect all rows into one frame; this frame is the cell's main product.
results_df_knn = pd.DataFrame(results_knn)
print("\n--- kNN Results Summary ---")
display(results_df_knn)

=== kNN Performance on Clean Test Set ===
Train Data      | k     | Accuracy   | Precision  | Recall     | F1-score  
--------------------------------------------------------------------------------
Clean           | 1     | 0.3857     | 0.4212     | 0.3857     | 0.3859    
20% Label Noise | 1     | 0.3244     | 0.3403     | 0.3244     | 0.3230    
Clean           | 3     | 0.3692     | 0.4441     | 0.3692     | 0.3669    
20% Label Noise | 3     | 0.3246     | 0.3921     | 0.3246     | 0.3180    
Clean           | 5     | 0.3840     | 0.4510     | 0.3840     | 0.3795    
20% Label Noise | 5     | 0.3490     | 0.4025     | 0.3490     | 0.3431    
Clean           | 7     | 0.3808     | 0.4562     | 0.3808     | 0.3759    
20% Label Noise | 7     | 0.3553     | 0.4165     | 0.3553     | 0.3491    
Clean           | 9     | 0.3847     | 0.4625     | 0.3847     | 0.3787    
20% Label Noise | 9     | 0.3642     | 0.4330     | 0.3642     | 0.3573    
Clean           | 11    | 0.3892     | 0.

Unnamed: 0,Classifier,Training Data,k,Accuracy,Precision,Recall,F1-score
0,kNN,Clean,1,0.3857,0.421208,0.3857,0.385881
1,kNN,20% Label Noise,1,0.3244,0.340311,0.3244,0.323009
2,kNN,Clean,3,0.3692,0.444107,0.3692,0.366851
3,kNN,20% Label Noise,3,0.3246,0.392129,0.3246,0.317997
4,kNN,Clean,5,0.384,0.45098,0.384,0.379514
5,kNN,20% Label Noise,5,0.349,0.40248,0.349,0.343098
6,kNN,Clean,7,0.3808,0.456209,0.3808,0.3759
7,kNN,20% Label Noise,7,0.3553,0.416525,0.3553,0.349148
8,kNN,Clean,9,0.3847,0.462547,0.3847,0.378683
9,kNN,20% Label Noise,9,0.3642,0.433009,0.3642,0.357295


# SVM

In [None]:
import cupy as cp
from cuml.svm import SVC as cuSVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Re-bind the shared inputs from the interactive namespace. Indexing with [...]
# raises KeyError when the data-loading cell has not been run yet.
X_train = get_ipython().user_ns['X_train']
X_test  = get_ipython().user_ns['X_test']
y_train_clean = get_ipython().user_ns['y_train_clean']
y_train_noisy = get_ipython().user_ns['y_train_noisy']
y_test = get_ipython().user_ns['y_test']
SEED   = get_ipython().user_ns['SEED']
# Flatten every image to a single feature vector (one row per sample).
X_train_flat = X_train.view(X_train.size(0), -1).cpu().numpy()
X_test_flat  = X_test .view(X_test.size(0), -1).cpu().numpy()


# Scaler is fitted on the training images only. The "clean" and "noisy" feature
# matrices are computed from the same images, so they hold identical values;
# only the labels paired with them differ.
scaler = StandardScaler().fit(X_train_flat)
X_train_scaled_clean = scaler.transform(X_train_flat)
X_train_scaled_noisy = scaler.transform(X_train_flat)
X_test_scaled        = scaler.transform(X_test_flat)


# 100-component PCA fitted on the scaled training features.
pca = PCA(n_components=100, random_state=SEED).fit(X_train_scaled_clean)
X_train_pca_clean = pca.transform(X_train_scaled_clean)
X_train_pca_noisy = pca.transform(X_train_scaled_noisy)
X_test_pca        = pca.transform(X_test_scaled)


# Aliases only: these names point at the same arrays as the originals.
y_train_clean_np = y_train_clean
y_train_noisy_np = y_train_noisy
y_test_np        = y_test
# Hyper-parameter grid: every (kernel, C) pair is trained twice, once on the
# clean labels and once on the noisy labels, and scored on the clean test set.
param_grid = {'C': [0.1, 1, 10], 'kernel': ['rbf']}
results_svm = []

# Running best test accuracy (and the parameters that produced it) per label set.
best_acc_clean_svm = 0
best_params_clean_svm = {}
best_acc_noisy_svm = 0
best_params_noisy_svm = {}

# Host -> GPU transfers do not depend on the hyper-parameters, so they are done
# once here instead of being repeated for every grid point.
Xc = cp.asarray(X_train_pca_clean)
Xn = cp.asarray(X_train_pca_noisy)
Xt = cp.asarray(X_test_pca)
yc = cp.asarray(y_train_clean_np)
yn = cp.asarray(y_train_noisy_np)

print("=== cuML SVM (GPU) Performance ===")
print(f"{'Train Data':<15} | {'Kernel':<8} | {'C':<5} | {'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}")
print("-" * 90)

for kernel in param_grid['kernel']:
    for C in param_grid['C']:
        # --- trained on clean labels -------------------------------------
        clf_clean = cuSVC(kernel=kernel, C=C, gamma='scale', random_state=SEED)
        clf_clean.fit(Xc, yc)
        # Predictions are copied back to the host because the sklearn metrics
        # below are fed NumPy arrays.
        y_pred_clean = cp.asnumpy(clf_clean.predict(Xt))
        acc_c  = accuracy_score(y_test_np, y_pred_clean)
        prec_c = precision_score(y_test_np, y_pred_clean, average='macro', zero_division=0)
        rec_c  = recall_score(y_test_np, y_pred_clean, average='macro', zero_division=0)
        f1_c   = f1_score(y_test_np, y_pred_clean, average='macro', zero_division=0)

        results_svm.append({
            'Classifier':   'cuML SVM',
            'Training Data':'Clean',
            'Kernel':       kernel,
            'C':            C,
            'Accuracy':     acc_c,
            'Precision':    prec_c,
            'Recall':       rec_c,
            'F1-score':     f1_c
        })
        if acc_c > best_acc_clean_svm:
            best_acc_clean_svm = acc_c
            best_params_clean_svm = {'Kernel': kernel, 'C': C}

        print(f"{'Clean':<15} | {kernel:<8} | {C:<5} | {acc_c:<10.4f} | {prec_c:<10.4f} | {rec_c:<10.4f} | {f1_c:<10.4f}")

        # --- trained on noisy labels -------------------------------------
        clf_noisy = cuSVC(kernel=kernel, C=C, gamma='scale', random_state=SEED)
        clf_noisy.fit(Xn, yn)
        y_pred_noisy = cp.asnumpy(clf_noisy.predict(Xt))

        acc_n  = accuracy_score(y_test_np, y_pred_noisy)
        prec_n = precision_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)
        rec_n  = recall_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)
        f1_n   = f1_score(y_test_np, y_pred_noisy, average='macro', zero_division=0)

        results_svm.append({
            'Classifier':   'cuML SVM',
            'Training Data':'20% Label Noise',
            'Kernel':       kernel,
            'C':            C,
            'Accuracy':     acc_n,
            'Precision':    prec_n,
            'Recall':       rec_n,
            'F1-score':     f1_n
        })
        if acc_n > best_acc_noisy_svm:
            best_acc_noisy_svm = acc_n
            best_params_noisy_svm = {'Kernel': kernel, 'C': C}

        print(f"{'20% Label Noise':<15} | {kernel:<8} | {C:<5} | {acc_n:<10.4f} | {prec_n:<10.4f} | {rec_n:<10.4f} | {f1_n:<10.4f}")

# NOTE(review): "best" is selected by accuracy on the test set itself, so the
# reported best scores are optimistic; a separate validation split would avoid that.
results_df_svm = pd.DataFrame(results_svm)
print("\n--- SVM Results Summary ---")
print(f"Best Clean params: {best_params_clean_svm} → Acc = {best_acc_clean_svm:.4f}")
print(f"Best Noisy params: {best_params_noisy_svm} → Acc = {best_acc_noisy_svm:.4f}")
display(results_df_svm)


=== cuML SVM (GPU) Performance ===
Train Data      | Kernel   | C     | Accuracy   | Precision  | Recall     | F1-score  
------------------------------------------------------------------------------------------
Clean           | rbf      | 0.1   | 0.4636     | 0.4622     | 0.4636     | 0.4600    
20% Label Noise | rbf      | 0.1   | 0.4534     | 0.4516     | 0.4534     | 0.4492    
Clean           | rbf      | 1     | 0.5414     | 0.5407     | 0.5414     | 0.5400    
20% Label Noise | rbf      | 1     | 0.5296     | 0.5274     | 0.5296     | 0.5274    
Clean           | rbf      | 10    | 0.5667     | 0.5659     | 0.5667     | 0.5661    
20% Label Noise | rbf      | 10    | 0.5282     | 0.5279     | 0.5282     | 0.5275    

--- SVM Results Summary ---
Best Clean params: {'Kernel': 'rbf', 'C': 10} → Acc = 0.5667
Best Noisy params: {'Kernel': 'rbf', 'C': 1} → Acc = 0.5296


Unnamed: 0,Classifier,Training Data,Kernel,C,Accuracy,Precision,Recall,F1-score
0,cuML SVM,Clean,rbf,0.1,0.4636,0.462201,0.4636,0.459996
1,cuML SVM,20% Label Noise,rbf,0.1,0.4534,0.451557,0.4534,0.44916
2,cuML SVM,Clean,rbf,1.0,0.5414,0.540681,0.5414,0.539966
3,cuML SVM,20% Label Noise,rbf,1.0,0.5296,0.527445,0.5296,0.527402
4,cuML SVM,Clean,rbf,10.0,0.5667,0.565941,0.5667,0.56607
5,cuML SVM,20% Label Noise,rbf,10.0,0.5282,0.527859,0.5282,0.527547


# Random forest

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
import torch
from cuml.decomposition import PCA as cuPCA
from cuml.preprocessing import StandardScaler as cuStandardScaler
from cuml.ensemble import RandomForestClassifier as cuRandomForestClassifier
import cupy as cp

# Re-bind the shared inputs from the interactive namespace. .get() yields None
# for a missing name, so a skipped data-loading cell surfaces as an
# AttributeError on the .view() call below rather than as a KeyError here.
X_train = get_ipython().user_ns.get('X_train')
X_test = get_ipython().user_ns.get('X_test')
y_train_clean = get_ipython().user_ns.get('y_train_clean')
y_train_noisy = get_ipython().user_ns.get('y_train_noisy')
y_test = get_ipython().user_ns.get('y_test')
# NOTE(review): SEED is fetched but not passed to any estimator in this cell.
SEED = get_ipython().user_ns.get('SEED')
# Flatten the images and move features and labels to the GPU as CuPy arrays.
X_train_cp = cp.asarray(X_train.view(X_train.size(0), -1).cpu().numpy())
X_test_cp = cp.asarray(X_test.view(X_test.size(0), -1).cpu().numpy())
y_train_clean_cp = cp.asarray(y_train_clean)
y_train_noisy_cp = cp.asarray(y_train_noisy)
y_test_cp = cp.asarray(y_test)


# Scaler fitted on the training images only and reused for every split.
scaler = cuStandardScaler().fit(X_train_cp)
X_train_scaled_clean = scaler.transform(X_train_cp)
X_train_scaled_noisy = scaler.transform(X_train_cp) # Use the same image data, only labels are noisy
X_test_scaled  = scaler.transform(X_test_cp)


# 100-component PCA fitted on the scaled training features.
pca = cuPCA(n_components=100).fit(X_train_scaled_clean)
X_train_pca_clean = pca.transform(X_train_scaled_clean)
X_train_pca_noisy = pca.transform(X_train_scaled_noisy)
X_test_pca  = pca.transform(X_test_scaled)
# Host-side test labels, used by the sklearn metric functions.
y_test_np = y_test
# Grid explored below: every (n_estimators, max_depth) combination.
param_grid_rf = {'n_estimators': [50, 100, 200], 'max_depth': [10, 20, 30]}
results_rf = []

print("\n=== Random Forest Performance on Clean Test Set with Hyperparameter Tuning (GPU) ===")
print(f"{'Train Data':<15} | {'n_estimators':<14} | {'max_depth':<11} | {'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}")
print("-" * 100)

# Running best test accuracy (and the parameters that produced it) per label set.
best_acc_clean_rf = 0
best_params_clean_rf = {}
best_acc_noisy_rf = 0
best_params_noisy_rf = {}
# Every grid point is trained twice (clean labels, then noisy labels) and both
# models are scored against the clean test labels.
for n_estimators in param_grid_rf['n_estimators']:
    for max_depth in param_grid_rf['max_depth']:
        # --- trained on clean labels -------------------------------------
        # NOTE(review): no random_state is passed, so repeated runs may differ.
        clf_rf_clean = cuRandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
        clf_rf_clean.fit(X_train_pca_clean, y_train_clean_cp)
        y_pred_rf_clean_cp = clf_rf_clean.predict(X_test_pca)
        # Copy predictions to the host for the sklearn metric functions.
        y_pred_rf_clean = cp.asnumpy(y_pred_rf_clean_cp)


        acc_clean = accuracy_score(y_test_np, y_pred_rf_clean)
        prec_clean = precision_score(y_test_np, y_pred_rf_clean, average='macro', zero_division=0)
        rec_clean = recall_score(y_test_np, y_pred_rf_clean, average='macro', zero_division=0)
        f1_clean = f1_score(y_test_np, y_pred_rf_clean, average='macro', zero_division=0)

        results_rf.append({
            'Classifier': 'Random Forest',
            'Training Data': 'Clean',
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'Accuracy': acc_clean,
            'Precision': prec_clean,
            'Recall': rec_clean,
            'F1-score': f1_clean,
        })

        # Track the best-scoring configuration for the clean-label models.
        if acc_clean > best_acc_clean_rf:
            best_acc_clean_rf = acc_clean
            best_params_clean_rf = {'n_estimators': n_estimators, 'max_depth': max_depth}

        print(f"{'Clean':<15} | {n_estimators:<14} | {str(max_depth):<11} | {acc_clean:<10.4f} | {prec_clean:<10.4f} | {rec_clean:<10.4f} | {f1_clean:<10.4f}")

        # --- trained on noisy labels -------------------------------------
        clf_rf_noisy = cuRandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
        clf_rf_noisy.fit(X_train_pca_noisy, y_train_noisy_cp)
        y_pred_rf_noisy_cp = clf_rf_noisy.predict(X_test_pca)
        y_pred_rf_noisy = cp.asnumpy(y_pred_rf_noisy_cp)
        acc_noisy = accuracy_score(y_test_np, y_pred_rf_noisy)
        prec_noisy = precision_score(y_test_np, y_pred_rf_noisy, average='macro', zero_division=0)
        rec_noisy = recall_score(y_test_np, y_pred_rf_noisy, average='macro', zero_division=0)
        f1_noisy = f1_score(y_test_np, y_pred_rf_noisy, average='macro', zero_division=0)

        results_rf.append({
            'Classifier': 'Random Forest',
            'Training Data': '20% Label Noise',
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'Accuracy': acc_noisy,
            'Precision': prec_noisy,
            'Recall': rec_noisy,
            'F1-score': f1_noisy,
        })

        # Track the best-scoring configuration for the noisy-label models.
        if acc_noisy > best_acc_noisy_rf:
            best_acc_noisy_rf = acc_noisy
            best_params_noisy_rf = {'n_estimators': n_estimators, 'max_depth': max_depth}

        print(f"{'20% Label Noise':<15} | {n_estimators:<14} | {str(max_depth):<11} | {acc_noisy:<10.4f} | {prec_noisy:<10.4f} | {rec_noisy:<10.4f} | {f1_noisy:<10.4f}")

# Collect every grid-search row into one frame; later cells read results_df_rf.
results_df_rf = pd.DataFrame(results_rf)

print("\n--- Random Forest Results Summary ---")
print(f"Best parameters for Clean Training Data: {best_params_clean_rf} with Accuracy: {best_acc_clean_rf:.4f}")
# Report the parameter dict here; the accuracy was previously printed in both slots.
print(f"Best parameters for 20% Label Noise Training Data: {best_params_noisy_rf} with Accuracy: {best_acc_noisy_rf:.4f}")
display(results_df_rf)


=== Random Forest Performance on Clean Test Set with Hyperparameter Tuning (GPU) ===
Train Data      | n_estimators   | max_depth   | Accuracy   | Precision  | Recall     | F1-score  
----------------------------------------------------------------------------------------------------
Clean           | 50             | 10          | 0.3965     | 0.3928     | 0.3965     | 0.3893    
20% Label Noise | 50             | 10          | 0.3947     | 0.3928     | 0.3947     | 0.3871    
Clean           | 50             | 20          | 0.4155     | 0.4127     | 0.4155     | 0.4121    
20% Label Noise | 50             | 20          | 0.3832     | 0.3794     | 0.3832     | 0.3793    
Clean           | 50             | 30          | 0.4030     | 0.4009     | 0.4030     | 0.4012    
20% Label Noise | 50             | 30          | 0.3762     | 0.3749     | 0.3762     | 0.3748    
Clean           | 100            | 10          | 0.4020     | 0.3973     | 0.4020     | 0.3933    
20% Label Noise | 100

Unnamed: 0,Classifier,Training Data,n_estimators,max_depth,Accuracy,Precision,Recall,F1-score
0,Random Forest,Clean,50,10,0.3965,0.392789,0.3965,0.389295
1,Random Forest,20% Label Noise,50,10,0.3947,0.392808,0.3947,0.387099
2,Random Forest,Clean,50,20,0.4155,0.412714,0.4155,0.412147
3,Random Forest,20% Label Noise,50,20,0.3832,0.379431,0.3832,0.379315
4,Random Forest,Clean,50,30,0.403,0.400856,0.403,0.401219
5,Random Forest,20% Label Noise,50,30,0.3762,0.374946,0.3762,0.374784
6,Random Forest,Clean,100,10,0.402,0.397312,0.402,0.393316
7,Random Forest,20% Label Noise,100,10,0.4028,0.400326,0.4028,0.393734
8,Random Forest,Clean,100,20,0.4403,0.437667,0.4403,0.436192
9,Random Forest,20% Label Noise,100,20,0.425,0.421859,0.425,0.420156


# CNN

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier for 3x32x32 images.

    Architecture: conv(3->32) + ReLU + 2x2 max-pool, conv(32->64) + ReLU +
    2x2 max-pool, then a 128-unit fully connected layer with dropout and a
    10-way output layer. The first linear layer expects 64 * 8 * 8 features,
    i.e. a 32x32 input halved twice by the pooling layers.

    Args:
        dropout_rate: drop probability used by both dropout layers.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)


    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension fixed means a wrongly
        # sized input fails loudly in fc1 instead of being silently re-batched.
        x = x.flatten(1)
        x = self.dropout1(F.relu(self.fc1(x)))
        # NOTE(review): dropout is applied twice in a row to the same activations,
        # which compounds the drop probability. Kept as-is so results stay
        # comparable; confirm whether a single dropout was intended.
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy against smoothed one-hot targets.

    The target class receives weight ``1 - smoothing`` and each of the other
    ``num_classes - 1`` classes receives ``smoothing / (num_classes - 1)``.
    The loss is the batch mean of the negative weighted log-probabilities.
    """

    def __init__(self, smoothing=0.1, num_classes=10):
        super().__init__()
        self.smoothing = smoothing
        self.num_classes = num_classes

    def forward(self, inputs, targets):
        """Return the scalar smoothed loss for logits ``inputs`` and class indices ``targets``."""
        # Start from the off-target weight everywhere ...
        soft_targets = inputs.new_ones(inputs.size())
        soft_targets = soft_targets * self.smoothing / (self.num_classes - 1)
        # ... then overwrite the target column of every row with the on-target weight.
        on_value = 1.0 - self.smoothing
        soft_targets.scatter_(1, targets.unsqueeze(1), on_value)

        log_probs = F.log_softmax(inputs, dim=-1)
        weighted = (soft_targets * log_probs).sum(dim=-1)
        return -(weighted.mean())

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd


# Two independent networks with the same architecture: one per label set.
model_clean = SimpleCNN(dropout_rate=0.5)
model_noisy = SimpleCNN(dropout_rate=0.5)
# The clean model uses plain cross-entropy; the noisy-label model uses the
# label-smoothing loss, so the two loss curves are not on the same scale.
criterion_clean = nn.CrossEntropyLoss()
criterion_noisy = LabelSmoothingCrossEntropy(smoothing=0.1, num_classes=num_classes)


optimizer_clean = optim.Adam(model_clean.parameters(), lr=0.001)
optimizer_noisy = optim.Adam(model_noisy.parameters(), lr=0.001)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_clean.to(device)
model_noisy.to(device)


EPOCHS = 20

print(f"Training models on {device}")
print("\n--- Training on Clean Data ---")
for epoch in range(EPOCHS):
    model_clean.train()  # enable dropout for training
    running_loss = 0.0
    for inputs, labels in train_loader_clean:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer_clean.zero_grad()
        outputs = model_clean(inputs)
        loss = criterion_clean(outputs, labels)
        loss.backward()
        optimizer_clean.step()
        running_loss += loss.item()
    # Reported value is the mean of the per-batch losses for the epoch.
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader_clean):.4f}")

print("\n--- Training on 20% Label Noise with Label Smoothing ---")
for epoch in range(EPOCHS):
    model_noisy.train()  # enable dropout for training
    running_loss = 0.0
    for inputs, labels in train_loader_noisy:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer_noisy.zero_grad()
        outputs = model_noisy(inputs)
        loss = criterion_noisy(outputs, labels) # smoothed-target loss on the corrupted labels
        loss.backward()
        optimizer_noisy.step()
        running_loss += loss.item()
    # Reported value is the mean of the per-batch losses for the epoch.
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader_noisy):.4f}")

print("\nTraining complete for both models.")


def evaluate_model(model, data_loader, device):
    """Score ``model`` on every batch of ``data_loader``.

    The model is switched to eval mode and run without gradient tracking; the
    predicted class is the arg-max over dimension 1 of the outputs. Ground
    truth is taken from the labels yielded by the loader.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; the last three are
        macro-averaged with ``zero_division=0``.
    """
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_inputs)
            y_true.extend(batch_labels.cpu().tolist())
            y_pred.extend(scores.argmax(dim=1).cpu().tolist())

    macro = {'average': 'macro', 'zero_division': 0}
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, **macro),
        recall_score(y_true, y_pred, **macro),
        f1_score(y_true, y_pred, **macro),
    )

# Evaluate the model trained on clean labels against the clean test set.
print("\n--- Evaluating Model Trained on Clean Data (on Clean Test Set) ---")
acc_clean_cnn, prec_clean_cnn, rec_clean_cnn, f1_clean_cnn = evaluate_model(model_clean, test_loader_clean, device)
print(f"Accuracy:  {acc_clean_cnn:.4f}")
print(f"Precision: {prec_clean_cnn:.4f}")
print(f"Recall:    {rec_clean_cnn:.4f}")
print(f"F1-score:  {f1_clean_cnn:.4f}")

# Evaluate the model trained on noisy labels against the same clean test set.
print("\n--- Evaluating Model Trained on 20% Label Noise (on Clean Test Set) ---")
acc_noisy_cnn, prec_noisy_cnn, rec_noisy_cnn, f1_noisy_cnn = evaluate_model(model_noisy, test_loader_clean, device)
print(f"Accuracy:  {acc_noisy_cnn:.4f}")
print(f"Precision: {prec_noisy_cnn:.4f}")
print(f"Recall:    {rec_noisy_cnn:.4f}")
print(f"F1-score:  {f1_noisy_cnn:.4f}")

# Column-oriented record: index 0 is the clean-label model, index 1 the
# noisy-label model.
results_cnn = {
    'Classifier': ['SimpleCNN', 'SimpleCNN'],
    'Training Data': ['Clean', '20% Label Noise (Label Smoothing)'],
    'Accuracy': [acc_clean_cnn, acc_noisy_cnn],
    'Precision': [prec_clean_cnn, prec_noisy_cnn],
    'Recall': [rec_clean_cnn, rec_noisy_cnn],
    'F1-score': [f1_clean_cnn, f1_noisy_cnn]
}
results_df_cnn = pd.DataFrame(results_cnn)

print("\n--- Comparison of SimpleCNN Performance on Clean Test Set ---")
display(results_df_cnn)

Training models on cuda

--- Training on Clean Data ---
Epoch 1/20, Loss: 1.7584
Epoch 2/20, Loss: 1.4619
Epoch 3/20, Loss: 1.3525
Epoch 4/20, Loss: 1.2731
Epoch 5/20, Loss: 1.2176
Epoch 6/20, Loss: 1.1674
Epoch 7/20, Loss: 1.1306
Epoch 8/20, Loss: 1.0943
Epoch 9/20, Loss: 1.0664
Epoch 10/20, Loss: 1.0434
Epoch 11/20, Loss: 1.0113
Epoch 12/20, Loss: 0.9951
Epoch 13/20, Loss: 0.9740
Epoch 14/20, Loss: 0.9575
Epoch 15/20, Loss: 0.9449
Epoch 16/20, Loss: 0.9319
Epoch 17/20, Loss: 0.9113
Epoch 18/20, Loss: 0.8979
Epoch 19/20, Loss: 0.8882
Epoch 20/20, Loss: 0.8764

--- Training on 20% Label Noise with Label Smoothing ---
Epoch 1/20, Loss: 2.1074
Epoch 2/20, Loss: 2.0016
Epoch 3/20, Loss: 1.9575
Epoch 4/20, Loss: 1.9278
Epoch 5/20, Loss: 1.9080
Epoch 6/20, Loss: 1.8830
Epoch 7/20, Loss: 1.8662
Epoch 8/20, Loss: 1.8491
Epoch 9/20, Loss: 1.8388
Epoch 10/20, Loss: 1.8289
Epoch 11/20, Loss: 1.8127
Epoch 12/20, Loss: 1.7988
Epoch 13/20, Loss: 1.7934
Epoch 14/20, Loss: 1.7793
Epoch 15/20, Loss: 1

Unnamed: 0,Classifier,Training Data,Accuracy,Precision,Recall,F1-score
0,SimpleCNN,Clean,0.7191,0.721964,0.7191,0.719259
1,SimpleCNN,20% Label Noise (Label Smoothing),0.6793,0.679448,0.6793,0.67832


# Zapisz podsumowanie do jednego pliku

In [None]:
import pandas as pd

# Re-bind the per-classifier result frames from the interactive namespace.
# .get() yields None when a classifier cell has not been run.
# NOTE(review): pandas.concat is documented to drop None entries silently, so a
# skipped cell would simply be missing from the summary - confirm this is intended.
results_df_knn = get_ipython().user_ns.get('results_df_knn')
results_df_svm = get_ipython().user_ns.get('results_df_svm')
results_df_rf = get_ipython().user_ns.get('results_df_rf')
results_df_cnn = get_ipython().user_ns.get('results_df_cnn')

def _format_number(value):
    """Render a numeric hyper-parameter without a spurious ``.0`` suffix.

    Concatenating frames with different columns upcasts integer columns to
    float (missing cells become NaN), so ``k=1`` would otherwise print as
    ``k=1.0``. Values that are not numeric are returned via ``str``.
    """
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        return str(value)
    # is_integer() is False for NaN and infinities, so those fall through to str().
    if as_float.is_integer():
        return str(int(as_float))
    return str(as_float)


def get_params_string(row):
    """Build a human-readable hyper-parameter description for one result row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with a ``'Classifier'``
            entry plus the hyper-parameter entries of that classifier.

    Returns:
        The parameter string, ``"Default/Trained"`` for ``SimpleCNN``, or an
        empty string for an unrecognised classifier label.
    """
    classifier = row['Classifier']
    if classifier == 'kNN':
        return f"k={_format_number(row['k'])}"
    # The SVM cell labels its rows 'cuML SVM'; plain 'SVM' is still accepted.
    if classifier in ('SVM', 'cuML SVM'):
        return f"Kernel={row['Kernel']}, C={_format_number(row['C'])}"
    if classifier == 'Random Forest':
        return (
            f"n_estimators={_format_number(row['n_estimators'])}, "
            f"max_depth={_format_number(row['max_depth'])}"
        )
    if classifier == 'SimpleCNN':
        return "Default/Trained"
    return ""

# Stack the four result frames. Their hyper-parameter columns differ, so cells
# that do not apply to a given classifier are filled with NaN.
all_results_df = pd.concat([results_df_knn, results_df_svm, results_df_rf, results_df_cnn], ignore_index=True)
# Collapse the per-classifier hyper-parameter columns into one text column ...
all_results_df['Parameters'] = all_results_df.apply(get_params_string, axis=1)
# ... and drop the originals; errors='ignore' tolerates columns that are absent.
all_results_df = all_results_df.drop(columns=['k', 'Kernel', 'C', 'n_estimators', 'max_depth'], errors='ignore')

output_file_name = 'classifier_performance_summary.xlsx'

# Best-effort export: a failure is reported but does not stop the notebook.
try:
    all_results_df.to_excel(output_file_name, index=False)
    print(f"All results saved successfully to {output_file_name}")
except Exception as e:
    print(f"An error occurred while saving the Excel file: {e}")

print("\n--- Consolidated Results Summary (with Parameters) ---")
display(all_results_df)

All results saved successfully to classifier_performance_summary.xlsx

--- Consolidated Results Summary (with Parameters) ---


Unnamed: 0,Classifier,Training Data,Accuracy,Precision,Recall,F1-score,Parameters
0,kNN,Clean,0.3857,0.421208,0.3857,0.385881,k=1.0
1,kNN,20% Label Noise,0.3244,0.340311,0.3244,0.323009,k=1.0
2,kNN,Clean,0.3692,0.444107,0.3692,0.366851,k=3.0
3,kNN,20% Label Noise,0.3246,0.392129,0.3246,0.317997,k=3.0
4,kNN,Clean,0.384,0.45098,0.384,0.379514,k=5.0
5,kNN,20% Label Noise,0.349,0.40248,0.349,0.343098,k=5.0
6,kNN,Clean,0.3808,0.456209,0.3808,0.3759,k=7.0
7,kNN,20% Label Noise,0.3553,0.416525,0.3553,0.349148,k=7.0
8,kNN,Clean,0.3847,0.462547,0.3847,0.378683,k=9.0
9,kNN,20% Label Noise,0.3642,0.433009,0.3642,0.357295,k=9.0


# **SZUMY**

# Import i preprocessing

In [None]:
import torch
import numpy as np
import random
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import time
import cv2
import os

# --- Experiment configuration -------------------------------------------------
N_train   = 50000            # number of training samples kept after shuffling
N_test    =  10000           # number of test samples kept after shuffling
num_classes = 10
salt_pepper_ratio = 0.1      # 'ratio' argument passed to add_salt_and_pepper_noise
gaussian_noise_std = 51      # 'std' argument passed to add_gaussian_noise (0-255 pixel scale)
# Seed every RNG this cell relies on (Python, NumPy, PyTorch).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Per-channel statistics handed to Normalize
# (presumably the CIFAR-10 training-set mean/std; verify if the dataset changes).
mean = (0.4914, 0.4822, 0.4465)
std  = (0.2470, 0.2435, 0.2616)

# Images are loaded un-normalised (ToTensor only) so that noise can be added
# in pixel space; normalisation is applied separately afterwards.
transform_normalize = transforms.Normalize(mean, std)
train_ds_unnormalized = datasets.CIFAR10(root='./data', train=True,  download=True, transform=transforms.ToTensor())
test_ds_unnormalized  = datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

# Materialise a shuffled subset as one tensor plus one label array.
# Each sample is fetched exactly once: indexing the dataset re-runs the transform,
# so reading the image and the label in separate passes doubled the work.
perm_tr    = np.random.permutation(len(train_ds_unnormalized))[:N_train]
train_images, train_labels = zip(*(train_ds_unnormalized[i] for i in perm_tr))
X_train_clean_tensor_unnormalized    = torch.stack(train_images)
y_train_clean_np    = np.array(train_labels)
del train_images, train_labels  # the stacked tensor is the only copy needed from here on

perm_te    = np.random.permutation(len(test_ds_unnormalized))[:N_test]
test_images, test_labels = zip(*(test_ds_unnormalized[i] for i in perm_te))
X_test_clean_tensor_unnormalized     = torch.stack(test_images)
y_test_clean_np     = np.array(test_labels)
del test_images, test_labels

def add_salt_and_pepper_noise(image_tensor, ratio):
    """Return a copy of a (C, H, W) image in [0, 1] with salt-and-pepper noise.

    ``ratio`` is the fraction of array elements (pixels times channels) drawn
    for corruption; half of the draws are set to the maximum value ("salt")
    and half to zero ("pepper"). Positions are drawn with replacement via
    ``np.random.randint``, so the number of distinct corrupted elements can be
    lower than the number of draws, and pepper may overwrite salt.

    Args:
        image_tensor: float image tensor laid out as (C, H, W), values in [0, 1].
        ratio: non-negative fraction of elements to draw.

    Returns:
        ``torch.float32`` tensor of the same shape, quantised to 8-bit levels.

    Raises:
        ValueError: if ``ratio`` is negative.
    """
    if ratio < 0:
        raise ValueError(f"ratio must be non-negative, got {ratio}")

    # Convert to 8-bit HWC. Round (not truncate) so that k/255 maps back to k even
    # when the float product lands just below the integer.
    scaled = np.array(image_tensor * 255, dtype=np.float32)
    img_np = np.rint(scaled).clip(0, 255).astype(np.uint8).transpose(1, 2, 0)
    out = np.copy(img_np)

    s_vs_p = 0.5  # share of the corrupted elements that become salt

    # randint's upper bound is exclusive, so pass the full axis length: using
    # `length - 1` would never touch the last row, last column or last channel.
    num_salt = int(np.ceil(ratio * img_np.size * s_vs_p))
    salt_coords = tuple(np.random.randint(0, length, num_salt) for length in img_np.shape)
    out[salt_coords] = 255

    num_pepper = int(np.ceil(ratio * img_np.size * (1. - s_vs_p)))
    pepper_coords = tuple(np.random.randint(0, length, num_pepper) for length in img_np.shape)
    out[pepper_coords] = 0

    # Back to CHW float in [0, 1].
    return torch.from_numpy(out.transpose(2, 0, 1)).float() / 255.0

def add_gaussian_noise(image_tensor, std):
    """Return a (C, H, W) image with additive zero-mean Gaussian noise.

    The image is scaled to the 0-255 range, noise with standard deviation
    ``std`` (on that 0-255 scale) is drawn with ``np.random.normal`` and added,
    the result is clipped to [0, 255] and scaled back to [0, 1] as float32.
    """
    # Work in HWC layout on the 0-255 scale.
    pixels_hwc = np.array(image_tensor * 255, dtype=np.float32).transpose(1, 2, 0)
    noise = np.random.normal(0, std, pixels_hwc.shape)
    corrupted = np.clip(pixels_hwc + noise, 0, 255)
    # Back to CHW layout and the [0, 1] scale.
    as_chw = torch.from_numpy(corrupted.transpose(2, 0, 1))
    return as_chw.float() / 255.0


# Corrupted copies of the training images (still un-normalised, values in [0, 1]).
X_train_salt_pepper_tensor_unnormalized = torch.stack([add_salt_and_pepper_noise(img, salt_pepper_ratio) for img in X_train_clean_tensor_unnormalized])
X_train_gaussian_tensor_unnormalized = torch.stack([add_gaussian_noise(img, gaussian_noise_std) for img in X_train_clean_tensor_unnormalized])

# Corrupted copies of the test images, built with the same noise settings.
X_test_salt_pepper_tensor_unnormalized = torch.stack([add_salt_and_pepper_noise(img, salt_pepper_ratio) for img in X_test_clean_tensor_unnormalized])
X_test_gaussian_tensor_unnormalized = torch.stack([add_gaussian_noise(img, gaussian_noise_std) for img in X_test_clean_tensor_unnormalized])


# Normalisation is applied after the noise, image by image.
transform_normalize = transforms.Normalize(mean, std)

# Only the clean training set and the three test variants get a normalised
# version here; the noisy training tensors stay un-normalised.
X_train_clean_tensor_normalized = torch.stack([transform_normalize(img) for img in X_train_clean_tensor_unnormalized])
X_test_clean_tensor_normalized = torch.stack([transform_normalize(img) for img in X_test_clean_tensor_unnormalized])
X_test_salt_pepper_tensor_normalized = torch.stack([transform_normalize(img) for img in X_test_salt_pepper_tensor_unnormalized])
X_test_gaussian_tensor_normalized = torch.stack([transform_normalize(img) for img in X_test_gaussian_tensor_unnormalized])




batch_size = 128

# Loaders over the un-normalised training tensors (clean and both noise types),
# each paired with the unmodified training labels.
train_loader_clean_classical = DataLoader(list(zip(X_train_clean_tensor_unnormalized, y_train_clean_np)), batch_size=batch_size, shuffle=True)
train_loader_salt_pepper_classical = DataLoader(list(zip(X_train_salt_pepper_tensor_unnormalized, y_train_clean_np)), batch_size=batch_size, shuffle=True)
train_loader_gaussian_classical = DataLoader(list(zip(X_train_gaussian_tensor_unnormalized, y_train_clean_np)), batch_size=batch_size, shuffle=True)


# Loader over the normalised clean training tensor, used for CNN training.
train_loader_clean_cnn = DataLoader(list(zip(X_train_clean_tensor_normalized, y_train_clean_np)), batch_size=batch_size, shuffle=True)


# Test loaders over the normalised tensors; shuffle=False keeps the sample
# order aligned with y_test_clean_np.
test_loader_clean = DataLoader(list(zip(X_test_clean_tensor_normalized, y_test_clean_np)), batch_size=batch_size, shuffle=False)
test_loader_salt_pepper = DataLoader(list(zip(X_test_salt_pepper_tensor_normalized, y_test_clean_np)), batch_size=batch_size, shuffle=False)
test_loader_gaussian = DataLoader(list(zip(X_test_gaussian_tensor_normalized, y_test_clean_np)), batch_size=batch_size, shuffle=False)


# Sanity report of the number of images in each variant.
print("Data loading and preparation complete.")
print(f"Clean training data samples: {len(X_train_clean_tensor_unnormalized)}")
print(f"Salt and Pepper Noise training data samples: {len(X_train_salt_pepper_tensor_unnormalized)}")
print(f"Gaussian Noise training data samples: {len(X_train_gaussian_tensor_unnormalized)}")
print(f"Clean test data samples: {len(X_test_clean_tensor_unnormalized)}")
print(f"Salt and Pepper Noise test data samples: {len(X_test_salt_pepper_tensor_unnormalized)}")
print(f"Gaussian Noise test data samples: {len(X_test_gaussian_tensor_unnormalized)}")



def evaluate_model(model, data_loader, device, y_true_np=None):
    """Score a classifier on a data loader with macro-averaged metrics.

    Args:
        model: torch module whose output has one score per class along dim 1.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        device: device the inputs are moved to before the forward pass.
        y_true_np: ground-truth labels in the same order the loader yields
            samples. When omitted, the labels yielded by the loader itself are
            used, which stays correct even if the loader shuffles.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are macro-averaged with ``zero_division=0``.
    """
    model.eval()
    all_preds = []
    loader_labels = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs.to(device))
            # Index of the highest score per sample is the predicted class.
            all_preds.extend(outputs.argmax(dim=1).cpu().numpy())
            if y_true_np is None:
                # assumes the batch labels are tensor-convertible — TODO confirm for custom loaders
                loader_labels.extend(torch.as_tensor(labels).cpu().numpy())

    if y_true_np is None:
        y_true_np = np.asarray(loader_labels)

    accuracy = accuracy_score(y_true_np, all_preds)
    precision = precision_score(y_true_np, all_preds, average='macro', zero_division=0)
    recall = recall_score(y_true_np, all_preds, average='macro', zero_division=0)
    f1 = f1_score(y_true_np, all_preds, average='macro', zero_division=0)

    return accuracy, precision, recall, f1

def _flatten_images(images):
    """Reshape a tensor to 2-D (first dimension kept, the rest merged) as a NumPy array."""
    return images.view(images.size(0), -1).cpu().numpy()


# One row per image for the classical (non-CNN) pipeline.
X_train_clean_flat = _flatten_images(X_train_clean_tensor_unnormalized)
X_train_salt_pepper_flat = _flatten_images(X_train_salt_pepper_tensor_unnormalized)
X_train_gaussian_flat = _flatten_images(X_train_gaussian_tensor_unnormalized)

X_test_clean_flat = _flatten_images(X_test_clean_tensor_unnormalized)
X_test_salt_pepper_flat = _flatten_images(X_test_salt_pepper_tensor_unnormalized)
X_test_gaussian_flat = _flatten_images(X_test_gaussian_tensor_unnormalized)


# The scaler is fitted on the clean training set only; every other set is
# transformed with those same statistics.
scaler = StandardScaler().fit(X_train_clean_flat)
X_train_clean_scaled = scaler.transform(X_train_clean_flat)
X_train_salt_pepper_scaled = scaler.transform(X_train_salt_pepper_flat)
X_train_gaussian_scaled = scaler.transform(X_train_gaussian_flat)

X_test_clean_scaled = scaler.transform(X_test_clean_flat)
X_test_salt_pepper_scaled = scaler.transform(X_test_salt_pepper_flat)
X_test_gaussian_scaled = scaler.transform(X_test_gaussian_flat)


# PCA is likewise fitted on the clean training set only and then applied to all sets.
pca = PCA(n_components=100, random_state=SEED).fit(X_train_clean_scaled)
X_train_clean_pca = pca.transform(X_train_clean_scaled)
X_train_salt_pepper_pca = pca.transform(X_train_salt_pepper_scaled)
X_train_gaussian_pca = pca.transform(X_train_gaussian_scaled)

X_test_clean_pca = pca.transform(X_test_clean_scaled)
X_test_salt_pepper_pca = pca.transform(X_test_salt_pepper_scaled)
X_test_gaussian_pca = pca.transform(X_test_gaussian_scaled)


# y_train_clean_np / y_test_clean_np are used as-is; the former
# self-assignments of these names were no-ops and have been removed.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Data loading and preparation complete.
Clean training data samples: 50000
Salt and Pepper Noise training data samples: 50000
Gaussian Noise training data samples: 50000
Clean test data samples: 10000
Salt and Pepper Noise test data samples: 10000
Gaussian Noise test data samples: 10000


# KNN

In [None]:


def knn_classify_pca(x_train_pca, y_train_np, x_test_pca, k):
    """Fit a k-nearest-neighbours classifier and return its predictions.

    Args:
        x_train_pca: training feature matrix.
        y_train_np: training labels.
        x_test_pca: feature matrix to predict.
        k: number of neighbours.

    Returns:
        Predicted labels for ``x_test_pca``.
    """
    classifier = KNeighborsClassifier(n_neighbors=k).fit(x_train_pca, y_train_np)
    return classifier.predict(x_test_pca)


# Neighbour counts to sweep; one result row per (train set, k, test set).
ks = [1, 3, 5, 7, 9, 11]
results_knn = []

train_pca_data = {
    'Clean': X_train_clean_pca,
    'Salt and Pepper Noise': X_train_salt_pepper_pca,
    'Gaussian Noise': X_train_gaussian_pca
}

test_pca_data = {
    'Clean': X_test_clean_pca,
    'Salt and Pepper Noise': X_test_salt_pepper_pca,
    'Gaussian Noise': X_test_gaussian_pca
}

# Size the "Test Data" column from the longest label so rows stay aligned
# ("Salt and Pepper Noise" is wider than the previous fixed width of 20).
test_col_width = max(len('Test Data'), *(len(name) for name in test_pca_data))
header_knn = (
    f"{'Train Data':<25} | {'Test Data':<{test_col_width}} | {'k':<5} | "
    f"{'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}"
)

print("=== kNN Performance ===")
print(header_knn)
print("-" * len(header_knn))

# Every training variant shares the same (clean) labels.
y_train_data = y_train_clean_np

for train_type, x_train_data in train_pca_data.items():
    for k in ks:
        for test_type, x_test_data in test_pca_data.items():
            y_pred = knn_classify_pca(x_train_data, y_train_data, x_test_data, k)
            acc = accuracy_score(y_test_clean_np, y_pred)
            prec = precision_score(y_test_clean_np, y_pred, average='macro', zero_division=0)
            rec = recall_score(y_test_clean_np, y_pred, average='macro', zero_division=0)
            f1 = f1_score(y_test_clean_np, y_pred, average='macro', zero_division=0)

            results_knn.append({
                'Classifier': 'kNN',
                'Training Data': train_type,
                'Test Data': test_type,
                'k': k,
                'Accuracy': acc,
                'Precision': prec,
                'Recall': rec,
                'F1-score': f1
            })
            print(f"{train_type:<25} | {test_type:<{test_col_width}} | {k:<5} | {acc:<10.4f} | {prec:<10.4f} | {rec:<10.4f} | {f1:<10.4f}")


results_df_knn = pd.DataFrame(results_knn)
print("\n--- kNN Results Summary ---")
display(results_df_knn)


#KNN
=== kNN Performance ===
Train Data                | Test Data            | k     | Accuracy   | Precision  | Recall     | F1-score  
----------------------------------------------------------------------------------------------------------------------------------
Clean                     | Clean                | 1     | 0.3856     | 0.4211     | 0.3856     | 0.3857    
Clean                     | Salt and Pepper Noise | 1     | 0.3627     | 0.4152     | 0.3627     | 0.3606    
Clean                     | Gaussian Noise       | 1     | 0.3653     | 0.4075     | 0.3653     | 0.3618    
Clean                     | Clean                | 3     | 0.3687     | 0.4436     | 0.3687     | 0.3664    
Clean                     | Salt and Pepper Noise | 3     | 0.3421     | 0.4360     | 0.3421     | 0.3351    
Clean                     | Gaussian Noise       | 3     | 0.3435     | 0.4384     | 0.3435     | 0.3370    
Clean                     | Clean                | 5     | 0.3839     | 0.

Unnamed: 0,Classifier,Training Data,Test Data,k,Accuracy,Precision,Recall,F1-score
0,kNN,Clean,Clean,1,0.3856,0.421095,0.3856,0.385748
1,kNN,Clean,Salt and Pepper Noise,1,0.3627,0.415222,0.3627,0.360561
2,kNN,Clean,Gaussian Noise,1,0.3653,0.407487,0.3653,0.361836
3,kNN,Clean,Clean,3,0.3687,0.44358,0.3687,0.366432
4,kNN,Clean,Salt and Pepper Noise,3,0.3421,0.436006,0.3421,0.33515
5,kNN,Clean,Gaussian Noise,3,0.3435,0.438414,0.3435,0.337009
6,kNN,Clean,Clean,5,0.3839,0.451112,0.3839,0.379326
7,kNN,Clean,Salt and Pepper Noise,5,0.3487,0.435549,0.3487,0.338942
8,kNN,Clean,Gaussian Noise,5,0.3556,0.437005,0.3556,0.345596
9,kNN,Clean,Clean,7,0.3805,0.456123,0.3805,0.375717


# SVM

In [None]:

from cuml.svm import SVC as cuSVC
import cupy as cp


# Copy the PCA features and the labels into CuPy arrays once; the sweep below reuses them.
X_train_clean_pca_cp = cp.asarray(X_train_clean_pca)
X_train_salt_pepper_pca_cp = cp.asarray(X_train_salt_pepper_pca)
X_train_gaussian_pca_cp = cp.asarray(X_train_gaussian_pca)

X_test_clean_pca_cp = cp.asarray(X_test_clean_pca)
X_test_salt_pepper_pca_cp = cp.asarray(X_test_salt_pepper_pca)
X_test_gaussian_pca_cp = cp.asarray(X_test_gaussian_pca)

# (A stray bare `s` statement used to sit here and raised NameError on a fresh kernel.)
y_train_clean_cp = cp.asarray(y_train_clean_np)
y_test_clean_cp = cp.asarray(y_test_clean_np)
param_grid_svm = {'C': [0.1, 1, 10], 'kernel': ['rbf']}
results_svm = []

train_data_svm_cp = {
    'Clean': X_train_clean_pca_cp,
    'Salt and Pepper Noise': X_train_salt_pepper_pca_cp,
    'Gaussian Noise': X_train_gaussian_pca_cp
}

test_data_svm_cp = {
    'Clean': X_test_clean_pca_cp,
    'Salt and Pepper Noise': X_test_salt_pepper_pca_cp,
    'Gaussian Noise': X_test_gaussian_pca_cp
}

# Size the "Test Data" column from the longest label so rows stay aligned
# ("Salt and Pepper Noise" is wider than the previous fixed width of 20).
test_col_width = max(len('Test Data'), *(len(name) for name in test_data_svm_cp))
header_svm = (
    f"{'Train Data':<25} | {'Test Data':<{test_col_width}} | {'Kernel':<8} | {'C':<5} | "
    f"{'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}"
)

print("=== SVM Performance (GPU) ===")
print(header_svm)
print("-" * len(header_svm))

# Every training variant shares the same (clean) labels.
y_train_cp = y_train_clean_cp

for train_type, x_train_cp in train_data_svm_cp.items():
    for kernel in param_grid_svm['kernel']:
        for C in param_grid_svm['C']:
            # Train once per (train set, kernel, C) and score that single model
            # on every test set, instead of re-fitting it for each test set.
            clf_svm = cuSVC(kernel=kernel, C=C, gamma='scale', random_state=SEED)
            clf_svm.fit(x_train_cp, y_train_cp)

            for test_type, x_test_cp in test_data_svm_cp.items():
                y_pred_cp = clf_svm.predict(x_test_cp)
                # The sklearn metrics below are given host (NumPy) arrays.
                y_pred_np = cp.asnumpy(y_pred_cp)

                acc = accuracy_score(y_test_clean_np, y_pred_np)
                prec = precision_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)
                rec = recall_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)
                f1 = f1_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)

                results_svm.append({
                    'Classifier': 'SVM',
                    'Training Data': train_type,
                    'Test Data': test_type,
                    'Kernel': kernel,
                    'C': C,
                    'Accuracy': acc,
                    'Precision': prec,
                    'Recall': rec,
                    'F1-score': f1
                })
                print(f"{train_type:<25} | {test_type:<{test_col_width}} | {kernel:<8} | {C:<5} | {acc:<10.4f} | {prec:<10.4f} | {rec:<10.4f} | {f1:<10.4f}")


results_df_svm = pd.DataFrame(results_svm)
print("\n--- SVM Results Summary ---")
display(results_df_svm)


#SVM
=== SVM Performance (GPU) ===
Train Data                | Test Data            | Kernel   | C     | Accuracy   | Precision  | Recall     | F1-score  
--------------------------------------------------------------------------------------------------------------------------------------------
Clean                     | Clean                | rbf      | 0.1   | 0.4636     | 0.4622     | 0.4636     | 0.4600    
Clean                     | Salt and Pepper Noise | rbf      | 0.1   | 0.4522     | 0.4541     | 0.4522     | 0.4480    
Clean                     | Gaussian Noise       | rbf      | 0.1   | 0.4581     | 0.4585     | 0.4581     | 0.4548    
Clean                     | Clean                | rbf      | 1     | 0.5413     | 0.5406     | 0.5413     | 0.5399    
Clean                     | Salt and Pepper Noise | rbf      | 1     | 0.5254     | 0.5304     | 0.5254     | 0.5240    
Clean                     | Gaussian Noise       | rbf      | 1     | 0.5268     | 0.5281     | 0.526

Unnamed: 0,Classifier,Training Data,Test Data,Kernel,C,Accuracy,Precision,Recall,F1-score
0,SVM,Clean,Clean,rbf,0.1,0.4636,0.462198,0.4636,0.459996
1,SVM,Clean,Salt and Pepper Noise,rbf,0.1,0.4522,0.454051,0.4522,0.448029
2,SVM,Clean,Gaussian Noise,rbf,0.1,0.4581,0.458528,0.4581,0.454769
3,SVM,Clean,Clean,rbf,1.0,0.5413,0.540583,0.5413,0.539868
4,SVM,Clean,Salt and Pepper Noise,rbf,1.0,0.5254,0.530446,0.5254,0.52395
5,SVM,Clean,Gaussian Noise,rbf,1.0,0.5268,0.528087,0.5268,0.525538
6,SVM,Clean,Clean,rbf,10.0,0.5667,0.565908,0.5667,0.566051
7,SVM,Clean,Salt and Pepper Noise,rbf,10.0,0.553,0.558593,0.553,0.552744
8,SVM,Clean,Gaussian Noise,rbf,10.0,0.5471,0.548035,0.5471,0.546586
9,SVM,Salt and Pepper Noise,Clean,rbf,0.1,0.4575,0.459771,0.4575,0.45422


# Random Forest

In [None]:

from cuml.ensemble import RandomForestClassifier as cuRandomForestClassifier
# Hyper-parameter grid; one result row per (train set, n_estimators, max_depth, test set).
param_grid_rf = {'n_estimators': [50, 100, 200], 'max_depth': [10, 20, 30]}
results_rf = []

# Reuses the CuPy arrays created in the SVM cell.
train_data_rf_cp = {
    'Clean': X_train_clean_pca_cp,
    'Salt and Pepper Noise': X_train_salt_pepper_pca_cp,
    'Gaussian Noise': X_train_gaussian_pca_cp
}

test_data_rf_cp = {
    'Clean': X_test_clean_pca_cp,
    'Salt and Pepper Noise': X_test_salt_pepper_pca_cp,
    'Gaussian Noise': X_test_gaussian_pca_cp
}

# Size the "Test Data" column from the longest label so rows stay aligned
# ("Salt and Pepper Noise" is wider than the previous fixed width of 20).
test_col_width = max(len('Test Data'), *(len(name) for name in test_data_rf_cp))
header_rf = (
    f"{'Train Data':<25} | {'Test Data':<{test_col_width}} | {'n_estimators':<14} | {'max_depth':<11} | "
    f"{'Accuracy':<10} | {'Precision':<10} | {'Recall':<10} | {'F1-score':<10}"
)

print("\n=== Random Forest Performance (GPU) ===")
print(header_rf)
print("-" * len(header_rf))

# Every training variant shares the same (clean) labels.
y_train_cp = y_train_clean_cp

for train_type, x_train_cp in train_data_rf_cp.items():
    for n_estimators in param_grid_rf['n_estimators']:
        for max_depth in param_grid_rf['max_depth']:
            # Fit ONE forest per configuration and score it on all test sets.
            # Previously an unseeded forest was re-trained for each test set, so
            # the three test columns of a configuration compared different models.
            # NOTE(review): cuML may still need n_streams=1 for bit-exact
            # repeatability even with random_state set — confirm in the cuML docs.
            clf_rf = cuRandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                random_state=SEED,
            )
            clf_rf.fit(x_train_cp, y_train_cp)

            for test_type, x_test_cp in test_data_rf_cp.items():
                y_pred_cp = clf_rf.predict(x_test_cp)
                # The sklearn metrics below are given host (NumPy) arrays.
                y_pred_np = cp.asnumpy(y_pred_cp)

                acc = accuracy_score(y_test_clean_np, y_pred_np)
                prec = precision_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)
                rec = recall_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)
                f1 = f1_score(y_test_clean_np, y_pred_np, average='macro', zero_division=0)

                results_rf.append({
                    'Classifier': 'Random Forest',
                    'Training Data': train_type,
                    'Test Data': test_type,
                    'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'Accuracy': acc,
                    'Precision': prec,
                    'Recall': rec,
                    'F1-score': f1,
                })
                print(f"{train_type:<25} | {test_type:<{test_col_width}} | {n_estimators:<14} | {str(max_depth):<11} | {acc:<10.4f} | {prec:<10.4f} | {rec:<10.4f} | {f1:<10.4f}")


results_df_rf = pd.DataFrame(results_rf)
print("\n--- Random Forest Results Summary ---")
display(results_df_rf)


#Random forest

=== Random Forest Performance (GPU) ===
Train Data                | Test Data            | n_estimators   | max_depth   | Accuracy   | Precision  | Recall     | F1-score  
----------------------------------------------------------------------------------------------------------------------------------------------------------------
Clean                     | Clean                | 50             | 10          | 0.3984     | 0.3949     | 0.3984     | 0.3907    
Clean                     | Salt and Pepper Noise | 50             | 10          | 0.3932     | 0.3927     | 0.3932     | 0.3875    
Clean                     | Gaussian Noise       | 50             | 10          | 0.3906     | 0.3892     | 0.3906     | 0.3846    
Clean                     | Clean                | 50             | 20          | 0.4125     | 0.4087     | 0.4125     | 0.4085    
Clean                     | Salt and Pepper Noise | 50             | 20          | 0.4052     | 0.4055     | 0.4052     |

Unnamed: 0,Classifier,Training Data,Test Data,n_estimators,max_depth,Accuracy,Precision,Recall,F1-score
0,Random Forest,Clean,Clean,50,10,0.3984,0.394932,0.3984,0.390659
1,Random Forest,Clean,Salt and Pepper Noise,50,10,0.3932,0.392669,0.3932,0.387473
2,Random Forest,Clean,Gaussian Noise,50,10,0.3906,0.389194,0.3906,0.384564
3,Random Forest,Clean,Clean,50,20,0.4125,0.408738,0.4125,0.408531
4,Random Forest,Clean,Salt and Pepper Noise,50,20,0.4052,0.405478,0.4052,0.403021
...,...,...,...,...,...,...,...,...,...
76,Random Forest,Gaussian Noise,Salt and Pepper Noise,200,20,0.4405,0.437254,0.4405,0.435284
77,Random Forest,Gaussian Noise,Gaussian Noise,200,20,0.4418,0.437292,0.4418,0.435959
78,Random Forest,Gaussian Noise,Clean,200,30,0.4547,0.450641,0.4547,0.449070
79,Random Forest,Gaussian Noise,Salt and Pepper Noise,200,30,0.4427,0.438645,0.4427,0.438175


# CNN

In [None]:
class SimpleCNN(nn.Module):
    """Small CNN: two conv + ReLU + 2x2 max-pool stages, then two linear layers.

    ``forward`` reshapes the pooled feature map to rows of ``64 * 8 * 8``
    values, so it expects inputs whose two poolings leave an 8x8 map with
    64 channels. The network outputs 10 scores per sample.

    Args:
        dropout_rate: drop probability used by both dropout layers.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        # Layers are created in a fixed order: conv stages, pooling, linear head, dropouts.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.dropout2 = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        """Return the 10 class scores for a batch of images."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(-1, 64 * 8 * 8)
        hidden = F.relu(self.fc1(flat))
        # NOTE(review): both dropout layers are applied back-to-back to the same
        # activation; behaviour is kept as-is, but confirm this stacking is intended.
        hidden = self.dropout2(self.dropout1(hidden))
        return self.fc2(hidden)


class NoisyDataset(Dataset):
    """Dataset that can corrupt each image on access and then transform it.

    Args:
        data_tensor_unnormalized: indexable collection of images.
        labels_np: indexable collection of labels, aligned with the images.
        noise_type: ``'salt_pepper'``, ``'gaussian'``, or anything else for no noise.
        noise_ratio: value forwarded to ``add_salt_and_pepper_noise``.
        gaussian_std: value forwarded to ``add_gaussian_noise``.
        transform: optional callable applied to the (possibly noisy) image.
    """

    def __init__(self, data_tensor_unnormalized, labels_np, noise_type=None, noise_ratio=0.05, gaussian_std=25, transform=None):
        self.data, self.labels = data_tensor_unnormalized, labels_np
        self.noise_type = noise_type
        self.noise_ratio, self.gaussian_std = noise_ratio, gaussian_std
        self.transform = transform

    def __len__(self):
        """Number of images held."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``; noise is applied before the transform."""
        sample, target = self.data[idx], self.labels[idx]

        kind = self.noise_type
        if kind == 'salt_pepper':
            sample = add_salt_and_pepper_noise(sample, self.noise_ratio)
        elif kind == 'gaussian':
            sample = add_gaussian_noise(sample, self.gaussian_std)

        # Truthiness check (not `is not None`) mirrors how a falsy transform is skipped.
        if self.transform:
            sample = self.transform(sample)

        return sample, target


# One result row per (training variant, test variant).
results_cnn = []
EPOCHS = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# All three training datasets wrap the same clean images; the noisy ones pass a
# noise_type so NoisyDataset calls the noise helpers on each access.
train_dataset_clean_cnn = NoisyDataset(
    X_train_clean_tensor_unnormalized,
    y_train_clean_np,
    transform=transform_normalize,
)
train_dataset_salt_pepper_cnn = NoisyDataset(
    X_train_clean_tensor_unnormalized,
    y_train_clean_np,
    noise_type='salt_pepper',
    noise_ratio=salt_pepper_ratio,
    transform=transform_normalize,
)
train_dataset_gaussian_cnn = NoisyDataset(
    X_train_clean_tensor_unnormalized,
    y_train_clean_np,
    noise_type='gaussian',
    gaussian_std=gaussian_noise_std,
    transform=transform_normalize,
)

train_loader_clean_cnn = DataLoader(train_dataset_clean_cnn, batch_size=batch_size, shuffle=True)
train_loader_salt_pepper_cnn = DataLoader(train_dataset_salt_pepper_cnn, batch_size=batch_size, shuffle=True)
train_loader_gaussian_cnn = DataLoader(train_dataset_gaussian_cnn, batch_size=batch_size, shuffle=True)

train_loaders_cnn = {
    'Clean': train_loader_clean_cnn,
    'Salt and Pepper Noise (Augmentation)': train_loader_salt_pepper_cnn,
    'Gaussian Noise (Augmentation)': train_loader_gaussian_cnn
}

# Evaluation loaders are not shuffled, so predictions line up with y_test_clean_np.
test_loader_clean = DataLoader(
    list(zip(X_test_clean_tensor_normalized, y_test_clean_np)),
    batch_size=batch_size,
    shuffle=False,
)
test_loader_salt_pepper = DataLoader(
    list(zip(X_test_salt_pepper_tensor_normalized, y_test_clean_np)),
    batch_size=batch_size,
    shuffle=False,
)
test_loader_gaussian = DataLoader(
    list(zip(X_test_gaussian_tensor_normalized, y_test_clean_np)),
    batch_size=batch_size,
    shuffle=False,
)

test_loaders_cnn = {
    'Clean': test_loader_clean,
    'Salt and Pepper Noise': test_loader_salt_pepper,
    'Gaussian Noise': test_loader_gaussian
}


print(f"\nTraining CNN models on {device}")

for train_type, train_loader in train_loaders_cnn.items():
    print(f"\n--- Training on {train_type} ---")

    # A fresh model, loss and optimiser for every training variant.
    model = SimpleCNN(dropout_rate=0.5).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        # Mean of the per-batch losses for this epoch.
        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader):.4f}")

    # Score the trained model on every test variant.
    for test_type, test_loader in test_loaders_cnn.items():
        print(f"\n--- Evaluating Model Trained on {train_type} (on {test_type} Test Set) ---")
        acc, prec, rec, f1 = evaluate_model(model, test_loader, device, y_test_clean_np)
        print(
            f"Accuracy:  {acc:.4f}",
            f"Precision: {prec:.4f}",
            f"Recall:    {rec:.4f}",
            f"F1-score:  {f1:.4f}",
            sep="\n",
        )

        results_cnn.append({
            'Classifier': 'SimpleCNN',
            'Training Data': train_type,
            'Test Data': test_type,
            'Accuracy': acc,
            'Precision': prec,
            'Recall': rec,
            'F1-score': f1
        })

results_df_cnn = pd.DataFrame(results_cnn)
print("\n--- SimpleCNN Results Summary ---")
display(results_df_cnn)


#CNN

Training CNN models on cuda

--- Training on Clean ---
Epoch 1/20, Loss: 1.8474
Epoch 2/20, Loss: 1.5957
Epoch 3/20, Loss: 1.4793
Epoch 4/20, Loss: 1.4087
Epoch 5/20, Loss: 1.3610
Epoch 6/20, Loss: 1.3196
Epoch 7/20, Loss: 1.2882
Epoch 8/20, Loss: 1.2603
Epoch 9/20, Loss: 1.2334
Epoch 10/20, Loss: 1.2164
Epoch 11/20, Loss: 1.1909
Epoch 12/20, Loss: 1.1695
Epoch 13/20, Loss: 1.1533
Epoch 14/20, Loss: 1.1368
Epoch 15/20, Loss: 1.1296
Epoch 16/20, Loss: 1.1114
Epoch 17/20, Loss: 1.0954
Epoch 18/20, Loss: 1.0857
Epoch 19/20, Loss: 1.0747
Epoch 20/20, Loss: 1.0621

--- Evaluating Model Trained on Clean (on Clean Test Set) ---
Accuracy:  0.7040
Precision: 0.7037
Recall:    0.7040
F1-score:  0.7024

--- Evaluating Model Trained on Clean (on Salt and Pepper Noise Test Set) ---
Accuracy:  0.2693
Precision: 0.4212
Recall:    0.2693
F1-score:  0.2098

--- Evaluating Model Trained on Clean (on Gaussian Noise Test Set) ---
Accuracy:  0.2520
Precision: 0.4412
Recall:    0.2520
F1-score:  0.19

Unnamed: 0,Classifier,Training Data,Test Data,Accuracy,Precision,Recall,F1-score
0,SimpleCNN,Clean,Clean,0.704,0.703706,0.704,0.702388
1,SimpleCNN,Clean,Salt and Pepper Noise,0.2693,0.421183,0.2693,0.209761
2,SimpleCNN,Clean,Gaussian Noise,0.252,0.441196,0.252,0.190874
3,SimpleCNN,Salt and Pepper Noise (Augmentation),Clean,0.6525,0.655618,0.6525,0.649194
4,SimpleCNN,Salt and Pepper Noise (Augmentation),Salt and Pepper Noise,0.653,0.654805,0.653,0.65217
5,SimpleCNN,Salt and Pepper Noise (Augmentation),Gaussian Noise,0.2575,0.437591,0.2575,0.202855
6,SimpleCNN,Gaussian Noise (Augmentation),Clean,0.5585,0.600951,0.5585,0.548677
7,SimpleCNN,Gaussian Noise (Augmentation),Salt and Pepper Noise,0.591,0.59781,0.591,0.58526
8,SimpleCNN,Gaussian Noise (Augmentation),Gaussian Noise,0.5877,0.592607,0.5877,0.583164


# Zapis

In [None]:

# Banner for the export step (the Polish text reads "Save the summary to a single file").
# results_df_knn / _svm / _rf / _cnn from the cells above are used as-is; the
# former self-assignments of those names were no-ops and have been removed.
print("\n#Zapisz podsumowanie do jednego pliku")


def get_params_string(row):
    """Summarise a result row's hyper-parameters as a short string.

    Concatenating the per-classifier frames upcasts the hyper-parameter columns
    to float (they hold NaN for the other classifiers), which made the summary
    read "k=1.0" or "max_depth=30.0". Integral values are therefore rendered
    without the trailing ".0"; all other values are rendered with ``str``.

    Args:
        row: mapping-like row (e.g. a ``pd.Series``) with a ``'Classifier'``
            key and the hyper-parameter keys for that classifier.

    Returns:
        The parameter summary, ``"Default/Trained"`` for ``SimpleCNN``, or an
        empty string for an unrecognised classifier.
    """

    def _fmt(value):
        # Render 3.0 as "3" but leave 0.1, NaN and non-numeric values unchanged.
        try:
            as_float = float(value)
        except (TypeError, ValueError):
            return str(value)
        return str(int(as_float)) if as_float.is_integer() else str(value)

    classifier = row['Classifier']
    if classifier == 'kNN':
        return f"k={_fmt(row['k'])}"
    elif classifier == 'SVM':
        return f"Kernel={row['Kernel']}, C={_fmt(row['C'])}"
    elif classifier == 'Random Forest':
        return f"n_estimators={_fmt(row['n_estimators'])}, max_depth={_fmt(row['max_depth'])}"
    elif classifier == 'SimpleCNN':
        return "Default/Trained"
    return ""

# Stack the per-classifier tables, add a readable "Parameters" column, then drop
# the raw hyper-parameter columns (errors='ignore' tolerates any that are absent).
all_results_df = (
    pd.concat(
        [results_df_knn, results_df_svm, results_df_rf, results_df_cnn],
        ignore_index=True,
    )
    .assign(Parameters=lambda frame: frame.apply(get_params_string, axis=1))
    .drop(columns=['k', 'Kernel', 'C', 'n_estimators', 'max_depth'], errors='ignore')
)

# For each (classifier, train set, test set) keep the row with the highest accuracy.
best_row_labels = (
    all_results_df
    .groupby(['Classifier', 'Training Data', 'Test Data'])['Accuracy']
    .idxmax()
)
best_results_df = all_results_df.loc[best_row_labels]

output_file_name = 'classifier_performance_summary_noise_augmentation_cross_test.xlsx'

# Best-effort export: a failure is reported but does not stop the notebook.
try:
    with pd.ExcelWriter(output_file_name) as writer:
        for sheet_name, sheet_frame in (
            ('All Results', all_results_df),
            ('Best Results', best_results_df),
        ):
            sheet_frame.to_excel(writer, sheet_name=sheet_name, index=False)
    print(f"All results saved successfully to {output_file_name}")
except Exception as e:
    print(f"An error occurred while saving the Excel file: {e}")

print("\n--- Consolidated Results Summary (with Parameters) ---")
display(all_results_df)

print("\n--- Best Results Summary ---")
display(best_results_df)


#Zapisz podsumowanie do jednego pliku
All results saved successfully to classifier_performance_summary_noise_augmentation_cross_test.xlsx

--- Consolidated Results Summary (with Parameters) ---


Unnamed: 0,Classifier,Training Data,Test Data,Accuracy,Precision,Recall,F1-score,Parameters
0,kNN,Clean,Clean,0.3856,0.421095,0.3856,0.385748,k=1.0
1,kNN,Clean,Salt and Pepper Noise,0.3627,0.415222,0.3627,0.360561,k=1.0
2,kNN,Clean,Gaussian Noise,0.3653,0.407487,0.3653,0.361836,k=1.0
3,kNN,Clean,Clean,0.3687,0.443580,0.3687,0.366432,k=3.0
4,kNN,Clean,Salt and Pepper Noise,0.3421,0.436006,0.3421,0.335150,k=3.0
...,...,...,...,...,...,...,...,...
166,SimpleCNN,Salt and Pepper Noise (Augmentation),Salt and Pepper Noise,0.6530,0.654805,0.6530,0.652170,Default/Trained
167,SimpleCNN,Salt and Pepper Noise (Augmentation),Gaussian Noise,0.2575,0.437591,0.2575,0.202855,Default/Trained
168,SimpleCNN,Gaussian Noise (Augmentation),Clean,0.5585,0.600951,0.5585,0.548677,Default/Trained
169,SimpleCNN,Gaussian Noise (Augmentation),Salt and Pepper Noise,0.5910,0.597810,0.5910,0.585260,Default/Trained



--- Best Results Summary ---


Unnamed: 0,Classifier,Training Data,Test Data,Accuracy,Precision,Recall,F1-score,Parameters
105,Random Forest,Clean,Clean,0.466,0.462205,0.466,0.461609,"n_estimators=200.0, max_depth=30.0"
107,Random Forest,Clean,Gaussian Noise,0.4487,0.445458,0.4487,0.444818,"n_estimators=200.0, max_depth=30.0"
106,Random Forest,Clean,Salt and Pepper Noise,0.4486,0.446609,0.4486,0.445226,"n_estimators=200.0, max_depth=30.0"
159,Random Forest,Gaussian Noise,Clean,0.4547,0.450641,0.4547,0.44907,"n_estimators=200.0, max_depth=30.0"
158,Random Forest,Gaussian Noise,Gaussian Noise,0.4418,0.437292,0.4418,0.435959,"n_estimators=200.0, max_depth=20.0"
160,Random Forest,Gaussian Noise,Salt and Pepper Noise,0.4427,0.438645,0.4427,0.438175,"n_estimators=200.0, max_depth=30.0"
132,Random Forest,Salt and Pepper Noise,Clean,0.4446,0.440403,0.4446,0.437642,"n_estimators=200.0, max_depth=30.0"
134,Random Forest,Salt and Pepper Noise,Gaussian Noise,0.4288,0.422855,0.4288,0.422714,"n_estimators=200.0, max_depth=30.0"
133,Random Forest,Salt and Pepper Noise,Salt and Pepper Noise,0.448,0.443407,0.448,0.443111,"n_estimators=200.0, max_depth=30.0"
60,SVM,Clean,Clean,0.5667,0.565908,0.5667,0.566051,"Kernel=rbf, C=10.0"
