In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
import torch
from torchvision import datasets, transforms

# Σύνδεση με Google Drive
from google.colab import drive
drive.mount('/content/drive')

data_path = '/content/drive/MyDrive/cifar-10-batches-py'

# Μετασχηματισμός των δεδομένων
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Φόρτωση του CIFAR-10 dataset
train_dataset = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

x_train = torch.stack([train_dataset[i][0] for i in range(len(train_dataset))]).numpy()
y_train = np.array([train_dataset[i][1] for i in range(len(train_dataset))])

x_test = torch.stack([test_dataset[i][0] for i in range(len(test_dataset))]).numpy()
y_test = np.array([test_dataset[i][1] for i in range(len(test_dataset))])

x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

Mounted at /content/drive
Files already downloaded and verified
Files already downloaded and verified


In [2]:
# Επιλογή των κλάσεων
selected_classes = [1, 2]
train_mask = np.isin([train_dataset[i][1] for i in range(len(train_dataset))], selected_classes)
test_mask = np.isin([test_dataset[i][1] for i in range(len(test_dataset))], selected_classes)

# Επιλογή των δεδομένων με βάση τις μάσκες
x_train = torch.stack([train_dataset[i][0] for i in range(len(train_dataset))])[train_mask][:1000]
y_train = np.array([train_dataset[i][1] for i in range(len(train_dataset))])[train_mask][:1000]
x_test = torch.stack([test_dataset[i][0] for i in range(len(test_dataset))])[test_mask][:200]
y_test = np.array([test_dataset[i][1] for i in range(len(test_dataset))])[test_mask][:200]

train_filter = np.isin(y_train, selected_classes)
test_filter = np.isin(y_test, selected_classes)

x_train_filtered = x_train[train_filter]
y_train_filtered = y_train[train_filter]
x_test_filtered = x_test[test_filter]
y_test_filtered = y_test[test_filter]

# Μετατροπή ετικετών σε -1 και 1
y_train_filtered = np.where(y_train_filtered == selected_classes[0], -1, 1)
y_test_filtered = np.where(y_test_filtered == selected_classes[0], -1, 1)


In [8]:
!pip install torch torchvision



In [9]:
class SVM:
    def __init__(self, C, kernel, degree, gamma):
        self.C = C
        self.w = 0
        self.b = 0
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma

    def kernel_function(self, x1, x2):
        if self.kernel == 'linear':
            return np.dot(x1, x2)
        elif self.kernel == 'poly':
            return (np.dot(x1, x2) + 1) ** self.degree
        elif self.kernel == 'rbf':
          if self.gamma == 'scale':
                self.gamma = 1 / x1.shape[0]
          return np.exp(-self.gamma * np.linalg.norm(x1 - x2) ** 2)
        else:
            raise ValueError("Λάθος μέθοδος")

    def hingeloss(self, w, b, X, Y):
     reg = 0.5 * np.dot(w, w.T)  #  1/2 * norm (w)


     hinge_losses = 0
     for i in range(X.shape[0]):
         opt_term = Y[i] * (np.dot(w, X[i]) + b)  # y*(w*x + b)
         hinge_losses += max(0, 1 - opt_term)

     total_loss = reg + self.C * hinge_losses  # Ολική απώλεια

     return total_loss


    def fit(self, X, Y, batch_size, learning_rate, epochs):
        if isinstance(X, torch.Tensor):
            X = X.cpu().numpy()
        if isinstance(Y, torch.Tensor):
            Y = Y.cpu().numpy()
        self.learning_rate = learning_rate
        self.epochs = epochs
        num_samples = X.shape[0]
        num_features = X.shape[1]
        c = self.C
        ids = np.arange(num_samples)
        np.random.shuffle(ids)
        w = np.zeros((1, num_features))
        b = 0
        losses = []
        for epoch in range(epochs):
            l = self.hingeloss(w, b, X, Y)
            losses.append(l)
            for batch_start in range(0, num_samples, batch_size):
                gradw = np.zeros((1, num_features))
                gradb = 0
                for j in range(batch_start, min(batch_start + batch_size, num_samples)):
                    x = ids[j]
                    if self.kernel == 'linear':
                        ti = Y[x] * (np.dot(w, X[x].reshape(1, -1).astype(np.float64).T) + b)
                    else:
                        kernel_sum = np.sum([self.kernel_function(X[x], X[i]) for i in range(num_samples)])
                        ti = Y[x] * (kernel_sum + b)
                    if np.any(ti > 1):
                        gradw += 0
                        gradb += 0
                    else:
                        gradw += (c * Y[x] * X[x].reshape(1, -1))
                        gradb += c * Y[x]
                gradw = gradw.astype(np.float64)
                w = w - learning_rate * w + learning_rate * gradw  #
                b = b + learning_rate * gradb

            self.w = w
            self.b = b

        return self.w, self.b, losses

    def predict(self, X):
        return np.sign(np.dot(X, self.w[0]) + self.b)

In [10]:
# Παράμετροι SVM

batch_size = 64
learning_rate = 0.001
epochs = 100

# Δημιουργία και εκπαίδευση SVM



In [11]:
x_train_flat = x_train.reshape(x_train.shape[0], -1)
x_test_flat = x_test.reshape(x_test.shape[0], -1)

In [None]:
print("Shape of x_train_flat:", x_train_flat.shape)
print("Shape of x_test_flat:", x_test_flat.shape)

NameError: name 'x_train_flat' is not defined

In [12]:
from sklearn.decomposition import PCA

pca = PCA(n_components=90)
x_train_flat_reduced = pca.fit_transform(x_train_flat)
x_test_flat_reduced = pca.transform(x_test_flat)

In [None]:
print("Shape of x_train_flat:", x_train_flat_reduced.shape)
print("Shape of x_test_flat:", x_test_flat.shape)

Shape of x_train_flat: (1000, 90)
Shape of x_test_flat: torch.Size([200, 3072])


In [14]:
C_values = [0.5 , 1, 5, 10]
gamma_values = ['scale', 0.1,1,5]

In [None]:


for C in C_values:
  print(f"RBF SVM για C={C}")
  for g in gamma_values:
     print(f"gamma={g}")
     svm = SVM(C=C , kernel='rbf',degree=1, gamma=g )
     svm.fit(x_train_flat_reduced, y_train_filtered, batch_size, learning_rate, epochs)
     y_train_pred = svm.predict(x_train_flat_reduced)
     y_test_pred = svm.predict(x_test_flat_reduced)


     train_accuracy = np.mean(y_train_pred == y_train_filtered) * 100
     test_accuracy = np.mean(y_test_pred == y_test_filtered) * 100

     print(f"Training Accuracy: {train_accuracy:.2f}%")
     print(f"Test Accuracy: {test_accuracy:.2f}%")
     print("-------------------")

In [None]:
for C in C_values:
    print(f"Linear SVM για C={C} ")
    svm = SVM(C=C, kernel='linear', degree=1, gamma=1)
    w, b, losses = svm.fit(x_train_flat, y_train_filtered, batch_size, learning_rate, epochs)
    y_train_pred = svm.predict(x_train_flat)
    y_test_pred = svm.predict(x_test_flat)


    train_accuracy = accuracy_score(y_train_pred, y_train_filtered) * 100
    test_accuracy = accuracy_score(y_test_pred, y_test_filtered)*100



    print(f"Training Accuracy: {train_accuracy:.2f}%")
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    print("-------------------")

In [22]:
degree_values = [1 ,2, 3, 10]

In [None]:
for C in C_values:
  print(f"Polynomial SVM για C={C}")
  for d in degree_values:
    print(f"d={d}")
    svm = SVM(C=C , kernel='poly', degree=d ,gamma=1 )
    w, b, losses = svm.fit(x_train_flat_reduced, y_train_filtered, batch_size, learning_rate, epochs)
    y_train_pred = svm.predict(x_train_flat_reduced)
    y_test_pred = svm.predict(x_test_flat_reduced)


    train_accuracy = np.mean(y_train_pred == y_train_filtered) * 100
    test_accuracy = np.mean(y_test_pred == y_test_filtered) * 100

    print(f"Training Accuracy: {train_accuracy:.2f}%")
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    print("-------------------")

In [24]:
# Μετακινώ ανάλογα με το τι θέλω να μετρήσω
correct_idx = np.where(y_test_pred == y_test_filtered)[0]
incorrect_idx = np.where(y_test_pred != y_test_filtered)[0]
print(f"Αριθμός σωστών ταξινομήσεων: {len(correct_idx)}")
print(f"Αριθμός λανθασμένων ταξινομήσεων: {len(incorrect_idx)}")