In [1]:
import os
import math
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import time
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import StandardScaler
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.metrics import f1_score

In [2]:
class CustomDataset(Dataset):
    def __init__(self, csv_path="data/dataset_train_2024.csv"):
        
        data = pd.read_csv(csv_path)
        
        self.sequences_1 = data.iloc[:, 1:129].values * 100
        self.sequences_2 = data.iloc[:, 129:257].values * 100
        self.extra_feature = data.iloc[:, 257].values.reshape(-1, 1)

        all_features = np.hstack([self.sequences_1, self.sequences_2, self.extra_feature])
        
        self.scaler = StandardScaler()
        self.normalized_features = self.scaler.fit_transform(all_features)
        self.features = torch.tensor(self.normalized_features, dtype=torch.float32)


        self.label_encoder = LabelEncoder()
        self.labels = torch.tensor(self.label_encoder.fit_transform(data.iloc[:, -1]), dtype=torch.long)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
    
    def inverseTransform(self, array):
        return self.label_encoder.inverse_transform(array)

In [None]:
dataset = CustomDataset(csv_path="data/dataset_train_2024.csv")
features = dataset.features.numpy()
labels = dataset.labels.numpy()

X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

svm = SVC()

param_grid = {
    'C': [0.1, 1, 10, 20, 100],
    'kernel': ['poly', 'rbf', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'],
}

grid_search = GridSearchCV(svm, param_grid, scoring='accuracy', cv=5, verbose=2)
grid_search.fit(X_train, y_train)

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Cross-Validation Accuracy: {grid_search.best_score_:.4f}")

best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")

print("Classification Report:\n", classification_report(y_test, y_pred))

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] END ...........C=20, degree=2, gamma=scale, kernel=poly; total time=   9.1s
[CV] END ...........C=20, degree=2, gamma=scale, kernel=poly; total time=   9.4s
[CV] END ...........C=20, degree=2, gamma=scale, kernel=poly; total time=   9.5s
[CV] END ...........C=20, degree=2, gamma=scale, kernel=poly; total time=  10.2s
[CV] END ...........C=20, degree=2, gamma=scale, kernel=poly; total time=  10.9s
[CV] END ............C=20, degree=2, gamma=scale, kernel=rbf; total time=  11.0s
[CV] END ............C=20, degree=2, gamma=scale, kernel=rbf; total time=  10.6s
[CV] END ............C=20, degree=2, gamma=scale, kernel=rbf; total time=  10.2s
[CV] END ............C=20, degree=2, gamma=scale, kernel=rbf; total time=   9.9s
[CV] END ............C=20, degree=2, gamma=scale, kernel=rbf; total time=   9.2s
[CV] END ........C=20, degree=2, gamma=scale, kernel=sigmoid; total time=   8.7s
[CV] END ........C=20, degree=2, gamma=scale, k

Best Parameters: {'C': 10, 'degree': 4, 'gamma': 'scale', 'kernel': 'poly'}

Best Cross-Validation Accuracy: 0.7495

Test Accuracy: 0.7633

# Saving the model 

In [4]:
import joblib

model_path = "./SVM/svm_model.pkl"
joblib.dump(best_svm, model_path)
print(f"SVM model saved to {model_path}")


SVM model saved to ./SVM/svm_model.pkl


# Kaggle

In [5]:
unlabeled_csv_path = "data/dataset_test_no_label_2024.csv"
unlabeled_df = pd.read_csv(unlabeled_csv_path)

unlabeled_df = unlabeled_df.drop(unlabeled_df.columns[0], axis=1)

test_features = scaler.transform(unlabeled_df.values)

test_predictions = best_svm.predict(test_features)

decoded_predictions = dataset.inverseTransform(test_predictions)

submission_df = pd.DataFrame({"ID": range(len(decoded_predictions)), "MODULATION": decoded_predictions})

submission_file_path = "preditions_SVM.csv"
submission_df.to_csv(submission_file_path, index=False)

print(f"Submission saved to {submission_file_path}")


Submission saved to preditions_SVM.csv
