In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Train and evaluate a random-forest classifier on the labelled training CSV.
#
# Steps: load the CSV, assemble the feature matrix, make a stratified 80/20
# split, standardise the features, tune hyper-parameters with a 5-fold grid
# search, then report accuracy and a per-class report on the held-out 20%.
dataset = pd.read_csv("data/dataset_train_2024.csv")

# Column layout used below: column 0 is skipped (presumably an ID/index
# column -- TODO confirm), columns 1-128 and 129-256 are two 128-wide
# sequences, column 257 is a single extra feature, and the last column is the
# class label.
sequences_1 = dataset.iloc[:, 1:129].values
sequences_2 = dataset.iloc[:, 129:257].values
extra_feature = dataset.iloc[:, 257].values.reshape(-1, 1)

all_features = np.hstack([sequences_1, sequences_2, extra_feature])
labels = dataset.iloc[:, -1].values

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full dataset leaks test-set
# statistics into training). The split itself depends only on the labels and
# the seed, so it selects the same rows as splitting already-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    all_features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell relying on this name still works; it is the full
# feature matrix scaled with the training-set statistics.
normalized_features = scaler.transform(all_features)

rf_model = RandomForestClassifier(random_state=42)

# 3 * 3 * 3 * 3 * 2 = 162 candidates, i.e. 810 fits with cv=5.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

print(f"Best parameters found: {grid_search.best_params_}")

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# refitted on all of X_train with the best parameters; fitting it again would
# only repeat that work.
best_rf_model = grid_search.best_estimator_

y_pred = best_rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Optional: persist the tuned model to disk.
#import joblib
#joblib.dump(best_rf_model, 'random_forest_model.pkl')


Fitting 5 folds for each of 162 candidates, totalling 810 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters found: {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Test Accuracy: 0.7083
Classification Report:
               precision    recall  f1-score   support

        8PSK       0.62      0.81      0.70       480
        BPSK       0.98      0.79      0.88       480
       CPFSK       0.92      0.53      0.67       480
        GFSK       0.61      0.99      0.76       480
        QPSK       0.60      0.41      0.49       480

    accuracy                           0.71      2400
   macro avg       0.75      0.71      0.70      2400
weighted avg       0.75      0.71      0.70      2400



: 

Best parameters found: {'bootstrap': False, 'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}

Test Accuracy: 0.7083

Classification Report:
               precision    recall  f1-score   support

        8PSK       0.62      0.81      0.70       480
        BPSK       0.98      0.79      0.88       480
       CPFSK       0.92      0.53      0.67       480
        GFSK       0.61      0.99      0.76       480
        QPSK       0.60      0.41      0.49       480

    accuracy                           0.71      2400

   macro avg       0.75      0.71      0.70      2400
   
weighted avg       0.75      0.71      0.70      2400

# Kaggle submission

In [None]:
# Predict labels for the unlabelled test CSV and write a submission file.
#
# Relies on `scaler` and `best_rf_model` produced by the training cell.
unlabeled_csv_path = "data/dataset_test_no_label_2024.csv"
unlabeled_df = pd.read_csv(unlabeled_csv_path)

# Drop the first column so the remaining columns line up with the training
# features (presumably an ID/index column -- TODO confirm).
unlabeled_df = unlabeled_df.drop(unlabeled_df.columns[0], axis=1)

# Same layout as in training, shifted by one because column 0 was dropped:
# two 128-wide sequences followed by a single extra feature.
unlabeled_sequences_1 = unlabeled_df.iloc[:, :128].values
unlabeled_sequences_2 = unlabeled_df.iloc[:, 128:256].values
unlabeled_extra_feature = unlabeled_df.iloc[:, 256].values.reshape(-1, 1)

unlabeled_all_features = np.hstack([unlabeled_sequences_1, unlabeled_sequences_2, unlabeled_extra_feature])

# Reuse the already-fitted scaler (transform only, never re-fit on test data).
normalized_test_features = scaler.transform(unlabeled_all_features)

test_predictions = best_rf_model.predict(normalized_test_features)

# IDs are generated as 0..n-1 in row order rather than read from the file.
# NOTE(review): assumes the expected submission IDs match row order -- confirm.
submission_df = pd.DataFrame({
    "ID": range(len(test_predictions)),
    "MODULATION": test_predictions
})

submission_file_path = "./CSV/Spredictions_RF.csv"
# DataFrame.to_csv does not create missing directories; make sure the target
# folder exists so the cell does not fail at the very last step.
Path(submission_file_path).parent.mkdir(parents=True, exist_ok=True)
submission_df.to_csv(submission_file_path, index=False)

print(f"Submission saved to {submission_file_path}")