In [None]:
import numpy as np
import torch
import xgboost as xgb
from torch.utils.data import random_split, DataLoader

from data.datasets import SecondStageModeling

# --- Configuration for the split and the tensor -> numpy conversion ----------
SEED = 42            # seeds the train/validation split so reruns are comparable
VAL_FRACTION = 0.2   # share of the dataset held out for evaluation
BATCH_SIZE = 32
FEATURE_KEYS = ('swarm_a', 'swarm_b', 'swarm_c')
LABEL_KEY = 'dst'

# Defining dataset
dataset = SecondStageModeling()
val_len = round(VAL_FRACTION * len(dataset))
train_len = len(dataset) - val_len
train_ds, test_ds = random_split(
    dataset,
    [train_len, val_len],
    generator=torch.Generator().manual_seed(SEED),
)

# Convert to dataloaders. They are only used to batch samples for the numpy
# conversion below, so the evaluation loader does not need shuffling.
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)


def _loader_to_arrays(loader):
    """Collect every batch of ``loader`` into a feature matrix and a label array.

    Each batch must expose the ``FEATURE_KEYS`` entries and the ``LABEL_KEY``
    entry. Every feature tensor is reshaped to ``(batch, -1)`` and the three
    are joined column-wise, so each sample becomes exactly one row. The
    previous implementation flattened and concatenated everything along
    axis 0, which produced a single 1-D vector that neither ``xgb.DMatrix``
    nor scikit-learn estimators accept as a feature matrix.

    NOTE(review): assumes the first axis of every tensor is the batch axis
    (what DataLoader collation normally produces) -- confirm against
    ``SecondStageModeling.__getitem__``.

    Args:
        loader: iterable yielding either one dict per batch, or a sequence of
            such dicts per batch.

    Returns:
        Tuple ``(X, y)``: ``X`` is 2-D with one row per sample; ``y`` is the
        per-batch labels concatenated along axis 0.

    Raises:
        ValueError: if the loader yields no batches, or if the number of
            feature rows and labels differ.
    """
    feature_blocks, label_blocks = [], []
    for batch in loader:
        # A dict batch must be used as-is: iterating it would yield its keys.
        groups = [batch] if isinstance(batch, dict) else batch
        for data in groups:
            parts = [np.asarray(data[key]) for key in FEATURE_KEYS]
            feature_blocks.append(
                np.concatenate([p.reshape(p.shape[0], -1) for p in parts], axis=1)
            )
            label_blocks.append(data[LABEL_KEY].numpy())

    if not feature_blocks:
        raise ValueError('loader produced no batches; cannot build arrays')

    X = np.concatenate(feature_blocks, axis=0)
    y = np.concatenate(label_blocks, axis=0)
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            f'feature/label count mismatch: {X.shape[0]} rows vs {y.shape[0]} labels'
        )
    return X, y


# Convert PyTorch datasets to numpy arrays
X_train, y_train = _loader_to_arrays(train_dl)
X_test, y_test = _loader_to_arrays(test_dl)

# Wrap the arrays in XGBoost's DMatrix container (training happens later)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Boosting method

In [None]:

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report

# Booster configuration.
# NOTE(review): 'num_class' must equal the number of distinct values in the
# 'dst' labels -- 4 is taken from the original cell; confirm against the data.
params = {
    'objective': 'multi:softmax',  # for multiclass classification
    'num_class': 4,  # number of classes in the dataset
    'max_depth': 3,
    'eta': 0.1,
    'eval_metric': 'merror'  # merror for multiclass classification error
}

num_round = 100  # number of boosting iterations

# Train the model
model = xgb.train(params, dtrain, num_round)

# Validation
# With 'multi:softmax' the booster returns the predicted class id as a float;
# cast to int so the predictions and report labels read as class indices.
y_pred = model.predict(dtest).astype(int)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')

# Show the scores here: the same variable names are reassigned by the next
# model's cell, so without this the XGBoost results are never visible.
for metric_name, metric_value in (
    ('accuracy', accuracy),
    ('f1 (weighted)', f1),
    ('recall (weighted)', recall),
    ('precision (weighted)', precision),
):
    print(f'XGBoost {metric_name}: {metric_value:.4f}')


In [None]:
# Write the object currently bound to `model` to disk via its save_model().
# NOTE(review): recent XGBoost releases recommend .json/.ubj file extensions;
# the path is left unchanged so existing loaders keep working -- confirm.
xgb_model_path = 'xgboost_model.model'
model.save_model(xgb_model_path)

## Classification report

In [None]:
# Text summary of per-class metrics for the current y_test / y_pred pair.
report_text = classification_report(y_test, y_pred)
print(report_text)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

n_estimators = 100  # number of trees in the forest
random_state = 42   # fixed seed so the fitted forest is reproducible

# Pass both settings by keyword: only `n_estimators` may be given positionally
# in current scikit-learn, so a positional `random_state` raises TypeError
# (and in older releases it would have been taken as `criterion`).
model = RandomForestClassifier(
    n_estimators=n_estimators,
    random_state=random_state,
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Validation
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')

# Display the scores; previously they were computed but never shown.
for metric_name, metric_value in (
    ('accuracy', accuracy),
    ('f1 (weighted)', f1),
    ('recall (weighted)', recall),
    ('precision (weighted)', precision),
):
    print(f'RandomForest {metric_name}: {metric_value:.4f}')


## Classification report

In [None]:
# Text summary of per-class metrics for the current y_test / y_pred pair.
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
import joblib
# Serialize the estimator currently bound to `model` with joblib.
rf_model_path = 'rf.joblib'
joblib.dump(model, rf_model_path)