# In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score

# Load training dataset
train_data = pd.read_csv('train.csv')

# Data preprocessing and feature engineering (if needed)

# Encode class labels as integers. The fitted encoder is kept so that
# integer predictions can later be mapped back to the original label names.
label_encoder = LabelEncoder()
train_data['Attack_type'] = label_encoder.fit_transform(train_data['Attack_type'])

# Split data into features (X) and target variable (y).
# get_dummies one-hot encodes the non-numeric feature columns; the resulting
# column set (X.columns) is the layout any later input must be aligned to.
X = train_data.drop('Attack_type', axis=1)
X = pd.get_dummies(X)
y = train_data['Attack_type']

# Feature Scaling (Standardization)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handling Imbalanced Data with SMOTE
# NOTE(review): scaling and oversampling are applied to the full training set
# *before* cross-validation, so every validation fold contains synthetic
# samples interpolated from points in the training folds. The CV accuracy
# reported by the grid search is therefore likely optimistic. Moving the
# scaler and SMOTE into an imblearn Pipeline passed to GridSearchCV would
# confine both steps to the training folds.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# Define the parameter grid (3 * 3 * 3 * 3 = 81 candidates, 5 folds each)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Create a RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)

# Instantiate the GridSearchCV object.
# refit=True (the default, spelled out here) retrains the best configuration
# on the whole resampled dataset once the search has finished.
grid_search = GridSearchCV(
    rf_model,
    param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    refit=True,
)

# Fit the grid search to the data
grid_search.fit(X_resampled, y_resampled)

# Get the best parameters
best_params = grid_search.best_params_
print(f'Best Hyperparameters: {best_params}')

# Use the estimator that the grid search already refit with the best
# parameters. It is a RandomForestClassifier(random_state=42, **best_params)
# trained on (X_resampled, y_resampled), so building and fitting a second,
# identical forest would only repeat the most expensive training step.
final_model = grid_search.best_estimator_

# Load test dataset
test_data = pd.read_csv('test.csv')

# Keep the row identifiers before the frame is encoded and re-aligned: after
# the reindex below, 'id' is only present if it also was a training feature
# (and would be all zeros if it was created by fill_value).
test_ids = test_data['id']

# Apply one-hot encoding to the test dataset
test_data = pd.get_dummies(test_data)

# Align the columns of the test dataset with the training dataset:
# columns unseen in training are dropped, missing ones are added as 0.
test_data = test_data.reindex(columns=X.columns, fill_value=0)

# Feature Scaling (Standardization) using the same scaler as for training data
test_data_scaled = scaler.transform(test_data)

# Apply the trained model to the test dataset for predictions
test_predictions = final_model.predict(test_data_scaled)

# Create a submission file with decoded class labels.
# inverse_transform maps the integer predictions back to the original label
# names learned from the training data; fit_transform would instead refit the
# encoder on the predictions and return integers again.
test_predictions_labels = label_encoder.inverse_transform(test_predictions)
submission = pd.DataFrame({'id': test_ids, 'Attack_type': test_predictions_labels})
submission.set_index('id', inplace=True)

# Save the submission file
submission.to_csv('submission.csv')
