In [37]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier

In [38]:
# Load the labelled training split and the unlabelled test split.
# Later cells read the 'Attack Type' column from `train` and the
# 'Timestamp' and 'ID' columns from `test`.
train = pd.read_csv(filepath_or_buffer="/content/train_data.csv")
test = pd.read_csv(filepath_or_buffer="/content/test_data.csv")

In [39]:
def time(x):
  """Classify a timestamp string as 'AM' or 'PM' from its hour field.

  The timestamp is expected to look like ``"<date> <HH>:<MM>[:<SS>]"``:
  the second whitespace-separated token is taken as the time of day and
  the text before its first ':' is parsed as the hour.

  Args:
    x: Timestamp string with the date and time separated by whitespace.

  Returns:
    'PM' when the parsed hour is 12 or greater, otherwise 'AM'.

  Raises:
    IndexError: If the string has no second whitespace-separated token.
    ValueError: If the hour field is not an integer.
  """
  # NOTE(review): this name shadows the standard-library `time` module if
  # that module is ever imported in the notebook.
  # split() with no argument collapses runs of whitespace and ignores
  # leading/trailing blanks, so doubled spaces no longer shift the time
  # field out of position 1 (split(' ') would yield empty tokens).
  fields = x.split()
  hour = int(fields[1].split(":")[0])
  return 'PM' if hour >= 12 else 'AM'
# Subtask 1 answers: one 'AM'/'PM' label per test row, derived from its Timestamp.
subtask_1 = test['Timestamp'].map(time)

In [40]:
# Names of the input columns selected from both `train` and `test`.
# The order is kept fixed because it determines the column order fed to the model.
features = [
    "Suspicious_Port_Activity", "Traffic_Volume_Variation",
    "Packet_Length_Anomaly", "Malware_Score",
    "Threat_Level_Index", "User_Behavior_Score",
    "Geo_Dispersion", "Payload_Entropy",
    "Login_Attempts", "Device_Response_Time",
    "Session_Duration", "Packet_Retry_Rate",
    "Anomaly_Tendency",
]

In [41]:
# Preprocessing stage: apply a StandardScaler to the columns named in `features`.
preprocessor = ColumnTransformer([
    ('numeric', StandardScaler(), features),
])


In [42]:
# Full model: the preprocessing stage followed by a random-forest classifier.
# NOTE(review): the final step is labelled 'regressor' although it wraps a
# RandomForestClassifier; the label is left unchanged because it is the key
# used for named_steps / parameter lookups.
model = Pipeline([
    ('preprocessor', preprocessor),
    (
        'regressor',
        RandomForestClassifier(
            # forest size and per-split feature sampling
            n_estimators=300,
            max_features='sqrt',
            # tree growth limits
            max_depth=None,
            min_samples_split=2,
            min_samples_leaf=1,
            # fixed seed; use all available cores
            random_state=42,
            n_jobs=-1,
        ),
    ),
])

In [43]:
# Train on the labelled split ('Attack Type' is the target), then predict a
# label for every test row. fit() returns the fitted pipeline, so the two
# calls are chained.
y_pred = (
    model
    .fit(train[features], train['Attack Type'])
    .predict(test[features])
)

In [44]:
# Assemble the submission in long format: one row per (subtask, test datapoint),
# with all subtask 1 rows (AM/PM labels) first, then all subtask 2 rows
# (model predictions), each in test-row order.
#
# Each subtask is built as a single frame and concatenated once, instead of
# appending one-row frames in a loop (which re-copies the growing frame on
# every iteration). Values are taken positionally, so the result does not
# depend on `test` having a default 0..n-1 index.
datapoint_ids = test['ID'].to_numpy()
answers_by_subtask = {1: subtask_1, 2: y_pred}

solution = pd.concat(
    [
        pd.DataFrame({
            'subtaskID': subtask_id,          # scalar, broadcast to every row
            'datapointID': datapoint_ids,
            'answer': np.asarray(answers),    # must have one answer per test row
        })
        for subtask_id, answers in answers_by_subtask.items()
    ],
    ignore_index=True,
)
solution.to_csv("submission.csv", index=False)