In [4]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [5]:
df = pd.read_csv("cybersecurity_intrusion_data.csv") 

In [6]:
df

Unnamed: 0,session_id,network_packet_size,protocol_type,login_attempts,session_duration,encryption_used,ip_reputation_score,failed_logins,browser_type,unusual_time_access,attack_detected
0,SID_00001,599,TCP,4,492.983263,DES,0.606818,1,Edge,0,1
1,SID_00002,472,TCP,3,1557.996461,DES,0.301569,0,Firefox,0,0
2,SID_00003,629,TCP,3,75.044262,DES,0.739164,2,Chrome,0,1
3,SID_00004,804,UDP,4,601.248835,DES,0.123267,0,Unknown,0,1
4,SID_00005,453,TCP,5,532.540888,AES,0.054874,1,Firefox,0,0
...,...,...,...,...,...,...,...,...,...,...,...
9532,SID_09533,194,ICMP,3,226.049889,AES,0.517737,3,Chrome,0,1
9533,SID_09534,380,TCP,3,182.848475,,0.408485,0,Chrome,0,0
9534,SID_09535,664,TCP,5,35.170248,AES,0.359200,1,Firefox,0,0
9535,SID_09536,406,TCP,4,86.664703,AES,0.537417,1,Chrome,1,0


In [7]:
print(df.shape)

(9537, 11)


In [8]:
print(df.columns)

Index(['session_id', 'network_packet_size', 'protocol_type', 'login_attempts',
       'session_duration', 'encryption_used', 'ip_reputation_score',
       'failed_logins', 'browser_type', 'unusual_time_access',
       'attack_detected'],
      dtype='object')


In [9]:
print(df.head())

  session_id  network_packet_size protocol_type  login_attempts  \
0  SID_00001                  599           TCP               4   
1  SID_00002                  472           TCP               3   
2  SID_00003                  629           TCP               3   
3  SID_00004                  804           UDP               4   
4  SID_00005                  453           TCP               5   

   session_duration encryption_used  ip_reputation_score  failed_logins  \
0        492.983263             DES             0.606818              1   
1       1557.996461             DES             0.301569              0   
2         75.044262             DES             0.739164              2   
3        601.248835             DES             0.123267              0   
4        532.540888             AES             0.054874              1   

  browser_type  unusual_time_access  attack_detected  
0         Edge                    0                1  
1      Firefox                    0 

In [10]:
df.dropna(inplace=True)

In [11]:
label_encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


In [15]:
X = df.drop('attack_detected', axis=1)

In [16]:
X

Unnamed: 0,session_id,network_packet_size,protocol_type,login_attempts,session_duration,encryption_used,ip_reputation_score,failed_logins,browser_type,unusual_time_access
0,0,599,1,4,492.983263,1,0.606818,1,1,0
1,1,472,1,3,1557.996461,1,0.301569,0,2,0
2,2,629,1,3,75.044262,1,0.739164,2,0,0
3,3,804,2,4,601.248835,1,0.123267,0,4,0
4,4,453,1,5,532.540888,0,0.054874,1,2,0
...,...,...,...,...,...,...,...,...,...,...
9528,7566,535,1,7,50.518476,1,0.767659,1,1,1
9531,7567,746,1,7,315.151758,1,0.190059,3,0,0
9532,7568,194,0,3,226.049889,0,0.517737,3,0,0
9534,7569,664,1,5,35.170248,0,0.359200,1,2,0


In [18]:
y = df['attack_detected']
y

0       1
1       0
2       1
3       1
4       0
       ..
9528    1
9531    1
9532    1
9534    0
9535    0
Name: attack_detected, Length: 7571, dtype: int64

In [19]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [20]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [21]:
model=RandomForestClassifier(n_estimators=100,random_state=42)
model.fit(X_train,y_train)

In [22]:
y_pred=model.predict(X_test)
print("Accuracy:",accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

Accuracy: 0.9023102310231023
              precision    recall  f1-score   support

           0       0.85      1.00      0.92       841
           1       1.00      0.78      0.88       674

    accuracy                           0.90      1515
   macro avg       0.92      0.89      0.90      1515
weighted avg       0.92      0.90      0.90      1515



In [25]:
with open('model.pkl','wb') as f:pickle.dump(model,f)

In [26]:
with open('scaler.pkl','wb') as f:pickle.dump(scaler,f)

In [27]:
with open('label_encoders.pkl','wb') as f:pickle.dump(label_encoders,f)

In [28]:
print("Model,Scaler, and Encoders saved!")

Model,Scaler, and Encoders saved!
