## importing libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## importing datasets

In [2]:
dataset = pd.read_csv('/kaggle/input/random-2/Log Dataset.csv')

# dataset.head
print(dataset.shape)

## Dropping the following columns: Timestamp, Source, Destination, Description
drop_col = ['Timestamp', 'Source', 'Destination', 'Description']
dataset.drop(drop_col, axis=1, inplace=True)

# dataset.head
print(dataset.shape)

(500, 11)
(500, 7)


## Encoding first 2 columns - SourceClass and DesinationClass

In [3]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

dataset['SourceClass'] = le.fit_transform(dataset['SourceClass'])
dataset['DestinationClass'] = le.fit_transform(dataset['DestinationClass'])

dataset.head

<bound method NDFrame.head of      SourceClass  DestinationClass       User          Device  \
0              1                 1    user123  Workstation123   
1              1                 1  anonymous       ServerABC   
2              0                 1      admin       DeviceXYZ   
3              0                 1    user123  Workstation123   
4              0                 1      guest       DeviceXYZ   
..           ...               ...        ...             ...   
495            0                 0  anonymous       DeviceXYZ   
496            1                 1  anonymous       DeviceXYZ   
497            1                 1  anonymous  Workstation123   
498            1                 1      guest       DeviceXYZ   
499            1                 1      admin  Workstation123   

              EventType       Severity  MLRiskScore  
0     application-usage  informational         0.13  
1       system-shutdown       critical         0.33  
2          auth-lockout  in

## replacing other columns with numbers as well 

In [4]:
## Severity Column
# Dictionary for mapping severity to values
event_severity_threat = {"informational": 0.1, "warning": 0.7, "error": 0.4, "critical": 0.9}
dataset['Severity'] = dataset['Severity'].replace(event_severity_threat)

## EventType Column
event_type_threat = {
    "auth-failed": 0.8,
    "auth-success": 0.1,
    "auth-lockout": 0.9,
    "network-connected": 0.2,
    "network-disconnected": 0.2,
    "firewall-change": 0.7,
    "dns-queries": 0.3,
    "malware-detection": 0.9,
    "system-shutdown": 0.8,
    "system-restart": 0.7,
    "system-failure": 0.9,
    "application-errors": 0.6,
    "application-usage": 0.2,
    "api-called": 0.4,
    "file-access": 0.5,
    "permission-changes": 0.3,
    "software-update": 0.6
}
dataset['EventType'] = dataset['EventType'].replace(event_type_threat)

## Device Column
devices = {"Workstation123": 1, "DeviceXYZ": 2, "ServerABC": 3}
dataset['Device'] = dataset['Device'].replace(devices)


## User Column
users = {"user123": 1, "guest": 2, "admin": 3, "anonymous":4}
dataset['User'] = dataset['User'].replace(users)

dataset.shape
dataset

Unnamed: 0,SourceClass,DestinationClass,User,Device,EventType,Severity,MLRiskScore
0,1,1,1,1,0.2,0.1,0.13
1,1,1,4,3,0.8,0.9,0.33
2,0,1,3,2,0.9,0.1,0.63
3,0,1,1,1,0.9,0.4,0.63
4,0,1,2,2,0.5,0.4,0.50
...,...,...,...,...,...,...,...
495,0,0,4,2,0.1,0.1,0.63
496,1,1,4,2,0.8,0.4,0.33
497,1,1,4,1,0.8,0.9,0.33
498,1,1,2,2,0.6,0.7,0.27


## train test split

In [5]:
# Separate features (X) and target (y)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [6]:
print(X_train)
print(X_train.shape)

[[1.  1.  2.  3.  0.7 0.1]
 [1.  1.  3.  3.  0.9 0.1]
 [0.  1.  4.  2.  0.1 0.1]
 ...
 [1.  1.  1.  2.  0.9 0.1]
 [1.  0.  1.  2.  0.9 0.7]
 [1.  1.  4.  3.  0.6 0.9]]
(400, 6)


## model

In [7]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor.fit(X_train, y_train)

## predict

In [8]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

## model predict

In [9]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9993863658677605

## safe

In [10]:
safe_ips = [
    "192.168.1.100", "10.0.0.2", "172.16.0.10", "10.10.10.10",
    "192.168.0.1", "192.168.2.5", "10.1.1.1", "172.16.1.100",
    "192.168.10.5", "10.0.0.100"
]
safe_destination_ips = [
    "192.168.0.2", "10.0.0.3", "172.16.0.20", "10.10.10.20",
    "192.168.0.3", "192.168.2.6", "10.1.1.2", "172.16.1.101",
    "192.168.10.6", "10.0.0.101"
]  
# event_type_threat = {
#     "auth-failed": 0.8,
#     "auth-success": 0.1,
#     "auth-lockout": 0.9,
#     "network-connected": 0.2,
#     "network-disconnected": 0.2,
#     "firewall-change": 0.7,
#     "dns-queries": 0.3,
#     "malware-detection": 0.9,
#     "system-shutdown": 0.8,
#     "system-restart": 0.7,
#     "system-failure": 0.9,
#     "application-errors": 0.6,
#     "application-usage": 0.2,
#     "api-called": 0.4,
#     "file-access": 0.5,
#     "permission-changes": 0.3,
#     "software-update": 0.6
# }
# event_severity_threat = {"informational": 0.1, "warning": 0.7, "error": 0.4, "critical": 0.9}
# devices = {"Workstation123": 1, "DeviceXYZ": 2, "ServerABC": 3}
# users = {"user123": 1, "guest": 2, "admin": 3, "anonymous":4}

## taking user input for predicts

In [13]:
SourceClass = input("Enter IP Address of user :: ")
SourceClass = SourceClass.split(':')[0]
if SourceClass in safe_ips:
    SourceClass = 1
else:
    SourceClass = 0

DestinationClass = input("Enter Source Location :: ")
DestinationClass = DestinationClass.split(':')[0]
if DestinationClass in safe_destination_ips:
    DestinationClass = 1
else:
    DestinationClass = 0

User = input("Enter user :: ")
if User in users:
    User = users[User]
else:
    print("Invalid user input.")
        
Device = input("Enter device name :: ")
if Device in devices:
    Device = devices[Device]
else:
    print("Invalid user input.")
        
EventType = input("Enter event type :: ")
if EventType in event_type_threat:
    EventType = event_type_threat[EventType]
else:
    print("Invalid user input.")
    
Severity = input("Enter Severity :: ")
if Severity in event_severity_threat:
    Severity = event_severity_threat[Severity]
else:
    print("Invalid user input.")
    
regressor.predict([[SourceClass, DestinationClass, User, Device, EventType, Severity]])

Enter IP Address of user ::  192.111.121
Enter Source Location ::  1281.11.111
Enter user ::  user123
Enter device name ::  ServerABC
Enter event type ::  auth-failed
Enter Severity ::  critical


array([0.87])