In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler, LabelEncoder
from datetime import datetime

In [2]:
# Load the log dataset (replace with your dataset file)
data = pd.read_csv('../dataset/log_data.csv')

In [3]:
# Convert timestamp to datetime objects
data['timestamp'] = pd.to_datetime(data['timestamp'])

In [4]:
# Calculate time difference in seconds from the minimum timestamp
min_timestamp = data['timestamp'].min()
data['time_diff'] = (data['timestamp'] - min_timestamp).dt.total_seconds()

In [5]:
# Select relevant features for anomaly detection
# In this case, we'll use time_diff, activity, and username
features = data[['time_diff', 'activity', 'username']]

In [6]:
# Convert categorical features to numerical using label encoding
label_encoders = {}
for col in ['activity', 'username']:
    label_encoders[col] = LabelEncoder()
    features[col] = label_encoders[col].fit_transform(features[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features[col] = label_encoders[col].fit_transform(features[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features[col] = label_encoders[col].fit_transform(features[col])


In [7]:
# Standardize the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

In [8]:
# Create and train the Isolation Forest model
model = IsolationForest(contamination=0.05)  # Adjust contamination based on your data
model.fit(scaled_features)

In [9]:
# Predict anomalies (-1) and normal instances (1)
predictions = model.predict(scaled_features)

In [10]:
# Add the predictions back to the original dataset
data['anomaly_prediction'] = predictions

In [11]:
# Identify anomalies
anomalies = data[data['anomaly_prediction'] == -1]


In [12]:
# Define actionable insights templates
insight_templates = {
    'Unauthorized Access Detected': {
        'severity': 'High',
        'rule': 'Access Control Enforcement',
        'template': "Anomaly Details:\n- Timestamp: {timestamp}\n- User: {username}\n- Activity: {activity}\n- Recommendation: Investigate the unauthorized access by user '{username}' at the given timestamp. Check for signs of compromise and take appropriate action to secure the account and prevent further breaches."
    },
    # Define more templates for other compliance rules
}

In [13]:
# Generate actionable insights
actionable_insights = []
for _, anomaly_row in anomalies.iterrows():
    insight_template = insight_templates.get('Unauthorized Access Detected')  # Replace with relevant template
    if insight_template:
        actionable_insights.append(insight_template['template'].format(
            timestamp=anomaly_row['timestamp'],
            username=anomaly_row['username'],
            activity=anomaly_row['activity']
        ))

In [14]:
# Print actionable insights
print("Actionable Insights:")
for insight in actionable_insights:
    print(insight)


Actionable Insights:
Anomaly Details:
- Timestamp: 2023-08-06 14:37:52
- User: qgray
- Activity: AccessDenied
- Recommendation: Investigate the unauthorized access by user 'qgray' at the given timestamp. Check for signs of compromise and take appropriate action to secure the account and prevent further breaches.
Anomaly Details:
- Timestamp: 2023-01-25 07:35:07
- User: zsheppard
- Activity: ProductView
- Recommendation: Investigate the unauthorized access by user 'zsheppard' at the given timestamp. Check for signs of compromise and take appropriate action to secure the account and prevent further breaches.
Anomaly Details:
- Timestamp: 2023-01-05 15:43:33
- User: brandon42
- Activity: ProductView
- Recommendation: Investigate the unauthorized access by user 'brandon42' at the given timestamp. Check for signs of compromise and take appropriate action to secure the account and prevent further breaches.
Anomaly Details:
- Timestamp: 2023-06-04 11:32:37
- User: alexis64
- Activity: Product