In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from collections import deque
import socket
import json


In [None]:
def get_traffic_data(host="localhost", port=9999, bufsize=4096):
  """Fetch one JSON payload from the traffic feed over TCP.

  Opens a TCP connection to ``(host, port)`` and reads until the bytes
  received so far form a complete JSON document, or until the peer closes
  the connection, then returns the decoded object.

  Args:
    host: Hostname or IP address of the traffic feed.
    port: TCP port of the traffic feed. The port was elided in the original
      notebook, so this default is only a placeholder -- TODO set it to the
      real feed port.
    bufsize: Maximum number of bytes requested per ``recv`` call.

  Returns:
    The object produced by ``json.loads`` for the received payload.

  Raises:
    OSError: If connecting or receiving fails.
    ValueError: If the peer closes the connection before a complete, valid
      JSON document has arrived (``json.JSONDecodeError`` or
      ``UnicodeDecodeError``, both ``ValueError`` subclasses).
  """
  received = bytearray()
  # The context manager closes the socket even if recv() or parsing raises.
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as conn:
    conn.connect((host, port))
    while True:
      chunk = conn.recv(bufsize)
      if not chunk:
        break  # peer closed the connection
      received.extend(chunk)
      try:
        # One recv() may deliver only part of the message, so retry parsing
        # after every chunk instead of truncating at the first buffer.
        # NOTE(review): assumes the payload is a JSON object or array; a bare
        # top-level number split across chunks would parse early.
        return json.loads(received.decode())
      except ValueError:
        # Incomplete JSON, or a multi-byte character split between chunks.
        continue
  # The stream ended without a parseable document: surface the decode error.
  return json.loads(received.decode())

# Feature columns used for both the live feed sample and the labeled data
# (replace with your data source).
features = ["source_ip", "destination_ip", "port", "packet_size", "timestamp"]

# json.loads returns plain dicts/lists, which cannot be indexed with a list of
# column names, so wrap the payload in a DataFrame before selecting columns.
# NOTE(review): assumes the payload is a list of records or a dict of columns
# that contains every name in `features` -- confirm against the feed.
data = pd.DataFrame(get_traffic_data())[features]

# NOTE(review): StandardScaler needs numeric input; this presumes the IP and
# timestamp columns arrive numerically encoded -- confirm.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Load labeled data for classification (optional)
labeled_data = pd.read_csv("labeled_traffic.csv")
labeled_features = labeled_data[features]
labeled_targets = labeled_data["traffic_type"]


In [None]:
# Unsupervised detector fitted on the scaled feed sample.
# contamination=0.1 sets the expected proportion of outliers in the data;
# random_state pins the tree construction so reruns produce the same scores.
anomaly_detector = IsolationForest(contamination=0.1, random_state=42)
anomaly_detector.fit(data_scaled)

def is_anomalous(traffic_data):
  """Return True when the anomaly detector flags a single observation.

  Args:
    traffic_data: One observation as an array supporting ``reshape``; it is
      reshaped to a single row before being scaled with ``scaler``.

  Returns:
    True if ``anomaly_detector`` labels the observation as an outlier.
  """
  scaled_data = scaler.transform(traffic_data.reshape(1, -1))
  prediction = anomaly_detector.predict(scaled_data)
  # IsolationForest.predict returns -1 for outliers and +1 for inliers, so
  # the anomaly label is -1 (the previous check against 1 was inverted).
  return prediction[0] == -1


In [None]:
# NOTE: despite its name, `knn` holds a GradientBoostingClassifier; the name is
# kept because other cells refer to it.
knn = GradientBoostingClassifier()  # Using GBC for improved classification
# classify_traffic() scales its input with `scaler` before predicting, so the
# classifier must be trained in that same scaled feature space.
knn.fit(scaler.transform(labeled_features), labeled_targets)

def classify_traffic(traffic_data):
  """Predict the traffic type of a single observation.

  The observation is reshaped into a one-row matrix, passed through the
  shared ``scaler``, and classified by ``knn``.

  Returns:
    The predicted label for that single row.
  """
  single_row = traffic_data.reshape(1, -1)
  labels = knn.predict(scaler.transform(single_row))
  return labels[0]


In [None]:
window_size = 100
traffic_queue = deque(maxlen=window_size)
# Anomaly verdicts for the entries in traffic_queue, kept in lock-step so each
# observation is scored once instead of re-scoring the whole window per packet.
anomaly_flags = deque(maxlen=window_size)
normal_threshold = 0.8  # Minimum normal traffic proportion in window

try:
  while True:
    new_data = get_traffic_data()
    is_anomalous_current = bool(is_anomalous(new_data))

    traffic_queue.append(new_data)
    anomaly_flags.append(is_anomalous_current)

    # Use the number of observations actually in the window: dividing by
    # window_size would raise an alert for every packet until the window fills.
    normal_count = sum(not flag for flag in anomaly_flags)
    normal_ratio = normal_count / len(anomaly_flags)

    # Analyze recent window for anomalies and suspicious activity patterns
    if is_anomalous_current or normal_ratio < normal_threshold:
      print(f"Alert: Potential anomaly or suspicious activity for {new_data}")

      # Classify traffic if enabled. Compare against None explicitly rather
      # than relying on the truthiness of an estimator object.
      if knn is not None:
        traffic_type = classify_traffic(new_data)
        print(f"Traffic classified as: {traffic_type}")

      # Implement logic for further investigation and analysis based on alerts and classifications
      # ... (e.g., trigger alarms, send notifications, log events)
except KeyboardInterrupt:
  # Interrupting the kernel stops monitoring without leaving a traceback, so
  # the cells below can still be run afterwards.
  print("Traffic monitoring stopped.")


In [None]:
# decision_function expects a 2-D (n_samples, n_features) array; slicing with
# [:1] keeps the sample axis, whereas data_scaled[0] is 1-D and is rejected.
# Lower scores are more abnormal; negative scores correspond to outliers.
anomaly_score = anomaly_detector.decision_function(data_scaled[:1])[0]
print(f"Anomaly score for first data point: {anomaly_score}")

In [None]:
import matplotlib.pyplot as plt

# Explicit figure/axes with a title and axis labels so the plot can be read
# on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(data_scaled[:, 0])  # Plot first feature
ax.set(title="First scaled feature", xlabel="Sample index", ylabel="Scaled value")
plt.show()