<a href="https://colab.research.google.com/github/SanyaKapoor/GenevaPlus/blob/main/Geneva%2B%2B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Advancing Censorship Evasion with Real-Time Detection

In [2]:
!pip install scapy

Collecting scapy
  Downloading scapy-2.5.0.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scapy
  Building wheel for scapy (setup.py) ... [?25l[?25hdone
  Created wheel for scapy: filename=scapy-2.5.0-py2.py3-none-any.whl size=1444327 sha256=44e9470687d359fdceac52a882592617a67e5defab1f763cc8dd03b17bbcf577
  Stored in directory: /root/.cache/pip/wheels/82/b7/03/8344d8cf6695624746311bc0d389e9d05535ca83c35f90241d
Successfully built scapy
Installing collected packages: scapy
Successfully installed scapy-2.5.0


In [3]:
import numpy as np
from google.colab import files
from scapy.all import IP, TCP, rdpcap
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

class TRW:
    """Sequential likelihood-ratio test over a sequence of probabilities.

    The ratio ``prod(Y) / prod(1 - Y)`` is compared against two thresholds
    derived from ``alpha`` and ``beta``:

    * ``eta_1 = beta / alpha`` (upper threshold)
    * ``eta_0 = (1 - beta) / (1 - alpha)`` (lower threshold)

    ``alpha`` must be neither 0 nor 1, otherwise computing the thresholds
    raises ``ZeroDivisionError``.
    """

    def __init__(self, alpha=0.01, beta=0.99):
        self.alpha = alpha
        self.beta = beta

    def likelihood_ratio(self, Y):
        """Return ``prod(Y) / prod(1 - Y)`` for a sequence of probabilities.

        Args:
            Y: Sequence of numbers, each interpreted as a probability.

        Returns:
            The ratio as a NumPy float. When the denominator is zero (at least
            one entry of ``Y`` equals 1) the IEEE result is returned without
            emitting a RuntimeWarning: ``inf`` if the numerator is positive,
            ``nan`` if the numerator is zero as well. An empty ``Y`` yields 1.0.
        """
        probabilities = np.asarray(Y, dtype=float)
        pr_y_given_H1 = np.prod(probabilities)
        pr_y_given_H0 = np.prod(1.0 - probabilities)
        # A probability of exactly 1.0 zeroes the denominator; keep the
        # inf/nan result but do not spam warnings for this expected case.
        with np.errstate(divide="ignore", invalid="ignore"):
            return pr_y_given_H1 / pr_y_given_H0

    def detect(self, Y, hist=0):
        """Classify the observations ``Y`` by comparing the ratio to the thresholds.

        Args:
            Y: Sequence of probabilities observed so far.
            hist: Counter supplied by the caller; only ``hist > 0`` is tested.

        Returns:
            One of four strings:

            * ``"Block Source IP"`` when the ratio is at or below ``eta_0``.
            * ``"Continue with more observations"`` when the ratio is at or
              above ``eta_1`` and ``hist > 0``.
            * ``"No History-Aware Reset of"`` when the ratio is at or above
              ``eta_1`` and ``hist`` is not positive.
            * ``"Update Y=(Y1,...,Yn) and A(Y) No"`` otherwise (including a
              ``nan`` ratio, which fails both comparisons).
        """
        # NOTE(review): the lower threshold maps to "block"; confirm that this
        # matches the hypothesis the probabilities in Y are meant to support.
        eta_1 = self.beta / self.alpha
        eta_0 = (1 - self.beta) / (1 - self.alpha)

        likelihood_ratio = self.likelihood_ratio(Y)

        if likelihood_ratio <= eta_0:
            return "Block Source IP"
        if likelihood_ratio >= eta_1:
            if hist > 0:
                return "Continue with more observations"
            return "No History-Aware Reset of"
        return "Update Y=(Y1,...,Yn) and A(Y) No"


class ML_TRW_Detection:
    """Random Forest packet classifier combined with a TRW sequential test.

    The forest is trained on rows of ``[is_corrupted_data_offset,
    size_discrepancy]`` with string labels (``'Geneva'`` / ``'Normal'``), the
    layout produced by ``load_and_label_pcap_data`` in this notebook.
    """

    _GENEVA_LABEL = 'Geneva'
    _MIN_VALID_DATA_OFFSET = 5     # TCP data offsets below this are treated as corrupted
    _EXPECTED_PACKET_SIZE = 1500   # reference size used for the size-discrepancy feature

    def __init__(self, alpha=0.01, beta=0.99):
        self.alpha = alpha
        self.beta = beta
        self.trw = TRW(alpha, beta)
        self.rf_model = None

    def train_model(self, X_train, y_train):
        """Fit a 100-tree Random Forest (fixed seed 42) and store it on ``rf_model``."""
        self.rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.rf_model.fit(X_train, y_train)

    @classmethod
    def _packet_features(cls, packet):
        """Return ``[is_corrupted_data_offset, size_discrepancy]`` for one packet."""
        tcp_header = packet.getlayer(TCP)
        data_offset = tcp_header.dataofs if tcp_header is not None else None
        is_corrupted_data_offset = int(
            data_offset is not None and data_offset < cls._MIN_VALID_DATA_OFFSET
        )
        size_discrepancy = abs(len(packet) - cls._EXPECTED_PACKET_SIZE)
        return [is_corrupted_data_offset, size_discrepancy]

    def detect_Geneva(self, packets):
        """Return the per-packet probability of the ``'Geneva'`` class.

        Args:
            packets: Iterable of already-parsed packets (objects offering
                ``getlayer`` and ``len``), e.g. the result of ``rdpcap``.

        Returns:
            1-D NumPy array with one probability per packet; empty when
            ``packets`` is empty. All zeros if the trained model never saw the
            ``'Geneva'`` label.

        Raises:
            RuntimeError: If ``train_model`` has not been called yet.
        """
        if self.rf_model is None:
            raise RuntimeError("train_model() must be called before detect_Geneva()")

        # Features are computed directly from the packets; the pcap *file*
        # loader cannot be applied to individual packets.
        feature_rows = [self._packet_features(packet) for packet in packets]
        if not feature_rows:
            return np.empty(0)

        probabilities = np.asarray(self.rf_model.predict_proba(feature_rows))
        # predict_proba columns follow rf_model.classes_, so look the column up
        # by label instead of assuming a fixed position.
        class_labels = list(self.rf_model.classes_)
        if self._GENEVA_LABEL not in class_labels:
            return np.zeros(len(feature_rows))
        return probabilities[:, class_labels.index(self._GENEVA_LABEL)]

    def detect_Geneva_realtime(self, Geneva_traffic, Normal_traffic):
        """Feed flows one by one into the TRW test until it asks to block.

        Args:
            Geneva_traffic: Iterable of flows, each flow an iterable of packets.
            Normal_traffic: Accepted for interface compatibility; not used.

        Returns:
            ``"Geneva detected, block IP"`` as soon as the TRW test returns
            ``"Block Source IP"``, otherwise ``"Geneva not detected"`` after all
            flows have been consumed.
        """
        Y = []
        hist = 0
        for flow in Geneva_traffic:
            geneva_probabilities = np.asarray(self.detect_Geneva(flow), dtype=float)
            # TRW.detect blocks when prod(Y) / prod(1 - Y) is small, i.e. when the
            # observations are small, so it is given the probability that each
            # packet is NOT Geneva. One observation per packet keeps Y flat even
            # when flows differ in length.
            Y.extend((1.0 - geneva_probabilities).tolist())
            result = self.trw.detect(Y, hist)
            if result == "Block Source IP":
                return "Geneva detected, block IP"
            elif result == "Continue with more observations":
                hist += 1
        return "Geneva not detected"

def load_and_label_pcap_data(pcap_files):
    """Read capture files and build one feature row and one label per packet.

    Args:
        pcap_files: Iterable of paths to capture files readable by ``rdpcap``.

    Returns:
        ``(features, labels)``: two parallel lists. Each feature row is
        ``[is_corrupted_data_offset, size_discrepancy]``; each label is
        ``'Geneva'`` or ``'Normal'``.

    NOTE(review): the label is a rule over the same two features that are
    returned (corrupted offset AND size within 10 bytes of 1500); it does not
    depend on which file a packet came from. Confirm this is intended.
    """
    expected_packet_size = 1500  # Typical Ethernet frame size
    features = []
    labels = []
    for pcap_file in pcap_files:
        # Uses rdpcap/TCP from the import cell so the function does not depend
        # on a module alias created in a later cell.
        for packet in rdpcap(pcap_file):
            # 1. Corrupted TCP data offset (a valid offset is at least 5).
            #    Non-TCP packets, or packets without a data offset, count as
            #    not corrupted.
            tcp_header = packet.getlayer(TCP)
            data_offset = tcp_header.dataofs if tcp_header is not None else None
            is_corrupted_data_offset = int(data_offset is not None and data_offset < 5)

            # 2. Packet size discrepancy relative to the expected size.
            size_discrepancy = abs(len(packet) - expected_packet_size)

            # Further TCP/IP header fields (checksum, TTL, flags) are not used
            # as features yet.
            label = 'Geneva' if (is_corrupted_data_offset == 1 and size_discrepancy < 10) else 'Normal'
            features.append([is_corrupted_data_offset, size_discrepancy])
            labels.append(label)
    return features, labels

In [4]:
# Build the detector used by the rest of the notebook. The thresholds are kept
# in one place so they are easy to tune.
DETECTOR_PARAMS = {"alpha": 0.01, "beta": 0.99}
ml_trw_detector = ML_TRW_Detection(**DETECTOR_PARAMS)

In [5]:
# Upload the two capture files through the Colab file picker.
# The first uploaded name is treated as the Geneva capture and the second as
# the normal-traffic capture (presumably in selection order; verify after upload).
uploaded_files = files.upload()
uploaded_names = list(uploaded_files.keys())
if len(uploaded_names) < 2:
    raise ValueError(
        "Expected two uploaded PCAP files (Geneva traffic first, normal traffic second), "
        f"got {len(uploaded_names)}"
    )
geneva_traffic_file = uploaded_names[0]
normal_traffic_file = uploaded_names[1]

# Each list holds the whole capture file for its traffic class.
Geneva_traffic_indices = [geneva_traffic_file]
Normal_traffic_indices = [normal_traffic_file]

Saving 200722_win_scale_examples_anon.pcapng to 200722_win_scale_examples_anon.pcapng
Saving 2022-02-23-traffic-analysis.pcap to 2022-02-23-traffic-analysis.pcap


In [10]:
import scapy.all as scapy

# Read both uploaded captures and turn every packet into a feature row plus a
# 'Geneva' / 'Normal' label.
# NOTE(review): load_and_label_pcap_data derives the label from the same two
# features it returns, so the accuracy below presumably reflects how well the
# forest reproduces that rule rather than real Geneva detection; confirm.
pcap_files = [geneva_traffic_file, normal_traffic_file]
features, labels = load_and_label_pcap_data(pcap_files)

# Hold out 20% of the packets for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Fit the Random Forest inside the detector on the training portion.
ml_trw_detector.train_model(X_train, y_train)

# Real-time detection is currently disabled.
# NOTE(review): detect_Geneva_realtime iterates over its first argument as flows,
# so passing file names as below would not work as written.
# result = ml_trw_detector.detect_Geneva_realtime('200722_win_scale_examples_anon.pcapng', '2022-02-23-traffic-analysis.pcap')
# print(result)

# Score the trained forest on the held-out packets.
y_pred = ml_trw_detector.rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.91


Evaluation against 13 censors provided by Geneva

In [11]:
def extract_flows(pcap_file):
    """Group the IP packets of a capture file into flows.

    Args:
        pcap_file: Path to a capture file readable by ``rdpcap``.

    Returns:
        Dict mapping ``(src_ip, dst_ip, src_port, dst_port, protocol)`` to the
        list of packets with that key, in capture order. Ports are ``None`` for
        packets without a TCP layer. Packets without an IP layer are skipped.
        The key is directional: the two directions of a connection end up in
        different flows.
    """
    flows = {}

    for packet in rdpcap(pcap_file):
        # Captures routinely contain frames without an IP layer; indexing
        # packet[IP] on those would raise, so leave them out.
        if IP not in packet:
            continue
        ip_layer = packet[IP]

        if TCP in packet:
            tcp_layer = packet[TCP]
            src_port, dst_port = tcp_layer.sport, tcp_layer.dport
        else:
            src_port = dst_port = None

        flow_key = (ip_layer.src, ip_layer.dst, src_port, dst_port, ip_layer.proto)
        flows.setdefault(flow_key, []).append(packet)

    return flows

In [None]:
# Assume you have already instantiated your ML_TRW_Detection class and trained the model

from scapy.all import wrpcap, IP, TCP
# Define the sample packets
sample_packets = [
    # SYN packet to synchronize TCB
    IP(src="192.168.1.1", dst="10.0.0.1") / TCP(sport=1234, dport=80, flags="S"),
    # Packet without forbidden keyword
    IP(src="10.0.0.1", dst="192.168.1.1") / TCP(sport=80, dport=1234) / "Normal payload",
    # Packet with forbidden keyword
    IP(src="10.0.0.1", dst="192.168.1.1") / TCP(sport=80, dport=1234) / "Forbidden keyword payload",
]

# Write each sample packet to its own capture file.
pcap_file1 = "sample1.pcap"
pcap_file2 = "sample2.pcap"
pcap_file3 = "sample3.pcap"
sample_pcap_files = [pcap_file1, pcap_file2, pcap_file3]
for pcap_file, sample_packet in zip(sample_pcap_files, sample_packets):
    wrpcap(pcap_file, sample_packet)

# Initialize a list to store detection results
detection_results = []

# Perform detection on each pcap file
for pcap_file in sample_pcap_files:
    packets = scapy.rdpcap(pcap_file)
    detection_results.append(ml_trw_detector.detect_Geneva(packets))

# Display detection results
for i, result in enumerate(detection_results):
    print(f"Detection result for sample {i+1}: {result}")

# Expected behavior of Censor 1
expected_behavior = [
    False,  # No detection on SYN packet
    False,  # No detection on packet without forbidden keyword
    True,   # Detection on packet with forbidden keyword
]

# detect_Geneva yields per-packet probabilities, so they have to be reduced to a
# yes/no decision before they can be compared with the expected booleans.
# NOTE(review): 0.5 is an assumed cut-off; tune as needed.
DETECTION_THRESHOLD = 0.5

# Compare results with expected behavior. The success message is printed only
# when no sample mismatched (a for/else would print it unconditionally because
# the loop never breaks).
all_match = True
for i, result in enumerate(detection_results):
    detected = bool(np.any(np.asarray(result, dtype=float) >= DETECTION_THRESHOLD))
    if detected != expected_behavior[i]:
        print(f"Error: Unexpected detection result for packet {i+1}")
        all_match = False
if all_match:
    print("Detection results match expected behavior")