# Network Traffic Analysis for Anomaly Detection
This notebook demonstrates a simple network traffic analysis project for detecting anomalies in packet sizes.

## Step 1: Data Collection
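In a real-world scenario, you would collect actual network traffic data (for example, from packet captures or flow logs). For this example, we'll instead generate reproducible synthetic data: one record per minute with a source IP, a destination IP, a protocol, and a normally distributed packet size.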

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Set plot style: apply matplotlib's 'ggplot' style sheet to figures created after this call
plt.style.use('ggplot')

def generate_sample_data(num_records=1000, seed=42):
    """Generate a synthetic network-traffic table for the anomaly-detection demo.

    Parameters
    ----------
    num_records : int, default 1000
        Number of rows to generate. Rows are spaced one minute apart,
        starting at 2024-01-01 00:00:00.
    seed : int or None, default 42
        Value passed to ``np.random.seed`` before sampling. The default
        reproduces the original hard-coded behaviour; pass another integer
        for a different (still reproducible) dataset, or ``None`` to let
        NumPy reseed from fresh entropy.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``timestamp``, ``source_ip`` (drawn uniformly from
        192.168.1.1-192.168.1.10), ``destination_ip`` (drawn uniformly from
        10.0.0.1-10.0.0.5), ``protocol`` (TCP/UDP/ICMP with probabilities
        0.70/0.25/0.05) and ``packet_size`` (Normal(500, 150) samples
        truncated to integers).

    Notes
    -----
    This function reseeds NumPy's *global* legacy random generator, so any
    ``np.random.*`` call made afterwards continues from that seeded state.
    Because the normal distribution is unbounded, ``packet_size`` is not
    guaranteed to be positive.
    """
    # Seed the global legacy generator (kept global on purpose so existing
    # downstream code that relies on the seeded state behaves as before).
    np.random.seed(seed)

    source_pool = ['192.168.1.' + str(i) for i in range(1, 11)]
    destination_pool = ['10.0.0.' + str(i) for i in range(1, 6)]

    # The random draws below must stay in this order (source, destination,
    # protocol, packet size): they share one random stream, so reordering
    # them would change the generated values.
    data = {
        'timestamp': pd.date_range(start='2024-01-01', periods=num_records, freq='1min'),
        'source_ip': np.random.choice(source_pool, num_records),
        'destination_ip': np.random.choice(destination_pool, num_records),
        'protocol': np.random.choice(['TCP', 'UDP', 'ICMP'], num_records, p=[0.7, 0.25, 0.05]),
        # astype(int) truncates the float samples toward zero.
        'packet_size': np.random.normal(500, 150, num_records).astype(int),
    }
    return pd.DataFrame(data)

# Build the synthetic traffic table that this notebook works with.
df = generate_sample_data()

# Show the first rows followed by the table's dimensions; a single print
# with a newline separator emits the same text as two consecutive prints.
print(df.head(), f"\nDataset shape: {df.shape}", sep="\n")


            timestamp    source_ip destination_ip protocol  packet_size
0 2024-01-01 00:00:00  192.168.1.7       10.0.0.1      TCP          511
1 2024-01-01 00:01:00  192.168.1.4       10.0.0.4      TCP          503
2 2024-01-01 00:02:00  192.168.1.8       10.0.0.3      TCP          252
3 2024-01-01 00:03:00  192.168.1.5       10.0.0.4      TCP          680
4 2024-01-01 00:04:00  192.168.1.7       10.0.0.4      TCP          371

Dataset shape: (1000, 5)
