In [9]:
# Import all required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image, display, Markdown
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report


In [11]:
import os
import pandas as pd
import numpy as np
from IPython.display import Markdown, display

def generate_realistic_dataset(seed=None):
    """Simulate routing-protocol metrics for RIP, OSPF and EIGRP.

    One row is produced per (day, protocol) pair over 10 consecutive days
    starting 2024-05-10, i.e. 30 rows. The scenario is chosen by day index
    and cycles through 'normal', 'congestion', 'link_failure'.

    Parameters
    ----------
    seed : int or None, optional
        When given, samples are drawn from ``np.random.default_rng(seed)`` so
        the dataset is reproducible. When ``None`` (the default) the global
        ``np.random`` state is used, so existing callers (including ones that
        call ``np.random.seed`` beforehand) behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns: Protocol, Latency, Jitter, PacketLoss, ConvergenceTime,
        Scenario, Notes.
    """
    protocols = ['RIP', 'OSPF', 'EIGRP']
    scenarios = ['normal', 'congestion', 'link_failure']

    # Per-protocol sampling profile. Each metric is (mean, std, floor), listed
    # in draw order: latency, jitter, packet loss, convergence time.
    # Packet loss is floored at 0: the normal draw can otherwise go negative,
    # which is not a meaningful loss value.
    profiles = {
        'RIP':   ((100, 25, 50), (30, 10, 10), (1.0, 0.3, 0), (40, 15, 20)),
        'OSPF':  ((60, 15, 30), (20, 5, 5), (0.5, 0.2, 0), (25, 8, 10)),
        'EIGRP': ((35, 10, 20), (12, 3, 2), (0.2, 0.1, 0), (15, 5, 5)),
    }

    # Both the np.random module and a Generator expose .normal(mean, std).
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Day labels such as "10/May" used in the free-text notes column.
    dates = pd.date_range("2024-05-10", periods=10).strftime("%d/%b")

    data = []
    for i, date in enumerate(dates):
        # The scenario depends only on the day, so resolve it once per day.
        scenario = scenarios[i % len(scenarios)]

        for protocol in protocols:
            # float() keeps the value a float even when the integer floor wins.
            latency, jitter, packet_loss, convergence = (
                float(max(floor, rng.normal(mean, std)))
                for mean, std, floor in profiles[protocol]
            )

            # Scenario effects: congestion inflates latency and jitter;
            # a link failure multiplies convergence time (x3 for RIP, x2 otherwise).
            if scenario == 'congestion':
                latency *= 1.5
                jitter *= 1.5
            elif scenario == 'link_failure':
                convergence *= 3 if protocol == 'RIP' else 2

            # Free-text note resembling a manually written log entry.
            notes = f"{date} - {scenario} scenario"
            if scenario == 'link_failure':
                notes += f" (R{i%3+1}-R{(i+1)%3+1} link down)"

            data.append([protocol, latency, jitter, packet_loss, convergence, scenario, notes])

    return pd.DataFrame(
        data,
        columns=['Protocol', 'Latency', 'Jitter', 'PacketLoss', 'ConvergenceTime', 'Scenario', 'Notes'],
    )

# 1. Resolve the output location (normpath yields OS-native separators).
save_path = os.path.normpath('data/raw/simulated_metrics.csv')

# 2. Generate data
df = generate_realistic_dataset()

# 3. Create the target folder and save. Only the filesystem work is guarded,
#    so a failure to create the folder also triggers the fallback, and a
#    display problem is never misreported as a save error.
try:
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    df.to_csv(save_path, index=False)
except OSError as e:
    display(Markdown(f"### ❌ Error saving file: {e}"))
    display(Markdown("### Saving to current directory instead..."))
    # Keep save_path pointing at the file that was actually written.
    save_path = 'simulated_metrics.csv'
    df.to_csv(save_path, index=False)
else:
    display(Markdown(f"### ✅ Successfully saved to: `{save_path}`"))

# 4. Show a small random sample regardless of where the file ended up.
display(Markdown("### Sample of Collected Data"))
display(df.sample(5))

### ✅ Successfully saved to: `data\raw\simulated_metrics.csv`

### Sample of Collected Data

Unnamed: 0,Protocol,Latency,Jitter,PacketLoss,ConvergenceTime,Scenario,Notes
27,RIP,141.438711,37.394621,1.638186,20.0,normal,19/May - normal scenario
1,OSPF,30.0,29.858703,0.593371,25.798575,normal,10/May - normal scenario
28,OSPF,45.465464,15.720351,0.355247,25.558225,normal,19/May - normal scenario
22,OSPF,106.171023,29.370915,0.742931,13.432689,congestion,17/May - congestion scenario
8,EIGRP,38.893083,10.648248,0.131215,30.396551,link_failure,12/May - link_failure scenario (R3-R1 link down)
