# Section 3.3: Generating synthetic traffic data for Q-learning training
The simulation models academic calendar phases, class-specific traffic patterns, and infrastructure constraints.

In [1]:
import simpy
import numpy as np
import pandas as pd
from scipy.stats import weibull_min, gamma, poisson, powerlaw, lognorm
import datetime

### Configuration Parameters

In [3]:
# Length of the simulated window in days (about one academic season)
SIMULATION_DAYS = 90
# Minutes between consecutive measurements
SIMULATION_RESOLUTION = 5
# User classes for which traffic is generated
CLASSES = ['Teaching', 'Non-teaching', 'Managerial', 'Non-managerial']
CAPACITY_PROFILE = {  # Mbps (RENU infrastructure upgrades)
    '2023': 80,
    '2025': 500
}

# Seed numpy's global generator so that Restart & Run All reproduces the same
# synthetic dataset. Both the np.random.* calls and the scipy.stats .rvs()
# calls (when no random_state is passed) draw from this global generator.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

### Determine academic phase based on simulated date

In [4]:
def academic_phase_detector(t):
    """Classify a simulated day into an academic phase.

    Args:
        t: Offset in days from 2024-01-01. Fractional values are accepted;
            only the resulting calendar date is used.

    Returns:
        A ``(phase, phase_intensity)`` tuple. ``phase`` is 'Examination',
        'Administrative' or 'Lecture'. ``phase_intensity`` lies in
        [0.6, 1.0] for 'Examination' (highest on the exam date itself) and
        is 1.0 for the other two phases.
    """
    exam_dates = (datetime.date(2024, 3, 15), datetime.date(2024, 6, 10))
    admin_dates = (datetime.date(2024, 1, 20), datetime.date(2024, 4, 5))

    today = datetime.date(2024, 1, 1) + datetime.timedelta(days=t)

    # Examination takes priority: any date within two weeks of an exam date.
    days_to_exam = min(abs((today - exam_day).days) for exam_day in exam_dates)
    if days_to_exam <= 14:
        # Intensity decays linearly with distance but never drops below 0.6.
        return 'Examination', max(0.6, 1.0 - (days_to_exam / 14))

    # Administrative: within one week of any administrative cycle date.
    for admin_day in admin_dates:
        if abs((today - admin_day).days) <= 7:
            return 'Administrative', 1.0

    # Everything else is regular lecture time.
    return 'Lecture', 1.0

### Generate traffic for a specific class based on academic phase

In [5]:
def generate_traffic(class_name, phase, intensity, t):
    """Draw one random traffic sample for a user class.

    Args:
        class_name: One of 'Teaching', 'Non-teaching', 'Managerial',
            'Non-managerial'.
        phase: One of 'Lecture', 'Examination', 'Administrative'.
        intensity: Multiplier applied to the base request rate.
        t: Elapsed simulation time in minutes; only the time of day
            (``t % 1440``) is used.

    Returns:
        A ``(requests, volume)`` tuple: the request count as a non-negative
        int and the traffic volume (MB) as a non-negative number.

    Raises:
        KeyError: If ``phase`` or ``class_name`` is not a known key.
    """
    # Base traffic parameters (requests per minute)
    base_params = {
        'Lecture': {
            'Teaching': 25, 'Non-teaching': 18,
            'Managerial': 8, 'Non-managerial': 15
        },
        'Examination': {
            'Teaching': 40, 'Non-teaching': 25,
            'Managerial': 10, 'Non-managerial': 20
        },
        'Administrative': {
            'Teaching': 20, 'Non-teaching': 15,
            'Managerial': 30, 'Non-managerial': 18
        }
    }

    # Apply phase intensity
    base_rate = base_params[phase][class_name] * intensity

    # Diurnal pattern with a single daily peak at 13:00 (factor 1.0) and a
    # trough at 01:00 (factor 0.2); it stays above ~0.88 between 10AM and 4PM.
    # The divisor is 12 so the cosine completes one cycle per 24 hours; a
    # divisor of 6 would create a second, spurious peak at 01:00.
    hour = (t % 1440) / 60
    diurnal_factor = 0.6 + 0.4 * np.cos((hour - 13) * np.pi / 12)

    # Generate request count; the distribution family depends on the phase
    if phase == 'Lecture':
        requests = weibull_min.rvs(1.2, scale=base_rate * diurnal_factor)
    elif phase == 'Examination':
        requests = gamma.rvs(2.0, scale=base_rate * diurnal_factor * 0.8)
    else:  # Administrative
        requests = poisson.rvs(base_rate * diurnal_factor)

    # Generate traffic volume (MB); the per-request size distribution depends
    # on the class. Note the volume uses the untruncated request draw.
    if class_name == 'Teaching':
        # Teaching always accounts for at least one request's worth of volume
        volume = powerlaw.rvs(2.3, scale=120) * max(1, requests)
    elif class_name == 'Non-teaching':
        volume = np.random.lognormal(5.0, 0.5) * requests
    elif class_name == 'Managerial':
        volume = np.random.uniform(5, 20) * requests
    else:  # Non-managerial
        volume = lognorm.rvs(0.7, scale=np.exp(5.2)) * requests

    return max(0, int(requests)), max(0, volume)

### Calculate policy-driven weight based on academic calendar

In [6]:
def policy_weight_calculator(class_name, t):
    """Return the policy weight of a class on a given simulated day.

    Args:
        class_name: One of 'Teaching', 'Non-teaching', 'Managerial',
            'Non-managerial'.
        t: Simulated day, forwarded to ``academic_phase_detector``.

    Returns:
        The class's base weight, except for 'Teaching' during the
        'Examination' phase, where the weight is raised by
        ``0.3 * intensity`` and capped at 0.58.
    """
    # Base weights from Delphi panel (Section 3.2)
    delphi_weights = {
        'Teaching': 0.28,
        'Non-teaching': 0.12,
        'Managerial': 0.35,
        'Non-managerial': 0.25
    }

    current_phase, phase_strength = academic_phase_detector(t)

    # Only the Teaching class reacts to exam proximity; all others keep
    # their base weight regardless of phase.
    is_boosted = class_name == 'Teaching' and current_phase == 'Examination'
    if not is_boosted:
        return delphi_weights[class_name]

    return min(0.58, delphi_weights['Teaching'] + 0.3 * phase_strength)

### Main simulation function using SimPy

In [7]:
def simulate_network_traffic():
    """Run the SimPy traffic simulation and return the per-interval records.

    One record is produced every ``SIMULATION_RESOLUTION`` minutes for
    ``SIMULATION_DAYS`` days. Each record holds the timestamp (elapsed
    minutes), academic phase, link capacity, per-class requests / volume /
    policy weight, the totals across classes, and the link utilization.

    Side effects:
        Writes the records to 'network_traffic_simulation.csv' in the
        current working directory and prints a one-line summary.

    Returns:
        pandas.DataFrame with one row per simulated interval.
    """
    env = simpy.Environment()
    intervals_per_day = 24 * (60 // SIMULATION_RESOLUTION)
    total_intervals = SIMULATION_DAYS * intervals_per_day
    results = []

    def traffic_monitor(env):
        """Record the network state once per interval, then advance the clock."""
        for t in range(total_intervals):
            # Elapsed simulation time in minutes and in (fractional) days
            elapsed_minutes = t * SIMULATION_RESOLUTION
            sim_day = t / intervals_per_day

            # Determine academic phase
            phase, intensity = academic_phase_detector(sim_day)

            # The '2025' capacity applies only once more than 365 simulated
            # days have elapsed; before that the '2023' capacity is used.
            capacity = CAPACITY_PROFILE['2025'] if sim_day > 365 else CAPACITY_PROFILE['2023']

            # Initialize state vector
            state_vector = {
                'timestamp': elapsed_minutes,
                'phase': phase,
                'capacity': capacity,
                'total_requests': 0,
                'total_volume': 0
            }

            # Generate traffic for each class
            for class_name in CLASSES:
                # generate_traffic derives the hour of day from a time in
                # minutes, so it must receive elapsed minutes rather than the
                # interval index; otherwise the daily cycle is stretched by a
                # factor of SIMULATION_RESOLUTION.
                requests, volume = generate_traffic(class_name, phase, intensity, elapsed_minutes)
                weight = policy_weight_calculator(class_name, sim_day)

                state_vector.update({
                    f'{class_name}_requests': requests,
                    f'{class_name}_volume': volume,
                    f'{class_name}_weight': weight
                })

                # Track totals
                state_vector['total_requests'] += requests
                state_vector['total_volume'] += volume

            # Bandwidth utilization: volume * 8 spread over the interval's
            # seconds, clipped to the link capacity so it never exceeds 1.0.
            utilized_bw = min(capacity, state_vector['total_volume'] * 8 / (SIMULATION_RESOLUTION * 60))
            state_vector['utilization'] = utilized_bw / capacity

            # Store results
            results.append(state_vector)
            yield env.timeout(SIMULATION_RESOLUTION)

    env.process(traffic_monitor(env))
    env.run(until=total_intervals * SIMULATION_RESOLUTION)

    # Convert to DataFrame and save
    df = pd.DataFrame(results)
    df.to_csv('network_traffic_simulation.csv', index=False)
    print(f"Simulation complete. {len(df)} records saved to CSV.")

    return df

In [8]:
# Run the simulation and keep the resulting DataFrame in `traffic_data`;
# the call also writes network_traffic_simulation.csv and prints a summary.
traffic_data = simulate_network_traffic()

Simulation complete. 25920 records saved to CSV.
