In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout

# Assuming we have our DataFrame 'df' from the previous example
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)

# Generate dates for one year of hourly data
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(hours=i) for i in range(8760)]  # 365 days * 24 hours

# Create base data
data = {
    'timestamp': dates,
    'network_traffic': np.random.normal(1000, 200, 8760),
    'latency': np.random.normal(20, 5, 8760),
    'packet_loss': np.random.normal(0.5, 0.2, 8760),
    'cpu_usage': np.random.normal(60, 15, 8760),
    'memory_usage': np.random.normal(70, 10, 8760),
    'trading_volume': np.random.normal(10000, 2000, 8760),
    'market_volatility': np.random.normal(15, 5, 8760)
}

df = pd.DataFrame(data)

# Add daily and weekly patterns
df['network_traffic'] += np.sin(np.arange(8760) * (2 * np.pi / 24)) * 200  # Daily pattern
df['trading_volume'] += np.sin(np.arange(8760) * (2 * np.pi / (24 * 7))) * 3000  # Weekly pattern

# Add some random congestion events
congestion_events = np.random.choice(8760, size=100, replace=False)
df.loc[congestion_events, 'network_traffic'] *= np.random.uniform(1.5, 2.5, 100)
df.loc[congestion_events, 'latency'] *= np.random.uniform(1.3, 2.0, 100)
df.loc[congestion_events, 'packet_loss'] *= np.random.uniform(1.5, 3.0, 100)

# Add some correlated effects
df.loc[df['trading_volume'] > df['trading_volume'].quantile(0.95), 'network_traffic'] *= 1.2
df.loc[df['cpu_usage'] > 80, 'latency'] *= 1.1

# Create congestion_event column
df['congestion_event'] = 0
df.loc[(df['network_traffic'] > df['network_traffic'].quantile(0.95)) &
       (df['latency'] > df['latency'].quantile(0.95)) &
       (df['packet_loss'] > df['packet_loss'].quantile(0.95)), 'congestion_event'] = 1

# Ensure non-negative values
for col in ['network_traffic', 'latency', 'packet_loss', 'cpu_usage', 'memory_usage', 'trading_volume', 'market_volatility']:
    df[col] = df[col].clip(lower=0)

# Display basic statistics and first few rows
print(f"\nDataset shape: {df.shape}")
print(f"Number of congestion events: {df['congestion_event'].sum()}")

# Save to CSV (optional)
df.to_csv('network_data.csv', index=False)
df.head()


Dataset shape: (8760, 9)
Number of congestion events: 40


Unnamed: 0,timestamp,network_traffic,latency,packet_loss,cpu_usage,memory_usage,trading_volume,market_volatility,congestion_event
0,2023-01-01 00:00:00,1099.342831,24.857062,0.52407,76.209318,66.188741,12272.132384,16.834907,0
1,2023-01-01 01:00:00,1024.110949,13.523707,0.644091,71.681097,76.880177,11414.984879,15.522807,0
2,2023-01-01 02:00:00,1229.537708,14.062234,0.328283,66.577592,56.918259,9382.709891,8.767731,0
3,2023-01-01 03:00:00,1446.027328,22.894549,0.839103,69.568291,58.507044,11941.551252,21.735282,0
4,2023-01-01 04:00:00,1126.374406,19.678312,0.090567,62.702585,67.015647,14610.942936,11.285495,0


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Assuming df is your DataFrame
# Ensure congestion_event is of integer type
df['congestion_event'] = df['congestion_event'].astype(int)

# Data Preparation
def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:(i + sequence_length)])
        y.append(data[i + sequence_length])
    return np.array(X), np.array(y)

# Prepare features and target
features = ['network_traffic', 'latency', 'packet_loss', 'cpu_usage', 'memory_usage', 'trading_volume', 'market_volatility']
X = df[features].values
y_congestion = df['congestion_event'].values  # This should now be integers (0 or 1)

# Create sequences
sequence_length = 24  # Use 24 hours of data to predict the next hour
X_seq, y_seq = create_sequences(X, sequence_length)
y_congestion_seq = y_seq[:, -1]  # We only need the congestion_event for the target

# Split the data
X_train, X_test, y_congestion_train, y_congestion_test = train_test_split(X_seq, y_congestion_seq, test_size=0.2, random_state=42)

# Ensure y_congestion_train and y_congestion_test are integers
y_congestion_train = y_congestion_train.astype(int)
y_congestion_test = y_congestion_test.astype(int)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Network Traffic Prediction Model (LSTM)
def create_traffic_model(input_shape):
    model = Sequential([
        LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(32, activation='relu'),
        Dropout(0.2),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

traffic_model = create_traffic_model((sequence_length, X_train.shape[-1]))
traffic_model.fit(X_train_scaled, X_train[:, -1, 0], epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# Predict traffic for training and test data
X_train_traffic_pred = traffic_model.predict(X_train_scaled)
X_test_traffic_pred = traffic_model.predict(X_test_scaled)

# Prepare data for congestion model (replace actual traffic with predicted traffic)
X_train_with_pred = X_train_scaled.copy()
X_test_with_pred = X_test_scaled.copy()

X_train_with_pred[:, -1, 0] = X_train_traffic_pred.flatten()
X_test_with_pred[:, -1, 0] = X_test_traffic_pred.flatten()

# Congestion Prediction Model (Random Forest)
def create_congestion_model():
    return RandomForestClassifier(n_estimators=100, random_state=42)

congestion_model = create_congestion_model()
congestion_model.fit(X_train_with_pred[:, -1, :], y_congestion_train)

# Evaluate congestion prediction model
congestion_pred = congestion_model.predict(X_test_with_pred[:, -1, :])
print("\nCongestion Prediction Performance:")
print(classification_report(y_congestion_test, congestion_pred))
print(confusion_matrix(y_congestion_test, congestion_pred))


Congestion Prediction Performance:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00        11
           3       0.00      0.00      0.00        13
           4       0.00      0.00      0.00        12
           5       0.00      0.00      0.00        21
           6       0.00      0.00      0.00        42
           7       0.00      0.00      0.00        36
           8       0.07      0.03      0.04        67
           9       0.03      0.01      0.02        68
          10       0.03      0.03      0.03       102
          11       0.04      0.05      0.04       109
          12       0.09      0.12      0.10       129
          13       0.06      0.09      0.07       139
          14       0.09      0.15      0.11       118
          15       0.09      0.11      0.10       148
          16       0.06      0.09      0.07  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:

# 4. Prediction and Mitigation Functions
def predict_traffic_and_congestion(traffic_model, congestion_model, scaler, current_data):
    """
    Predict network traffic and then use it to predict congestion probability.
    """
    scaled_data = scaler.transform(current_data.reshape(-1, current_data.shape[-1])).reshape(current_data.shape)

    # Predict traffic
    traffic_pred = traffic_model.predict(scaled_data)[0][0]

    # Replace actual traffic with predicted traffic
    scaled_data_with_pred = scaled_data.copy()
    scaled_data_with_pred[0, -1, 0] = traffic_pred

    # Predict congestion
    congestion_prob = congestion_model.predict_proba(scaled_data_with_pred[0, -1, :].reshape(1, -1))[0][1]

    return traffic_pred, congestion_prob

def mitigate_congestion(traffic_pred, congestion_prob, threshold=0.0):
    """
    Implement mitigation strategies based on predicted traffic and congestion probability.
    """
    print(f"Predicted Network Traffic: {traffic_pred:.2f}")
    print(f"Congestion Probability: {congestion_prob:.4f}")

    if congestion_prob > threshold:
        print("High risk of congestion detected. Implementing mitigation strategies:")
        print("1. Rerouting network traffic")
        print("2. Adjusting network configurations")
        print("3. Scaling up resources")

        if congestion_prob > 0.9:
            print("4. Activating emergency backup systems")
            print("5. Notifying IT team for immediate intervention")
    else:
        print("Low risk of congestion. Maintaining normal operations.")



In [None]:
# 5. Test Case
np.random.seed(42)
test_case = pd.DataFrame({
    'network_traffic': np.random.normal(1000, 200, 48),
    'latency': np.random.normal(20, 5, 48),
    'packet_loss': np.random.normal(0.5, 0.2, 48),
    'cpu_usage': np.random.normal(60, 15, 48),
    'memory_usage': np.random.normal(70, 10, 48),
    'trading_volume': np.random.normal(10000, 2000, 48),
    'market_volatility': np.random.normal(15, 5, 48)
})

# Add some patterns to make it more realistic
test_case['network_traffic'] += np.sin(np.arange(48) * (2 * np.pi / 24)) * 200  # Daily pattern
test_case['trading_volume'] += np.sin(np.arange(48) * (2 * np.pi / 24)) * 3000  # Daily pattern

# Simulate a spike in network traffic and trading volume
test_case.loc[30:35, 'network_traffic'] *= 2
test_case.loc[30:35, 'trading_volume'] *= 1.5
test_case.loc[30:35, 'latency'] *= 1.5
test_case.loc[30:35, 'packet_loss'] *= 2

# Ensure non-negative values
for col in test_case.columns:
    test_case[col] = test_case[col].clip(lower=0)

# Prepare the test data
X_test = test_case.values
X_test_seq = np.array([X_test[i:i+sequence_length] for i in range(24)])



In [None]:
# Function to print network status
def print_network_status(hour, data):
    print(f"Hour {hour + 1} Network Status:")
    print(f"  Actual Network Traffic: {data[hour, -1, 0]:.2f}")
    print(f"  Latency: {data[hour, -1, 1]:.2f}")
    print(f"  Packet Loss: {data[hour, -1, 2]:.2f}")
    print(f"  CPU Usage: {data[hour, -1, 3]:.2f}")
    print(f"  Memory Usage: {data[hour, -1, 4]:.2f}")
    print(f"  Trading Volume: {data[hour, -1, 5]:.2f}")
    print(f"  Market Volatility: {data[hour, -1, 6]:.2f}")

# Run the test case
for i in range(24):
    current_data = X_test_seq[i:i+1]
    traffic_pred, congestion_prob = predict_traffic_and_congestion(traffic_model, congestion_model, scaler, current_data)

    print_network_status(i, X_test_seq)
    mitigate_congestion(traffic_pred, congestion_prob)
    print()

Hour 1 Network Status:
  Actual Network Traffic: 663.29
  Latency: 27.69
  Packet Loss: 0.65
  CPU Usage: 88.45
  Memory Usage: 77.59
  Trading Volume: 7174.77
  Market Volatility: 20.49
Predicted Network Traffic: 768.42
Congestion Probability: 0.0000
Low risk of congestion. Maintaining normal operations.

Hour 2 Network Status:
  Actual Network Traffic: 891.12
  Latency: 19.82
  Packet Loss: 0.66
  CPU Usage: 56.32
  Memory Usage: 62.27
  Trading Volume: 9494.86
  Market Volatility: 19.13
Predicted Network Traffic: 958.93
Congestion Probability: 0.0000
Low risk of congestion. Maintaining normal operations.

Hour 3 Network Status:
  Actual Network Traffic: 1073.95
  Latency: 27.82
  Packet Loss: 0.32
  CPU Usage: 48.69
  Memory Usage: 67.63
  Trading Volume: 8280.89
  Market Volatility: 19.07
Predicted Network Traffic: 1106.91
Congestion Probability: 0.0000
Low risk of congestion. Maintaining normal operations.

Hour 4 Network Status:
  Actual Network Traffic: 869.80
  Latency: 6.90
  