# In [1]:
# Cell 1: Clustering Zones by Network Performance
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

df = pd.read_csv('Cleaned_rolled.csv', parse_dates=['datetime'])

# Features used for clustering. Rows with a missing value in any of them are
# left out of the fit and end up with NaN in the 'cluster' column below.
features = ['x', 'y', 'lat_mean', 'br_ratio', 'send_data', 'Bitrate', 'Retransmissions']
X = df[features].dropna()

# Standardize so that no single feature dominates the distance by scale alone.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# The silhouette coefficient is built from pairwise distances, so scoring the
# full data set grows quadratically with the number of rows. Above the cap,
# score on a fixed-seed random subsample instead; at or below the cap the
# full data is used, exactly as before.
SILHOUETTE_SAMPLE_CAP = 10_000
sil_sample_size = SILHOUETTE_SAMPLE_CAP if len(X_scaled) > SILHOUETTE_SAMPLE_CAP else None

# Determine optimal k using silhouette scores. Each fitted model and its
# labels are kept so the winning k does not have to be refit afterwards.
sil_scores = {}
fitted_by_k = {}
for k in range(2, 8):
    # n_init is pinned explicitly: its default differs between scikit-learn
    # releases, which would otherwise change results (and emit a warning).
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels_k = km.fit_predict(X_scaled)
    sil_scores[k] = silhouette_score(
        X_scaled,
        labels_k,
        sample_size=sil_sample_size,
        random_state=42,
    )
    fitted_by_k[k] = (km, labels_k)
print("Silhouette scores by k:", sil_scores)

# Final model: the k with the highest silhouette (first one wins on ties).
# Reusing the loop's fit is equivalent to refitting with the same seed/data.
k_opt = max(sil_scores, key=sil_scores.get)
kmeans, labels = fitted_by_k[k_opt]
df.loc[X.index, 'cluster'] = labels

# Inspect cluster centers (inverse-scaled back to the original feature units)
centers = scaler.inverse_transform(kmeans.cluster_centers_)
cluster_centers = pd.DataFrame(centers, columns=features)
print("Cluster centers:\n", cluster_centers)

# Profile cluster performance: per-cluster means of the key metrics.
# Rows without a cluster label (NaN) are excluded by groupby.
cluster_profiles = df.groupby('cluster')[['lat_mean', 'br_ratio', 'send_data']].mean()
print("Cluster profiles:\n", cluster_profiles)

# Cell 2: Forecasting Next-Hour Latency
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error

# Aggregate to hourly latency.
# resample() requires a datetime-like index, but df still carries the default
# integer index from read_csv, so index the series by 'datetime' first.
# Rows with an unparseable/missing timestamp cannot be placed on the time axis.
lat_series = (
    df.loc[df['datetime'].notna()]
    .set_index('datetime')['lat_mean']
    .sort_index()
)
# Keep empty hours as NaN instead of dropping them: removing them would break
# the regular hourly grid, so a seasonal lag of 24 would no longer mean
# "same hour, previous day". SARIMAX handles missing observations itself.
hourly = lat_series.resample('1H').mean()

# Train/test split (80/20), chronological
split = int(len(hourly) * 0.8)
train, test = hourly.iloc[:split], hourly.iloc[split:]

# Fit SARIMAX with daily seasonality (period of 24 hourly steps)
model = SARIMAX(train, order=(1, 0, 1), seasonal_order=(0, 1, 1, 24))
res = model.fit(disp=False)

# Forecast the next 24 hours
forecast = res.get_forecast(steps=24)
pred = forecast.predicted_mean

# Evaluate on test set: forecast exactly len(test) steps past the end of the
# training data, then compare position by position, skipping hours that have
# no observed latency (NaN) since MAE cannot be computed for them.
test_pred = res.forecast(steps=len(test))
observed = test.notna().to_numpy()
mae = mean_absolute_error(
    test.to_numpy()[observed],
    test_pred.to_numpy()[observed],
)
print(f"Test MAE: {mae:.3f}")

# Plot train, test, and forecast
plt.figure()
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(pred.index, pred, label='Forecast')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Latency (ms)')
plt.title('Hourly Latency Forecast')
plt.show()



# KeyboardInterrupt: (notebook output: the run was interrupted before it finished)