# Cloud AutoScale - Modeling Pipeline

Production-grade feature engineering and ML forecasting

In [14]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

sys.path.insert(0, str(Path.cwd().parent))
from cloud_autoscale.data import SyntheticLoader, GCP2019Loader

print("✓ Imports successful")

✓ Imports successful


## 1. Load Data

In [15]:
# Load synthetic data
loader = SyntheticLoader(pattern="bursty", duration_minutes=1440, step_minutes=5, seed=42)
df = loader.load()
print(f"Loaded {len(df)} steps")

Loaded 288 steps


## 2. Feature Engineering

In [16]:
# Create lag features
for lag in [1, 3, 6, 12]:
    df[f"cpu_lag{lag}"] = df["cpu_demand"].shift(lag)

# Rolling stats
for w in [3, 6, 12]:
    df[f"cpu_ma{w}"] = df["cpu_demand"].rolling(w, min_periods=1).mean()
    df[f"cpu_std{w}"] = df["cpu_demand"].rolling(w, min_periods=1).std().fillna(0)

# Cyclical features
df["hour_sin"] = np.sin(2*np.pi*df["step"]/288)
df["hour_cos"] = np.cos(2*np.pi*df["step"]/288)

df = df.dropna()
print(f"Features: {len(df.columns)}")
print(f"Shape: {len(df.shape)}")

Features: 22
Shape: 2


## 3. Train-Test Split

In [17]:
split = int(len(df)*0.8)
train, test = df.iloc[:split], df.iloc[split:]

feature_cols = [c for c in df.columns if c not in ["step","time","cpu_demand","mem_demand","new_instances","machines_reporting"]]
X_train, y_train = train[feature_cols], train["cpu_demand"]
X_test, y_test = test[feature_cols], test["cpu_demand"]

print(f"Train: {len(X_train)}, Test: {len(X_test)}")

Train: 0, Test: 0


## 4. Train Models

In [18]:
# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")

ValueError: Found array with 0 sample(s) (shape=(0, 16)) while a minimum of 1 is required by LinearRegression.

## 5. Visualize Results

In [None]:
plt.figure(figsize=(14,6))
plt.plot(y_test.values[:100], label="Actual")
plt.plot(y_pred[:100], label="Predicted")
plt.legend()
plt.title("Demand Forecast")
plt.show()