# Classroom Usage Forecast using ARIMA Model

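This notebook demonstrates time series forecasting for classroom usage using real UCI occupancy sensor data with an ARIMA model. Note: in the cells shown below, the hourly series is produced by a simulation function (`generate_classroom_data`) rather than loaded from the UCI dataset.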

## 1. Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
from datetime import datetime, timedelta

## 2. Data Simulation (Classroom Data)

In [2]:
def generate_classroom_data(days=7, seed=42):
    """Simulate hourly classroom occupancy and electricity draw.

    One row is produced per hour, starting at 2025-01-08 08:00 and covering
    ``days * 24`` consecutive hours.

    Args:
        days: Number of days to simulate (``days * 24`` rows are generated).
        seed: Seed for the private random generator. The default of 42
            reproduces the values the function produced when it seeded the
            global NumPy generator with 42.

    Returns:
        pandas.DataFrame with columns ``timestamp`` (hourly timestamps),
        ``occupancy`` (simulated head count) and ``power_kw`` (simulated
        power draw, clipped at 0).
    """
    # A private RandomState seeded the same way yields the same draw sequence
    # as np.random.seed(seed) followed by np.random.* calls, but it does not
    # reset the process-wide NumPy RNG that other cells may rely on.
    rng = np.random.RandomState(seed)
    start_date = datetime(2025, 1, 8, 8, 0)  # Classes start at 8 AM
    data = []

    # Simulate hourly data for 'days' amount of time
    for i in range(days * 24):
        current_time = start_date + timedelta(hours=i)
        hour = current_time.hour
        weekday = current_time.weekday()  # 0=Mon, 6=Sun

        # Class hours: Mon-Fri (0-4), hourly samples stamped 08:00 through
        # 18:00 inclusive (the 18:00 sample still counts as occupied).
        if 8 <= hour <= 18 and weekday < 5:
            # High occupancy (Wi-Fi logs) means high power draw
            occupancy = rng.randint(20, 50)  # upper bound exclusive: 20..49
            base_load = 5.0  # Projectors, ACs on
        else:
            # Room empty at night/weekends
            occupancy = rng.randint(0, 3)  # 0..2
            base_load = 1.0  # Just emergency lights/standby

        # Electricity Draw = Base + (Occupancy * factor) + Noise
        power_draw = base_load + (occupancy * 0.1) + rng.normal(0, 0.2)
        # Noise could push the value below zero; clip at 0.
        data.append([current_time, occupancy, max(0, power_draw)])

    return pd.DataFrame(data, columns=['timestamp', 'occupancy', 'power_kw'])

In [3]:
# Simulate two weeks (14 days) of hourly classroom readings,
# then report the row count and preview the first rows.
df = generate_classroom_data(days=14)
print(f"Generated {len(df)} hours of data")
preview_rows = df.head()
print(preview_rows)

Generated 336 hours of data
            timestamp  occupancy  power_kw
0 2025-01-08 08:00:00         26  7.489953
1 2025-01-08 09:00:00         27  7.803087
2 2025-01-08 10:00:00         48  9.560439
3 2025-01-08 11:00:00         42  9.628332
4 2025-01-08 12:00:00         30  7.597807


## 3. ARIMA Modeling

In [4]:
# ARIMA is fitted on a single 1D series, so only 'power_kw' is modelled.
# Split in time order: the first 95% of rows train the model, the rest is
# held out for comparison against the forecast.
power_series = df['power_kw']
train_size = int(len(df) * 0.95)
train_data = power_series.iloc[:train_size]
test_data = power_series.iloc[train_size:]

print(f"Training data size: {len(train_data)}")
print(f"Test data size: {len(test_data)}")

Training data size: 319
Test data size: 17


In [5]:
# Build and fit the ARIMA model on the training series.
# The order tuple is (p, d, q):
#   p = number of autoregressive terms
#   d = degree of differencing (to remove trend)
#   q = number of moving-average terms
# (2, 1, 2) is used here as a generic starting point.
arima_order = (2, 1, 2)
model = ARIMA(train_data, order=arima_order)
model_fit = model.fit()

print("ARIMA Model Summary:")
print(model_fit.summary())

ARIMA Model Summary:
                               SARIMAX Results                                
Dep. Variable:               power_kw   No. Observations:                  319
Model:                 ARIMA(2, 1, 2)   Log Likelihood                -652.068
Date:                Thu, 08 Jan 2026   AIC                           1314.136
Time:                        22:34:39   BIC                           1332.946
Sample:                             0   HQIC                          1321.649
                                - 319                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1426      0.048     -2.952      0.003      -0.237      -0.048
ar.L2          0.8574      0.049     17.477      0.000       0.761       0.954
ma.L1         -0.0002      0.89



In [6]:
# Forecast horizon: every held-out test step plus 12 further hourly steps.
forecast_steps = len(test_data) + 12
forecast_result = model_fit.get_forecast(steps=forecast_steps)

# Point forecasts and the lower/upper interval bounds (default settings).
forecast_values = forecast_result.predicted_mean
conf_int = forecast_result.conf_int()

# Hourly timestamps for the forecast, starting one hour after the last
# training observation.
last_train_time = df['timestamp'].iloc[train_size - 1]
forecast_time_index = [
    last_train_time + timedelta(hours=offset)
    for offset in range(1, forecast_steps + 1)
]

print(f"Forecast generated for {forecast_steps} time steps")

Forecast generated for 29 time steps


## 4. Visualization (Plotly Dashboard)

In [7]:
# Split the frame once into the training window and the held-out window.
history_part = df.iloc[:train_size]
holdout_part = df.iloc[train_size:]

# Traces are listed in drawing order. The lower-bound trace must come
# directly after the upper-bound trace because fill='tonexty' shades the
# area between a trace and the one immediately before it.
forecast_traces = [
    # 1. Historical data used for training
    go.Scatter(
        x=history_part['timestamp'],
        y=history_part['power_kw'],
        name="Historical Data",
        line={'color': 'gray'},
    ),
    # 2. Held-out observations to compare against the forecast
    go.Scatter(
        x=holdout_part['timestamp'],
        y=holdout_part['power_kw'],
        name="Actual Observed",
        mode='lines',
        line={'color': 'orange', 'width': 2},
    ),
    # 3. Model prediction
    go.Scatter(
        x=forecast_time_index,
        y=forecast_values,
        name="ARIMA Forecast",
        line={'color': 'blue', 'width': 3},
    ),
    # 4a. Upper interval bound (invisible line, hidden from the legend)
    go.Scatter(
        x=forecast_time_index,
        y=conf_int.iloc[:, 1],
        mode='lines',
        line={'width': 0},
        showlegend=False,
    ),
    # 4b. Lower interval bound, filled up to the upper bound to form the band
    go.Scatter(
        x=forecast_time_index,
        y=conf_int.iloc[:, 0],
        mode='lines',
        line={'width': 0},
        fill='tonexty',
        fillcolor='rgba(0, 0, 255, 0.2)',  # semi-transparent blue
        name="95% Confidence Interval",
    ),
]

fig = go.Figure(data=forecast_traces)

# Titles, theme and size
fig.update_layout(
    title="Classroom Electricity Forecast (ARIMA Model)",
    xaxis_title="Time",
    yaxis_title="Power Draw (kW)",
    template="plotly_white",
    height=600,
)

fig.show()