In [1]:
# HYPERLOCAL WEATHER FORECAST MODEL TRAINING NOTEBOOK

# ==============================================================================
# CELL 1: INITIAL SETUP AND IMPORTS
# ==============================================================================
# In this cell, we import all the necessary libraries for data handling,
# model building, and saving our progress.

import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import joblib
from datetime import datetime

print("TensorFlow Version:", tf.__version__)


TensorFlow Version: 2.19.0


In [None]:
# ==============================================================================
# CELL 2: CONFIGURATION PARAMETERS
# ==============================================================================
# Here, we define the key parameters for our model and data.
# - LOOKBACK_WINDOW: How many past time steps the model uses to make a prediction.
# - FORECAST_HORIZON: How many future time steps the model will predict.
# - MODEL_DIR: The directory where the trained model and scalers will be saved.

LOOKBACK_WINDOW = 24  # Using the past 24 hours of data
FORECAST_HORIZON = 7  # To predict the next 7 hours/steps
MODEL_DIR = 'models/saved_model'
DATA_FILE = '/Users/dipanshu_08/Downloads/IOTWeatherStationAndForecast-main/data/weather_data.csv' 

# Create the directory to save the model if it doesn't exist
os.makedirs(MODEL_DIR, exist_ok=True)

print(f"Configuration loaded:")
print(f"  - Lookback Window: {LOOKBACK_WINDOW} steps")
print(f"  - Forecast Horizon: {FORECAST_HORIZON} steps")
print(f"  - Model Save Directory: {MODEL_DIR}")
print(f"  - Data File: {DATA_FILE}")

Configuration loaded:
  - Lookback Window: 24 steps
  - Forecast Horizon: 7 steps
  - Model Save Directory: models/saved_model
  - Data File: /Users/dipanshu_08/Downloads/IOTWeatherStationAndForecast-main/data/synthetic_weather_data.csv


In [3]:
# ==============================================================================
# CELL 3: LOAD AND INSPECT THE DATASET
# ==============================================================================
# This cell loads your historical weather data from a CSV file.
# It's crucial that your CSV file has columns like 'temperature', 'humidity',
# 'pressure', 'rain_intensity', 'aqi', and a 'datetime' column.

try:
    # Load the dataset
    # Make sure the 'datetime' column is parsed correctly upon loading.
    df = pd.read_csv(DATA_FILE, parse_dates=['datetime'])
    print("Dataset loaded successfully.")
    print(f"Dataset shape: {df.shape}")
    print("\nFirst 5 rows of the dataset:")
    print(df.head())
    print("\nDataset Info:")
    df.info()
except FileNotFoundError:
    print(f"Error: The file '{DATA_FILE}' was not found.")


Dataset loaded successfully.
Dataset shape: (3650, 15)

First 5 rows of the dataset:
             datetime   timestamp   temp  humidity  pressure  rain_analog  \
0 2024-04-24 00:42:00  1713899520  30.65      30.0    1007.6           98   
1 2024-04-24 02:42:00  1713906720  28.65      44.0    1008.3           97   
2 2024-04-24 04:13:00  1713912180  26.65      30.0    1015.5           98   
3 2024-04-24 07:54:00  1713925440  28.65      31.0    1009.2           98   
4 2024-04-24 09:07:00  1713929820  32.65      40.0    1012.4           97   

   rain_detected  light_intensity  nh3     co  co2  alcohol    lpg    ch4  aqi  
0          False             10.0  2.3  127.0  0.5      0.9  256.4  252.1  171  
1          False             14.0  3.3  146.0  0.6      1.0  269.4  272.5  196  
2          False              0.0  2.2  128.0  0.5      0.9  245.2  248.4  169  
3          False            258.0  3.6  154.0  0.7      1.2  264.6  269.2  218  
4          False            302.0  2.5  138.0  

In [4]:
# ==============================================================================
# CELL 4: FEATURE ENGINEERING
# ==============================================================================
# We create new features from the 'datetime' column. Time-based features like
# the hour of the day or month of the year can help the model detect cyclical
# weather patterns (e.g., daily temperature cycles).
df['precipitation'] = 1 - (df['rain_analog'] / 1023.0)

df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['month'] = df['datetime'].dt.month
df['day_of_year'] = df['datetime'].dt.dayofyear

# Create cyclical features for time, which help the model understand time's cyclical nature
# (e.g., hour 23 is close to hour 0).
df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

print("Time-based features created:")
print(df[['datetime', 'hour', 'hour_sin', 'hour_cos', 'month', 'month_sin', 'month_cos']].head())



Time-based features created:
             datetime  hour  hour_sin  hour_cos  month  month_sin  month_cos
0 2024-04-24 00:42:00     0  0.000000  1.000000      4   0.866025       -0.5
1 2024-04-24 02:42:00     2  0.500000  0.866025      4   0.866025       -0.5
2 2024-04-24 04:13:00     4  0.866025  0.500000      4   0.866025       -0.5
3 2024-04-24 07:54:00     7  0.965926 -0.258819      4   0.866025       -0.5
4 2024-04-24 09:07:00     9  0.707107 -0.707107      4   0.866025       -0.5


In [5]:
df.head()

Unnamed: 0,datetime,timestamp,temp,humidity,pressure,rain_analog,rain_detected,light_intensity,nh3,co,...,aqi,precipitation,hour,day_of_week,month,day_of_year,hour_sin,hour_cos,month_sin,month_cos
0,2024-04-24 00:42:00,1713899520,30.65,30.0,1007.6,98,False,10.0,2.3,127.0,...,171,0.904203,0,2,4,115,0.0,1.0,0.866025,-0.5
1,2024-04-24 02:42:00,1713906720,28.65,44.0,1008.3,97,False,14.0,3.3,146.0,...,196,0.905181,2,2,4,115,0.5,0.866025,0.866025,-0.5
2,2024-04-24 04:13:00,1713912180,26.65,30.0,1015.5,98,False,0.0,2.2,128.0,...,169,0.904203,4,2,4,115,0.866025,0.5,0.866025,-0.5
3,2024-04-24 07:54:00,1713925440,28.65,31.0,1009.2,98,False,258.0,3.6,154.0,...,218,0.904203,7,2,4,115,0.965926,-0.258819,0.866025,-0.5
4,2024-04-24 09:07:00,1713929820,32.65,40.0,1012.4,97,False,302.0,2.5,138.0,...,195,0.905181,9,2,4,115,0.707107,-0.707107,0.866025,-0.5


In [6]:
df.columns

Index(['datetime', 'timestamp', 'temp', 'humidity', 'pressure', 'rain_analog',
       'rain_detected', 'light_intensity', 'nh3', 'co', 'co2', 'alcohol',
       'lpg', 'ch4', 'aqi', 'precipitation', 'hour', 'day_of_week', 'month',
       'day_of_year', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos'],
      dtype='object')

In [8]:
# ==============================================================================
# CELL 5: DEFINE FEATURES AND TARGETS & NORMALIZE DATA
# ==============================================================================
# We define which columns will be our input features (X) and which we want to
# predict (y). We then scale the data to a range between 0 and 1.
# Normalization is essential for neural networks to train effectively.

# Define the columns we will use as input features for the model
FEATURE_COLS = [
    'temp', 'humidity', 'pressure', 'rain_analog',
    'light_intensity', 'aqi', 'precipitation', 'hour', 'day_of_week', 'month', 'day_of_year',
    'hour_sin', 'hour_cos', 'month_sin', 'month_cos'
]

# Define the columns we want the model to predict
TARGET_COLS = ['temp', 'precipitation', 'aqi']

# Ensure all specified columns exist in the DataFrame
df_model = df[FEATURE_COLS].copy()

# Initialize scalers for features and targets
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()

# Fit and transform the data
scaled_features = feature_scaler.fit_transform(df_model)
scaled_targets = target_scaler.fit_transform(df_model[TARGET_COLS])

print("Data scaling complete.")
print(f"Scaled features shape: {scaled_features.shape}")
print(f"Scaled targets shape: {scaled_targets.shape}")

# Save the scalers to disk for later use in the Flask app
joblib.dump(feature_scaler, os.path.join(MODEL_DIR, 'feature_scaler.pkl'))
joblib.dump(target_scaler, os.path.join(MODEL_DIR, 'target_scaler.pkl'))
print(f"Scalers saved to '{MODEL_DIR}'")


Data scaling complete.
Scaled features shape: (3650, 15)
Scaled targets shape: (3650, 3)
Scalers saved to 'models/saved_model'


In [9]:
# ==============================================================================
# CELL 6: CREATE TIME-SERIES SEQUENCES
# ==============================================================================
# LSTMs require data to be in a sequence format of [samples, timesteps, features].
# This function converts our flat data into these sequences.
# For each sample, we take `lookback` number of historical steps as input (X)
# and the next `forecast_horizon` steps as the output to predict (y).

def create_sequences(features, targets, lookback, horizon):
    X, y = [], []
    for i in range(len(features) - lookback - horizon + 1):
        X.append(features[i:(i + lookback)])
        y.append(targets[i + lookback:i + lookback + horizon])
    return np.array(X), np.array(y)

X_seq, y_seq = create_sequences(scaled_features, scaled_targets, LOOKBACK_WINDOW, FORECAST_HORIZON)

print("Time-series sequences created.")
print(f"Shape of X_seq: {X_seq.shape}")
print(f"Shape of y_seq: {y_seq.shape}")


Time-series sequences created.
Shape of X_seq: (3620, 24, 15)
Shape of y_seq: (3620, 7, 3)


In [10]:

# ==============================================================================
# CELL 7: SPLIT DATA INTO TRAINING AND VALIDATION SETS
# ==============================================================================
# We split our sequenced data into a training set (for teaching the model) and
# a validation set (for evaluating its performance on unseen data).
# A 80/20 split is a common practice.

split_ratio = 0.8
split_index = int(len(X_seq) * split_ratio)

X_train, X_val = X_seq[:split_index], X_seq[split_index:]
y_train, y_val = y_seq[:split_index], y_seq[split_index:]

print("Data split into training and validation sets:")
print(f"  - X_train shape: {X_train.shape}")
print(f"  - y_train shape: {y_train.shape}")
print(f"  - X_val shape: {X_val.shape}")
print(f"  - y_val shape: {y_val.shape}")


Data split into training and validation sets:
  - X_train shape: (2896, 24, 15)
  - y_train shape: (2896, 7, 3)
  - X_val shape: (724, 24, 15)
  - y_val shape: (724, 7, 3)


In [11]:
# ==============================================================================
# CELL 8: BUILD THE RNN-LSTM MODEL
# ==============================================================================
# Here we define the architecture of our neural network.
# - LSTM layers are the core of our model for processing sequences.
# - Dropout layers help prevent overfitting by randomly ignoring some neurons
#   during training.
# - The final Dense layer outputs the prediction.

input_shape = (X_train.shape[1], X_train.shape[2])
output_shape = (y_train.shape[1], y_train.shape[2])

model = Sequential([
    LSTM(100, input_shape=input_shape, return_sequences=True),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(output_shape[0] * output_shape[1]), # Number of outputs = horizon * num_targets
    tf.keras.layers.Reshape(output_shape)
])

# Compile the model with an optimizer and a loss function
model.compile(optimizer='adam', loss='mean_squared_error')

print("Model architecture built successfully.")
model.summary()

Model architecture built successfully.


  super().__init__(**kwargs)


In [12]:
# ==============================================================================
# CELL 9: TRAIN THE MODEL
# ==============================================================================
# This is where the training happens. The model learns by comparing its
# predictions to the actual target values and adjusting its internal weights.
# - ModelCheckpoint: Saves the best version of the model (with the lowest
#   validation loss) to a file.
# - EarlyStopping: Stops training if the model's performance on the validation
#   set doesn't improve for a certain number of epochs.

# Define the path to save the final model. Using the .keras format.
model_filepath = os.path.join(MODEL_DIR, 'weather_forecast_model.keras')

# Callbacks
checkpoint = ModelCheckpoint(
    filepath=model_filepath,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10, # Stop if no improvement after 10 epochs
    restore_best_weights=True,
    verbose=1
)

print("\nStarting model training...")
history = model.fit(
    X_train, y_train,
    epochs=50, # You can increase this for better performance
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint, early_stopping],
    verbose=1
)

print("\nModel training complete.")
print(f"The best model has been saved to: {model_filepath}")



Starting model training...
Epoch 1/50
[1m89/91[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 0.0784
Epoch 1: val_loss improved from inf to 0.01610, saving model to models/saved_model/weather_forecast_model.keras
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 0.0776 - val_loss: 0.0161
Epoch 2/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0340
Epoch 2: val_loss improved from 0.01610 to 0.01481, saving model to models/saved_model/weather_forecast_model.keras
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0340 - val_loss: 0.0148
Epoch 3/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0302
Epoch 3: val_loss improved from 0.01481 to 0.01261, saving model to models/saved_model/weather_forecast_model.keras
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 0.0302 - val_loss: 0.0126
Epoc

In [13]:
# ==============================================================================
# CELL 10: EVALUATE THE MODEL (OPTIONAL)
# ==============================================================================
# Let's load our best saved model and make a quick prediction to see how it
# performs on a sample from the validation set.

from tensorflow.keras.models import load_model

# Load the best model saved by ModelCheckpoint
saved_model = load_model(model_filepath)
print("\nSaved model loaded successfully.")

# Make a prediction on the first sample of the validation set
sample_prediction_scaled = saved_model.predict(X_val[0:1])

# Inverse transform the prediction and the actual value to see the real numbers
sample_prediction_actual = target_scaler.inverse_transform(sample_prediction_scaled.reshape(-1, len(TARGET_COLS)))
sample_ground_truth_actual = target_scaler.inverse_transform(y_val[0].reshape(-1, len(TARGET_COLS)))

print("\n--- Sample Prediction vs. Actual Value ---")
print(f"Prediction for the next {FORECAST_HORIZON} steps:")
pred_df = pd.DataFrame(sample_prediction_actual, columns=TARGET_COLS)
print(pred_df)

print(f"\nActual values for the next {FORECAST_HORIZON} steps:")
actual_df = pd.DataFrame(sample_ground_truth_actual, columns=TARGET_COLS)
print(actual_df)

print("\nNotebook execution finished. You can find the 'weather_forecast_model.keras' file and the scalers in the 'models/saved_model' directory.")



Saved model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step

--- Sample Prediction vs. Actual Value ---
Prediction for the next 7 steps:
        temp  precipitation         aqi
0  13.807881       0.912625  225.450546
1  11.673524       0.907679  216.125671
2   9.972904       0.905211  233.393402
3  10.716677       0.907886  264.518188
4  14.512443       0.908905  254.675842
5  19.964024       0.910661  221.786804
6  22.776392       0.913705  227.504349

Actual values for the next 7 steps:
    temp  precipitation    aqi
0  10.65       0.902248  259.0
1   8.70       0.902248  282.0
2   6.85       0.902248  302.0
3   8.75       0.904203  327.0
4  12.50       0.904203  325.0
5  18.25       0.904203  280.0
6  22.05       0.906158  254.0

Notebook execution finished. You can find the 'weather_forecast_model.keras' file and the scalers in the 'models/saved_model' directory.
