# Weather Forecast Bias Correction using Deep Learning

This notebook implements a hybrid deep learning approach for weather forecast bias correction, combining:
1. LSTM for temporal pattern learning
2. Graph Neural Network for spatial relationships
3. Attention mechanism for feature fusion
4. Monte Carlo dropout for uncertainty estimation

## Setup and Installation

In [None]:
# Attach the user's Google Drive to the Colab filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Clone the repo if not already done
!rm -rf weather-bias-correction-dl
!git clone [https://github.com/MaheshSharan/weather-bias-correction-dl.git](https://github.com/MaheshSharan/weather-bias-correction-dl.git)

# Change to project directory
%cd weather-bias-correction-dl

# Uninstall any existing installation
!pip uninstall -y weather_bias_correction

# Install dependencies and package in editable mode
!pip install -r requirements.txt
!pip install -e .

# Setup project paths
import os
import sys

# Add the project directory to Python path
PROJECT_DIR = os.getcwd()
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Create necessary directories
DATA_DIR = os.path.join(PROJECT_DIR, 'data')
MODELS_DIR = os.path.join(PROJECT_DIR, 'models')
LOGS_DIR = os.path.join(PROJECT_DIR, 'logs')

for dir_path in [DATA_DIR, MODELS_DIR, LOGS_DIR]:
    os.makedirs(dir_path, exist_ok=True)



## Data Download and Preprocessing

In [None]:
import os

from src.data.simple_openmeteo import SimpleOpenMeteoDownloader
from src.data.isd_lite_downloader import ISDLiteDownloader
from src.data.data_alignment import DataAligner

# Root of the data tree, relative to the current working directory.
# Adjust this path as needed for Colab.
DATA_DIR = 'data'

# Date range shared by both downloaders and by the file names used below
START_DATE = '2018-01-01'
END_DATE = '2023-12-31'

# Raw inputs and aligned outputs live in separate sub-directories
OPENMETEO_RAW_DIR = os.path.join(DATA_DIR, 'raw', 'openmeteo')
ISD_RAW_DIR = os.path.join(DATA_DIR, 'raw', 'isd')
PROCESSED_DIR = os.path.join(DATA_DIR, 'processed')

# One downloader per data source, both covering the same period
openmeteo_downloader = SimpleOpenMeteoDownloader(
    output_dir=OPENMETEO_RAW_DIR,
    start_date=START_DATE,
    end_date=END_DATE,
)
isd_downloader = ISDLiteDownloader(
    output_dir=ISD_RAW_DIR,
    start_date=START_DATE,
    end_date=END_DATE,
)

# Forecast coordinates paired with the ISD station used as ground truth.
# NOTE(review): station labels are carried over from the original notebook;
# verify each ID against the ISD station list before relying on them.
locations = [
    {
        "name": "London",
        "lat": 51.5074,
        "lon": -0.1278,
        "isd_station": "037720-99999",  # labelled Heathrow
    },
    {
        "name": "Paris",
        "lat": 48.8566,
        "lon": 2.3522,
        "isd_station": "071490-99999",  # labelled Charles de Gaulle
    },
    # Add more locations as needed
]

# Step 1: forecast data from Open-Meteo, one request per location
print("Downloading Open-Meteo data...")
for location in locations:
    site_name = location['name']
    print(f"\nDownloading data for {site_name}...")
    openmeteo_file = openmeteo_downloader.download_data(
        latitude=location['lat'],
        longitude=location['lon'],
        location_name=site_name,
    )

# Step 2: station observations from ISD-Lite, one request per station
print("\nDownloading ISD-Lite data...")
for location in locations:
    site_name = location['name']
    print(f"\nDownloading ISD data for {site_name}...")
    isd_file = isd_downloader.download_data(station_id=location['isd_station'])

# Step 3: align forecasts with observations
print("\nAligning datasets...")
aligner = DataAligner(
    openmeteo_dir=OPENMETEO_RAW_DIR,
    isd_dir=ISD_RAW_DIR,
    output_dir=PROCESSED_DIR,
)

# File names are rebuilt by hand here.
# NOTE(review): this assumes the downloaders write
# "<name>_<start>_<end>.csv" and "<station>.csv" — confirm against them.
date_tag = f"{START_DATE}_{END_DATE}"
for location in locations:
    site_name = location['name']
    print(f"\nAligning data for {site_name}...")
    output_file = f"{site_name}_{date_tag}_aligned.csv"
    aligner.align_data(
        openmeteo_file=os.path.join(OPENMETEO_RAW_DIR, f"{site_name}_{date_tag}.csv"),
        isd_file=os.path.join(ISD_RAW_DIR, f"{location['isd_station']}.csv"),
        output_file=output_file,
    )

## Model Training

In [None]:
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from src.models.bias_correction_model import DeepBiasCorrectionModel
from src.training.train import train_model

# Set up model parameters
model_params = {
    'input_dim': 5,  # Number of input features
    'hidden_dim': 256,
    'num_layers': 3,
    'dropout_rate': 0.2,
    'learning_rate': 1e-3,
    'weight_decay': 1e-5
}

# Initialize model
model = DeepBiasCorrectionModel(**model_params)

# Keep the three checkpoints with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    dirpath=MODELS_DIR,
    filename='bias_correction-{epoch:02d}-{val_loss:.2f}',
    monitor='val_loss',
    mode='min',
    save_top_k=3
)

# Stop once validation loss has not improved for 10 consecutive checks
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min'
)

# Set up logger
logger = TensorBoardLogger(LOGS_DIR, name='bias_correction')

# Use the GPU when the runtime has one, otherwise fall back to CPU so the
# Trainer can still be constructed on a CPU-only Colab runtime.
accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

# Train model
trainer = pl.Trainer(
    max_epochs=100,
    accelerator=accelerator,
    devices=1,
    callbacks=[checkpoint_callback, early_stopping],
    logger=logger,
    gradient_clip_val=0.5
)

# NOTE(review): no dataloaders or datamodule are passed here; this presumably
# relies on DeepBiasCorrectionModel providing its own train/val dataloaders —
# confirm, or pass them explicitly to fit().
trainer.fit(model)

## Model Evaluation and Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from src.app.visualization import plot_predictions, plot_uncertainty

# Restore the weights of the best checkpoint recorded by the checkpoint callback
best_checkpoint_path = checkpoint_callback.best_model_path
best_model = DeepBiasCorrectionModel.load_from_checkpoint(best_checkpoint_path)

# Predict on the held-out data, returning an uncertainty per prediction.
# NOTE(review): `test_data` is not defined in any visible cell — it must be
# created before this cell is run.
mc_output = best_model.predict_with_uncertainty(test_data)
predictions, uncertainties = mc_output

# Visualise the predictions, then the predictions with their uncertainty
plot_predictions(test_data, predictions)
plot_uncertainty(test_data, predictions, uncertainties)

## Save Results to Drive

In [None]:
# Save model and results
import os
import shutil

import torch
import pandas as pd

# Destination inside the mounted Google Drive. The project directory lives on
# the Colab VM's local disk, which is discarded when the runtime is recycled,
# so the artifacts are copied here to actually persist them.
DRIVE_OUTPUT_DIR = '/content/drive/MyDrive/weather-bias-correction-dl'
os.makedirs(DRIVE_OUTPUT_DIR, exist_ok=True)

# Save model state (weights only) next to the training checkpoints
final_model_path = os.path.join(MODELS_DIR, 'final_model.pth')
torch.save(best_model.state_dict(), final_model_path)

# Save predictions alongside the targets and the estimated uncertainty.
# NOTE(review): assumes test_data.y, predictions and uncertainties are 1-D and
# of equal length so they can form DataFrame columns — confirm.
results_df = pd.DataFrame({
    'actual': test_data.y,
    'predicted': predictions,
    'uncertainty': uncertainties
})
predictions_path = os.path.join(PROJECT_DIR, 'predictions.csv')
results_df.to_csv(predictions_path)

# Copy both artifacts to Drive so the message below is true
for local_path in (final_model_path, predictions_path):
    shutil.copy2(local_path, DRIVE_OUTPUT_DIR)

print("Model and results saved to Google Drive!")
print(f"Drive location: {DRIVE_OUTPUT_DIR}")