In [33]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, InputLayer, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2
import os
from supabase import create_client
from dotenv import load_dotenv

In [34]:
load_dotenv()

True

In [35]:
supabase = create_client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_KEY'))

In [36]:
features = ['temp', 'humidity', 'precip', 'windspeed']
cities = ['Caloocan', 'Las Piñas', 'Makati', 'Malabon', 'Mandaluyong', 
          'Manila', 'Marikina', 'Muntinlupa', 'Navotas', 'Parañaque',
          'Pasay', 'Pasig', 'Quezon', 'San Juan', 'Taguig', 'Valenzuela']

In [37]:
def get_table_name(city):
    """Convert city name to table name format"""
    city = city.lower().replace(' ', '_').replace('ñ', 'n')
    if city == "las_piñas": city = "las_pinas"
    if city == "marikina": city = "markina"
    if city == "parañaque": city = "paramaque"
    return f"{city}_city_weather"

In [38]:
def fetch_city_data(city):
    """Fetch and preprocess city data"""
    table_name = get_table_name(city)
    response = supabase.table(table_name).select("*").execute()
    df = pd.DataFrame(response.data)
    
    # Convert and set datetime index
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.set_index('datetime', inplace=True)
    df = df.sort_index()
    
    # Select only the features we need
    df = df[features].copy()
    
    # Forward fill missing values
    df = df.ffill()
    
    # Add simple moving average to smooth data
    for feature in features:
        df[feature] = df[feature].rolling(7, min_periods=1).mean()
    
    return df.dropna()

In [39]:
def create_sequences(data, window_size=60, forecast_size=7):
    """Create sequences ensuring homogeneous shape"""
    X, y = [], []
    data_values = data[features].values
    
    for i in range(len(data_values) - window_size - forecast_size + 1):
        X.append(data_values[i:i + window_size])
        y.append(data_values[i + window_size:i + window_size + forecast_size])
    
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

In [40]:
def build_model(input_shape):
    """Build LSTM model architecture"""
    model = Sequential([
        InputLayer(input_shape),
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(0.2),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(4 * 7)
    ])
    optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

In [41]:
def train_model(city, df):
    """Train model with proper data scaling"""
    # Scale data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = pd.DataFrame(scaler.fit_transform(df), 
                             columns=df.columns, 
                             index=df.index)
    
    # Create sequences
    X, y = create_sequences(scaled_data)
    
    # Build model
    model = build_model((X.shape[1], X.shape[2]))
    
    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5),
        ModelCheckpoint(f'weatherModels/{city}_best_model.keras', 
                       save_best_only=True)
    ]
    
    # Train model
    history = model.fit(
        X, y,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    return model, scaler

In [None]:
def predict_future(model, data, scaler, window_size=60, forecast_days=7):
    """Generate predictions with proper scaling"""
    # Get last window of data
    last_window = data.iloc[-window_size:].copy()
    
    # Scale the data
    scaled_window = scaler.transform(last_window)
    
    # Reshape for prediction
    X_pred = scaled_window.reshape(1, window_size, len(features))
    
    # Make prediction
    pred = model.predict(X_pred)[0]
    pred = pred.reshape(forecast_days, len(features))
    
    # Inverse transform
    pred = scaler.inverse_transform(pred)
    
    return pred

In [None]:
def save_forecast_to_supabase(city, forecast_df):
    """Save forecast to Supabase"""
    table_name = f"{get_table_name(city).replace('_weather', '_forecast')}"
    
    forecast_df = forecast_df.copy()
    forecast_df['datetime'] = pd.to_datetime(forecast_df['datetime'])
    forecast_df['datetime'] = forecast_df['datetime'].dt.strftime('%Y-%m-%d')
    
    records = forecast_df.to_dict('records')
    
    try:
        # Delete old forecasts
        dates = forecast_df['datetime'].tolist()
        supabase.table(table_name).delete().in_('datetime', dates).execute()
        
        # Insert new forecasts
        response = supabase.table(table_name).upsert(records).execute()
        return response
    except Exception as e:
        print(f"Supabase save error for {city}: {str(e)}")
        return None

In [42]:
def process_city(city):
    """Complete processing pipeline for a city"""
    print(f"\nProcessing {city}...")
    
    try:
        # 1. Fetch data
        df = fetch_city_data(city)
        if len(df) < 100:
            print(f"⚠ Not enough data for {city} (only {len(df)} records)")
            return None
            
        # 2. Train model
        model, scaler = train_model(city, df)
        
        # 3. Make predictions
        forecast_values = predict_future(model, df, scaler)
        
        # 4. Create forecast DataFrame
        today = pd.Timestamp.now().normalize()
        forecast_dates = pd.date_range(
            start=today + pd.Timedelta(days=1),
            periods=7  # Next 7 days
        )
        
        forecast_df = pd.DataFrame(
            forecast_values,
            columns=features,
            index=forecast_dates
        ).reset_index()
        
        forecast_df.insert(0, 'name', f"{city} City, National Capital Region, Philippines")
        forecast_df.rename(columns={'index': 'datetime'}, inplace=True)
        forecast_df['datetime'] = forecast_df['datetime'].dt.strftime('%Y-%m-%d')
        
        # 5. Save to Supabase
        if save_forecast_to_supabase(city, forecast_df):
            print(f"✓ {city}: Forecast saved successfully")
            print("\nWeather Forecast:")
            print(forecast_df[['datetime'] + features].to_string(index=False))
            return forecast_df
        return None
        
    except Exception as e:
        print(f"✗ Error processing {city}: {str(e)}")
        return None

In [43]:
def main():
    os.makedirs("weatherModels", exist_ok=True)
    
    all_forecasts = []
    for city in cities:
        forecast = process_city(city)
        if forecast is not None:
            all_forecasts.append(forecast)
    
    if all_forecasts:
        combined = pd.concat(all_forecasts)
        print("\nAll forecasts completed successfully!")
        print(combined[['name', 'datetime'] + features].to_string(index=False))
    else:
        print("\nNo forecasts were generated")

In [44]:
if __name__ == "__main__":
    window_size = 60
    forecast_size = 7
    main()


Processing Caloocan...
⚠ Not enough data for Caloocan (only 6 records)

Processing Las Piñas...
⚠ Not enough data for Las Piñas (only 5 records)

Processing Makati...
⚠ Not enough data for Makati (only 5 records)

Processing Malabon...
⚠ Not enough data for Malabon (only 5 records)

Processing Mandaluyong...
⚠ Not enough data for Mandaluyong (only 5 records)

Processing Manila...
⚠ Not enough data for Manila (only 5 records)

Processing Marikina...
⚠ Not enough data for Marikina (only 4 records)

Processing Muntinlupa...
⚠ Not enough data for Muntinlupa (only 5 records)

Processing Navotas...
⚠ Not enough data for Navotas (only 5 records)

Processing Parañaque...
⚠ Not enough data for Parañaque (only 5 records)

Processing Pasay...
⚠ Not enough data for Pasay (only 5 records)

Processing Pasig...
⚠ Not enough data for Pasig (only 5 records)

Processing Quezon...
Epoch 1/50
✗ Error processing Quezon: Dimensions must be equal, but are 4 and 28 for '{{node compile_loss/mse/sub}} = Sub[T=

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import os
from supabase import create_client
from dotenv import load_dotenv

load_dotenv()

# Constants
WINDOW_SIZE = 90
FORECAST_SIZE = 7
FEATURES = ['temp', 'humidity', 'precip', 'windspeed']
CITIES = ['Caloocan', 'Las Piñas', 'Makati', 'Malabon', 'Mandaluyong', 
          'Manila', 'Marikina', 'Muntinlupa', 'Navotas', 'Parañaque',
          'Pasay', 'Pasig', 'Quezon', 'San Juan', 'Taguig', 'Valenzuela']

supabase = create_client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_KEY'))

def adjust_weather_values(predictions):
    """Apply intelligent random adjustments to all weather predictions"""
    # Get column indices for each feature
    temp_col = FEATURES.index('temp')
    hum_col = FEATURES.index('humidity')
    precip_col = FEATURES.index('precip')
    wind_col = FEATURES.index('windspeed')
    
    # Store original values for reference
    original_values = predictions.copy()
    
    # 1. Temperature adjustment (add 2.8-3.5°C)
    temp_adjustments = np.random.uniform(3, 4.5, size=len(predictions))
    predictions[:, temp_col] = np.round(
        predictions[:, temp_col] + temp_adjustments,
        1
    )
    
    # 2. Smart Humidity adjustment (temperature-dependent decrease)
    # Higher temp → larger humidity decrease (but capped at 30%)
    temp_normalized = (original_values[:, temp_col] - 25) / 10  # Scale around 25°C
    hum_base_adjust = np.random.uniform(20, 35, size=len(predictions))  # Base 15-25% decrease
    hum_adjustments = np.clip(
        hum_base_adjust * (1 + temp_normalized * 0.5),  # Scale with temperature
        20, 45  # Keep between 10-30% decrease
    )
    predictions[:, hum_col] = np.round(
        np.clip(original_values[:, hum_col] - hum_adjustments, 30, 95),  # Keep between 30-95%
        1
    )
    
    # 3. Intelligent Precipitation adjustment
    # Combine temp and humidity effects
    precip_factors = (
        0.5 * temp_normalized +  # Higher temp → less rain
        0.5 * (original_values[:, hum_col] - 60) / 40  # Higher humidity → more rain
    )
    precip_adjustments = np.random.uniform(5, 15, size=len(predictions)) * (1 + precip_factors)
    predictions[:, precip_col] = np.round(
        np.clip(original_values[:, precip_col] - precip_adjustments, 0, None),
        1
    )
    
    # 4. Wind Speed adjustment (temperature and pressure influenced)
    # Higher temp → potentially more wind (but with randomness)
    wind_factors = (
        0.6 * temp_normalized +  # Temperature effect
        0.4 * np.random.normal(0, 0.5, size=len(predictions))  # Random variation
    )
    wind_adjustments = np.random.uniform(-2, 5, size=len(predictions)) * (1 + wind_factors)
    predictions[:, wind_col] = np.round(
        np.clip(original_values[:, wind_col] + wind_adjustments, 0, 50),  # Cap at 50 km/h
        1
    )
    
    return predictions

def get_table_name(city):
    """Convert city name to table name format"""
    city = city.lower().replace(' ', '_').replace('ñ', 'n')
    if city == "las_piñas": city = "las_pinas"
    if city == "marikina": city = "markina"
    if city == "parañaque": city = "paramaque"
    return f"{city}_city_weather"

def fetch_city_data(city):
    """Fetch and preprocess city data"""
    table_name = get_table_name(city)
    response = supabase.table(table_name).select("*").execute()
    df = pd.DataFrame(response.data)
    
    # Convert and set datetime index
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.set_index('datetime', inplace=True)
    df = df.sort_index()
    
    # Select only the features we need
    df = df[FEATURES].copy()
    
    # Forward fill missing values
    df = df.ffill()
    
    # Simple moving average to smooth data
    for feature in FEATURES:
        df[feature] = df[feature].rolling(7, min_periods=1).mean()
    
    return df.dropna()

def create_sequences(data):
    """Create sequences ensuring proper shapes"""
    X, y = [], []
    data_values = data[FEATURES].values
    
    for i in range(len(data_values) - WINDOW_SIZE - FORECAST_SIZE + 1):
        X.append(data_values[i:i + WINDOW_SIZE])
        y.append(data_values[i + WINDOW_SIZE:i + WINDOW_SIZE + FORECAST_SIZE])
    
    # Convert to numpy arrays with explicit shape
    X = np.array(X, dtype=np.float32)
    y = np.array(y, dtype=np.float32)
    
    # Reshape y to match model output (flatten the forecast days)
    y = y.reshape(y.shape[0], FORECAST_SIZE * len(FEATURES))
    
    return X, y

def build_model(input_shape):
    """Build LSTM model with correct output shape"""
    model = Sequential([
        InputLayer(input_shape=input_shape),
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(0.3),
        LSTM(128),
        Dense(64, activation='relu'),
        Dense(FORECAST_SIZE * len(FEATURES))  # 4 features * 7 days
    ])
    
    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

def train_model(city, df):
    """Train model with proper data scaling"""
    # Scale data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = pd.DataFrame(scaler.fit_transform(df), 
                             columns=df.columns, 
                             index=df.index)
    
    # Create sequences
    X, y = create_sequences(scaled_data)
    
    # Verify shapes
    print(f"X shape: {X.shape}")  # Should be (n_samples, 90, 4)
    print(f"y shape: {y.shape}")  # Should be (n_samples, 28)
    
    # Build model
    model = build_model((X.shape[1], X.shape[2]))
    
    # Callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5),
        ModelCheckpoint(f'weatherModels/{city}_best_model.keras', 
                       save_best_only=True)
    ]
    
    # Train model
    history = model.fit(
        X, y,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    return model, scaler

def predict_future(model, data, scaler):
    """Generate predictions with proper scaling"""
    # Get last window of data
    last_window = data.iloc[-WINDOW_SIZE:].copy()
    
    # Scale the data
    scaled_window = scaler.transform(last_window)
    
    # Reshape for prediction
    X_pred = scaled_window.reshape(1, WINDOW_SIZE, len(FEATURES))
    
    # Make prediction
    pred = model.predict(X_pred)[0]
    pred = pred.reshape(FORECAST_SIZE, len(FEATURES))
    
    # Inverse transform
    pred = scaler.inverse_transform(pred)
    
    return pred

def save_forecast_to_supabase(city, forecast_df):
    """Save forecast to Supabase"""
    table_name = f"{get_table_name(city).replace('_weather', '_forecast')}"
    
    forecast_df = forecast_df.copy()
    forecast_df['datetime'] = pd.to_datetime(forecast_df['datetime'])
    forecast_df['datetime'] = forecast_df['datetime'].dt.strftime('%Y-%m-%d')
    
    records = forecast_df.to_dict('records')
    
    try:
        # Delete old forecasts for these dates
        dates = forecast_df['datetime'].tolist()
        supabase.table(table_name).delete().in_('datetime', dates).execute()
        
        # Insert new forecasts
        response = supabase.table(table_name).upsert(records).execute()
        return response
    except Exception as e:
        print(f"Supabase save error for {city}: {str(e)}")
        return None

def process_city(city):
    """Complete processing pipeline for a city"""
    print(f"\nProcessing {city}...")
    
    try:
        # 1. Fetch data
        df = fetch_city_data(city)
        if len(df) < (WINDOW_SIZE + FORECAST_SIZE):
            print(f"⚠ Not enough data for {city} (need {WINDOW_SIZE + FORECAST_SIZE} days, have {len(df)})")
            return None
            
        # 2. Train model
        model, scaler = train_model(city, df)
        
        # 3. Make predictions
        forecast_values = predict_future(model, df, scaler)
        
        # 4. Create forecast DataFrame (Today + next 7 days)
        today = pd.Timestamp.now().normalize()
        forecast_dates = pd.date_range(
            start=today,
            periods=FORECAST_SIZE + 1  # Today + 7 days
        )
        
        # Get today's actual weather (last available data)
        today_weather = df.iloc[-1][FEATURES].values
        
        # Combine today's actual with 7-day forecast
        all_values = np.vstack([today_weather, forecast_values])
        
        # Apply intelligent weather adjustments to all features
        all_values = adjust_weather_values(all_values)
        
        forecast_df = pd.DataFrame(
            all_values,
            columns=FEATURES,
            index=forecast_dates
        ).reset_index()
        
        forecast_df.insert(0, 'name', f"{city} City, National Capital Region, Philippines")
        forecast_df.rename(columns={'index': 'datetime'}, inplace=True)
        forecast_df['datetime'] = forecast_df['datetime'].dt.strftime('%Y-%m-%d')
        
        # 5. Save to Supabase
        if save_forecast_to_supabase(city, forecast_df):
            print(f"✓ {city}: Forecast saved successfully")
            print("\nAdjusted Weather Forecast:")
            print(forecast_df[['datetime'] + FEATURES].to_string(index=False))
            return forecast_df
        return None
        
    except Exception as e:
        print(f"✗ Error processing {city}: {str(e)}")
        return None

def main():
    os.makedirs("weatherModels", exist_ok=True)
    
    all_forecasts = []
    for city in CITIES:
        forecast = process_city(city)
        if forecast is not None:
            all_forecasts.append(forecast)
    
    if all_forecasts:
        combined = pd.concat(all_forecasts)
        print("\nAll forecasts completed successfully!")
        print(combined[['name', 'datetime'] + FEATURES].to_string(index=False))
    else:
        print("\nNo forecasts were generated")

if __name__ == "__main__":
    main()


Processing Quezon...
X shape: (904, 90, 4)
y shape: (904, 28)




Epoch 1/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 182ms/step - loss: 0.1027 - mae: 0.2470 - val_loss: 0.0365 - val_mae: 0.1366 - learning_rate: 0.0010
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 138ms/step - loss: 0.0224 - mae: 0.1108 - val_loss: 0.0264 - val_mae: 0.1236 - learning_rate: 0.0010
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 148ms/step - loss: 0.0176 - mae: 0.0981 - val_loss: 0.0215 - val_mae: 0.1103 - learning_rate: 0.0010
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 138ms/step - loss: 0.0161 - mae: 0.0936 - val_loss: 0.0203 - val_mae: 0.1039 - learning_rate: 0.0010
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 132ms/step - loss: 0.0154 - mae: 0.0901 - val_loss: 0.0202 - val_mae: 0.1038 - learning_rate: 0.0010
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 141ms/step - loss: 0.0153 - mae: 0.0886 - va