In [41]:
import pandas as pd
import numpy as np
import pickle
from pymongo import MongoClient
from datetime import timedelta

In [42]:
import joblib

def _load_pickled_model(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE: unpickling executes code embedded in the file, so these artifacts
# must come from a trusted source.
regr = _load_pickled_model("../models/random_forest_regr.pkl")
regr_prec = _load_pickled_model("../models/regr_prec.pkl")
clf = _load_pickled_model("../models/random_forest_clf.pkl")

# Unlike the three models above, this one is read through joblib.
with open("../models/cloud_cover_regr.pkl", "rb") as f:
    cloud_model = joblib.load(f)


In [43]:
# Lookup table: integer weather code -> human-readable description.
# NOTE(review): the codes look like WMO weather-interpretation codes; confirm
# against whatever produced the classifier's training labels.
weather_code_mapping = {
    # Sky condition
    0: "Clear sky", 1: "Mainly clear", 2: "Partly cloudy", 3: "Overcast",
    # Fog
    45: "Fog", 48: "Depositing rime fog",
    # Drizzle
    51: "Light drizzle", 53: "Moderate drizzle", 55: "Dense drizzle",
    56: "Freezing drizzle (light)", 57: "Freezing drizzle (dense)",
    # Rain
    61: "Slight rain", 63: "Moderate rain", 65: "Heavy rain",
    66: "Freezing rain (light)", 67: "Freezing rain (heavy)",
    # Snow
    71: "Slight snowfall", 73: "Moderate snowfall", 75: "Heavy snowfall",
    77: "Snow grains",
    # Showers
    80: "Rain showers (slight)", 81: "Rain showers (moderate)",
    82: "Rain showers (violent)",
    85: "Snow showers (slight)", 86: "Snow showers (heavy)",
    # Thunderstorms
    95: "Thunderstorm",
    96: "Thunderstorm with hail (slight)",
    99: "Thunderstorm with hail (heavy)",
}

In [44]:
def refresh_df():
    """Load all hourly weather records from MongoDB into a date-indexed frame.

    Reads every document of the ``new_york_hourly`` collection in the
    ``weather_db`` database on ``mongodb://localhost:27017``, parses the
    ``date`` field with ``pd.to_datetime``, uses it as the index and sorts
    the rows in ascending date order.

    Returns:
        pandas.DataFrame: one row per document, indexed by ``date``. Every
        other document field (including ``_id``) is kept as a column.

    Raises:
        KeyError: if the collection is empty or no document carries a
            ``date`` field, because the ``date`` column then does not exist.
    """
    # Use the client as a context manager so its connections are released
    # when we are done; this function is called repeatedly by the polling
    # loop and previously left a new client open on every call.
    with MongoClient("mongodb://localhost:27017") as client:
        collection = client["weather_db"]["new_york_hourly"]
        # Materialise the cursor while the client is still open.
        records = list(collection.find())

    df_new = pd.DataFrame(records)
    df_new['date'] = pd.to_datetime(df_new['date'])
    return df_new.set_index('date').sort_index()

In [45]:
# Load the current contents of the collection and preview the first rows.
df = refresh_df()
df.head()   

Unnamed: 0_level_0,_id,temperature,relative_humidity,dew_point,apparent_temperature,surface_pressure,wind_speed,precipitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01 05:00:00,692c3925f732aeb815c4e312,1.5725,75.576302,-2.2775,-2.1702,1009.878906,8.8548,0.0
2024-01-01 06:00:00,692c3925f732aeb815c4e313,2.5725,74.100601,-1.5775,-1.4437,1009.901978,11.3842,0.0
2024-01-01 07:00:00,692c3925f732aeb815c4e314,2.6725,74.391296,-1.4275,-1.0761,1010.003723,9.6933,0.0
2024-01-01 08:00:00,692c3925f732aeb815c4e315,2.5225,75.741203,-1.3275,-0.9782,1009.801514,8.09,0.0
2024-01-01 09:00:00,692c3925f732aeb815c4e316,0.4725,88.367401,-1.2275,-2.9309,1009.555176,7.517,0.0


In [46]:
# Auto-generate input vector
def get_input_vector(last, n_features):
    """Build a ``(1, n_features)`` float array from one row of observations.

    Values are taken positionally from ``last.values``:

    * ``int`` / ``float`` / NumPy numeric values are converted with ``float()``;
    * anything else (strings, ids, ``None``, ...) becomes ``0.0``;
    * NaN also becomes ``0.0``, so a field missing from the source record is
      treated the same way as any other unusable value instead of being
      passed on to the model.

    The result is then truncated or right-padded with ``0.0`` so that it has
    exactly ``n_features`` entries.

    NOTE(review): features are matched to the model purely by position, and
    non-numeric columns still occupy a slot (as ``0.0``). Confirm that the
    column order of ``last`` matches the order used when the models were fit.

    Args:
        last: object exposing ``.values`` (e.g. a ``pandas.Series`` row).
        n_features: number of features the target model expects.

    Returns:
        numpy.ndarray: 2-D array with a single row of ``n_features`` floats.
    """
    def _as_feature(value):
        # Non-numeric entries cannot be fed to a model; zero-fill them.
        if not isinstance(value, (int, float, np.number)):
            return 0.0
        number = float(value)
        # NaN marks a missing measurement; apply the same zero-fill policy.
        return 0.0 if np.isnan(number) else number

    features = [_as_feature(v) for v in last.values][:n_features]
    # A non-positive repeat count yields an empty list, so this only pads
    # when the row is shorter than the model's expected width.
    features += [0.0] * (n_features - len(features))

    return np.array([features])


In [47]:
# Function to predict tomorrow's weather
def predict_tomorrow():
    """Run all four models on the most recent stored observation.

    Reloads the data with ``refresh_df()``, takes its last row and feeds it
    (sized per model via ``get_input_vector``) to the temperature,
    precipitation, cloud-cover and weather-code models, in that order.

    Returns:
        pandas.DataFrame: columns ``Feature`` and ``Prediction`` with one row
        each for temperature, precipitation, cloud cover, the numeric weather
        code and its text description (``"Unknown"`` for an unmapped code).
    """
    latest_row = refresh_df().iloc[-1]

    def _first_prediction(model):
        # Each model gets a vector sized to its own expected feature count.
        features = get_input_vector(latest_row, model.n_features_in_)
        return model.predict(features)[0]

    temperature = float(_first_prediction(regr))
    precipitation = float(_first_prediction(regr_prec))
    cloud_cover = float(_first_prediction(cloud_model))
    code = int(_first_prediction(clf))

    rows = [
        ("Temperature", temperature),
        ("Precipitation", precipitation),
        ("Cloud Cover", cloud_cover),
        ("Weather Code", code),
        ("Description", weather_code_mapping.get(code, "Unknown")),
    ]
    labels, values = zip(*rows)

    return pd.DataFrame({"Feature": list(labels), "Prediction": list(values)})


In [48]:
# NOTE(review): this rebinds `df`, which was previously assigned the hourly
# weather frame returned by refresh_df(), to the small prediction table.
df = predict_tomorrow()
df.head()



Unnamed: 0,Feature,Prediction
0,Temperature,-5.707391
1,Precipitation,0.022128
2,Cloud Cover,9.360929
3,Weather Code,2
4,Description,Partly cloudy


In [49]:
import time
import threading

def run_prediction_every_5min(interval_seconds=300, stop_event=None):
    """Print a fresh weather prediction repeatedly, pausing between runs.

    Each pass calls ``predict_tomorrow()``, prints a header and the result
    table, then waits ``interval_seconds`` before the next pass. The wait
    starts after the prediction finishes, so the period between prints is
    the interval plus the time the prediction itself takes.

    Args:
        interval_seconds: pause between two predictions, in seconds.
        stop_event: optional ``threading.Event``. When given, the loop ends
            as soon as the event is set, including while it is waiting
            between predictions. When ``None`` (the default) the loop runs
            until interrupted, as before.

    Returns:
        None. Only returns when ``stop_event`` is provided and gets set.
    """
    while stop_event is None or not stop_event.is_set():
        result_df = predict_tomorrow()
        print("=== Weather Prediction ===")
        print(result_df.to_string())

        if stop_event is None:
            time.sleep(interval_seconds)
        elif stop_event.wait(interval_seconds):
            # wait() returns True when the event was set during the pause.
            break