In [1]:
import pandas as pd
import pickle
from pymongo import MongoClient
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

In [2]:
# Open a client against the MongoDB server on localhost:27017, then resolve the
# database and collection handles that the rest of the notebook reads from.
client = MongoClient(host="mongodb://localhost:27017")
db = client.get_database("weather_db")
collection = db.get_collection("new_york_hourly")

In [3]:
def _load_pickled_model(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the file;
    # only point this at model files from a trusted source.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# One pickled estimator per forecast target.
model_cloud = _load_pickled_model("../models/cloudcover_model.pkl")
model_temp = _load_pickled_model("../models/temperature_model.pkl")
model_prec = _load_pickled_model("../models/precipitation_model.pkl")

In [None]:
def fetch_day_df(collection, day_str: str):
    """Return every document dated within one calendar day as a DataFrame.

    Parameters
    ----------
    collection
        Object exposing a pymongo-style ``find(filter, sort=...)`` method.
    day_str : str
        Day to load, formatted as ``YYYY-MM-DD``.

    Returns
    -------
    pandas.DataFrame
        One row per matching document, in ascending ``date`` order. The frame
        is empty when no document falls inside the day.

    Raises
    ------
    ValueError
        If ``day_str`` does not match the ``%Y-%m-%d`` format.
    """
    day_start = datetime.strptime(day_str, "%Y-%m-%d")
    day_end = day_start + timedelta(days=1)

    # Half-open window [day_start, day_end): midnight of the following day is excluded.
    day_filter = {"date": {"$gte": day_start, "$lt": day_end}}

    # Without an explicit sort MongoDB does not guarantee the order of results,
    # and predict_weather_next_day identifies rows by position only, so request
    # the documents in chronological order (1 == ascending).
    cursor = collection.find(day_filter, sort=[("date", 1)])
    return pd.DataFrame(list(cursor))

In [None]:
def predict_weather_next_day(selected_date: str):
    """Predict cloud cover, temperature and precipitation for the day after ``selected_date``.

    Rows for ``selected_date`` are read through ``fetch_day_df`` from the
    module-level ``collection``; the module-level ``model_cloud``,
    ``model_temp`` and ``model_prec`` each produce one prediction per row.

    Parameters
    ----------
    selected_date : str
        Day whose stored rows are used as model input, formatted ``YYYY-MM-DD``.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``date`` (the following day as ``YYYY-MM-DD``),
        ``pred_cloudcover``, ``pred_temperature`` and ``pred_precipitation``,
        one row per input row. ``None`` (after printing a message) when no
        rows exist for ``selected_date``.
    """
    base_day = datetime.strptime(selected_date, "%Y-%m-%d")
    next_day = (base_day + timedelta(days=1)).strftime("%Y-%m-%d")

    df_input = fetch_day_df(collection, selected_date)
    if df_input.empty:
        print(f"‚ö†Ô∏è Kh√¥ng c√≥ d·ªØ li·ªáu cho ng√†y {selected_date} trong DB")
        return None

    # Bookkeeping columns are never model inputs.
    features = df_input.drop(columns=["_id", "date"], errors="ignore")

    # The "*_next" columns are removed from every model's input.
    next_targets = ["cloudcover_next", "temperature_next", "precipitation_next"]

    # (output column, model, same-day column excluded from that model's input)
    plan = (
        ("pred_cloudcover", model_cloud, "cloudcover"),
        ("pred_temperature", model_temp, "temperature"),
        ("pred_precipitation", model_prec, "precipitation"),
    )

    result_columns = {"date": [next_day] * len(features)}
    for out_name, model, current_name in plan:
        model_input = features.drop(columns=[current_name, *next_targets], errors="ignore")
        result_columns[out_name] = model.predict(model_input)

    return pd.DataFrame(result_columns)


In [14]:
def test_prediction(collection, selected_date: str,
                    model_cloud, model_temp, model_prec, n_samples: int = 5):
    """Print the first ``n_samples`` next-day predictions for ``selected_date``.

    The prediction is obtained from ``predict_weather_next_day(selected_date)``.

    NOTE(review): ``collection``, ``model_cloud``, ``model_temp`` and
    ``model_prec`` are accepted but not used by this body.

    Parameters
    ----------
    selected_date : str
        Day passed through to ``predict_weather_next_day``.
    n_samples : int, default 5
        Number of leading rows of the prediction frame to print.

    Returns
    -------
    None
    """
    forecast = predict_weather_next_day(selected_date)

    # Nothing to show: either no prediction was returned or it has no rows.
    if forecast is None or forecast.empty:
        print(f"‚ö†Ô∏è Kh√¥ng c√≥ d·ªØ li·ªáu ho·∫∑c kh√¥ng d·ª± ƒëo√°n ƒë∆∞·ª£c cho ng√†y {selected_date}")
        return

    print(f"üìä D·ª± ƒëo√°n cho ng√†y mai c·ªßa {selected_date}")
    print(forecast.head(n_samples))


In [18]:
# Run the prediction for the rows stored under 2025-04-10 and print the first five results.
test_prediction(
    collection=collection,
    selected_date="2025-04-10",
    model_cloud=model_cloud,
    model_temp=model_temp,
    model_prec=model_prec,
    n_samples=5,
)


üìä D·ª± ƒëo√°n cho ng√†y mai c·ªßa 2025-04-10
         date  pred_cloudcover  pred_temperature  pred_precipitation
0  2025-04-11        37.061923          7.420667            0.106092
1  2025-04-11        10.533169          4.629253            0.016719
2  2025-04-11         9.044248          4.024253            0.068350
3  2025-04-11         4.828733          4.525753            0.008043
4  2025-04-11        18.884995          7.321853            0.028946
