### Importing the required libraries

In [2]:
from meteostat import Point, Daily, Hourly
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor, XGBClassifier
import warnings
import schedule
import time

In [3]:
# Suppress every warning from this point on: no category or module filter is
# given, so deprecation and data-quality warnings are hidden as well.
warnings.filterwarnings("ignore")

## XGBoost machine learning model

In [5]:
def run_weather_forecast():
    """Fetch Pune weather history, train XGBoost models and export next-day forecasts.

    Steps:
      1. Download daily observations and hourly humidity from Meteostat
         (2015-01-01 up to now) and merge them into one frame per day.
      2. Build next-day targets (temperature, scaled precipitation, rainy/clear
         condition) by shifting each series one day back.
      3. Train two regressors and one classifier on the chronologically first
         80% of the labelled days and evaluate on the last 20%.
      4. Predict for every day with complete features -- including the most
         recent day, whose next-day targets are still unknown -- and write the
         result to ``pune_weather_forecast_with_predictions.csv``.

    Side effects: prints progress and metrics, shows a confusion-matrix plot
    and writes the CSV file into the current working directory.

    Returns:
        pandas.DataFrame: a one-row frame holding the most recent day together
        with its predictions (i.e. the forecast for the following day).

    Raises:
        ValueError: if no day has both complete features and next-day targets.
    """
    print("\n⏰ Job started at", datetime.now())

    # Fetch weather data
    location = Point(18.5204, 73.8567)  # Pune
    start = datetime(2015, 1, 1)
    end = datetime.now()

    # Daily weather (reassign instead of mutating a column-sliced frame in place)
    daily_data = Daily(location, start, end).fetch().reset_index()
    daily_data = daily_data.rename(columns={
        'time': 'date_time',
        'tavg': 'temperature',
        'prcp': 'precipitation',
        'wspd': 'wind_speed',
        'pres': 'pressure'
    })
    daily_data = daily_data[['date_time', 'temperature', 'tmin', 'tmax', 'precipitation', 'wind_speed', 'pressure']]
    daily_data = daily_data.fillna(daily_data.median(numeric_only=True))

    # Hourly humidity, averaged per calendar day
    hourly_data = Hourly(location, start, end).fetch().reset_index()
    hourly_data['date_time'] = hourly_data['time'].dt.date
    humidity = hourly_data.groupby('date_time')['rhum'].mean().reset_index()
    humidity = humidity.rename(columns={'rhum': 'humidity'})
    humidity['date_time'] = pd.to_datetime(humidity['date_time'])

    # Merge
    df = pd.merge(daily_data, humidity, on='date_time', how='left')

    # Add targets: each forecasted_* column holds the value observed on the next day
    df['forecasted_temperature'] = df['temperature'].shift(-1)
    max_precip = df['precipitation'].max()
    # Precipitation scaled by the historical maximum; the 0.001 avoids division by zero
    df['precipitation_probability'] = df['precipitation'] / (max_precip + 0.001)
    df['forecasted_precip_prob'] = df['precipitation_probability'].shift(-1)
    df['weather_condition'] = np.where(df['precipitation'] > 1.0, 'rainy', 'clear')
    df['forecasted_condition'] = df['weather_condition'].shift(-1)

    # Time features
    df['month'] = df['date_time'].dt.month
    df['day_of_year'] = df['date_time'].dt.dayofyear
    df['season'] = df['month'] % 12 // 3 + 1
    df['is_monsoon'] = df['month'].isin([6, 7, 8, 9]).astype(int)

    # Features and targets
    features = ['temperature', 'tmin', 'tmax', 'wind_speed', 'pressure',
                'precipitation', 'humidity', 'month', 'day_of_year', 'season', 'is_monsoon']
    target_temp = 'forecasted_temperature'
    target_precip = 'forecasted_precip_prob'
    target_cond = 'forecasted_condition'
    targets = [target_temp, target_precip, target_cond]

    # Keep every day with complete features. Days without next-day targets
    # (normally just the newest observation) cannot be trained on, but they are
    # exactly the rows we want a forecast for, so they must not be dropped.
    df = df.dropna(subset=features).reset_index(drop=True)
    has_target = df[targets].notna().all(axis=1)
    if not has_target.any():
        raise ValueError("No rows with complete features and next-day targets; cannot train.")

    # Encode classification target (nullable ints so unlabelled rows stay empty)
    le = LabelEncoder()
    encoded_cond = pd.Series(pd.NA, index=df.index, dtype='Int64')
    encoded_cond[has_target] = le.fit_transform(df.loc[has_target, target_cond])
    df['forecasted_condition_encoded'] = encoded_cond

    X_all = df[features]
    labelled = df[has_target]
    X = labelled[features]

    # Regression targets
    y_temp = labelled[target_temp]
    y_precip = labelled[target_precip]

    # Classification target
    y_cond = labelled['forecasted_condition_encoded'].astype(int)

    # Chronological train/test split. Shuffling a daily series would leak the
    # future into training: the target of day t is a feature of day t+1.
    (X_train, X_test,
     y_train_temp, y_test_temp,
     y_train_precip, y_test_precip,
     y_train_cond, y_test_cond) = train_test_split(
        X, y_temp, y_precip, y_cond, test_size=0.2, shuffle=False)

    # Oversample the minority class for classification (training fold only)
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X_train, y_train_cond)

    # Models
    reg_temp = XGBRegressor(n_estimators=100, random_state=42)
    reg_precip = XGBRegressor(n_estimators=100, random_state=42)
    cls_cond = XGBClassifier(n_estimators=100, random_state=42)

    # Train
    reg_temp.fit(X_train, y_train_temp)
    reg_precip.fit(X_train, y_train_precip)
    cls_cond.fit(X_resampled, y_resampled)

    # Predict for every day with complete features, including the newest one
    df['predicted_temperature'] = reg_temp.predict(X_all)
    df['predicted_precip_prob'] = reg_precip.predict(X_all)
    df['predicted_condition_encoded'] = cls_cond.predict(X_all)
    df['predicted_condition'] = le.inverse_transform(df['predicted_condition_encoded'])

    # Evaluate on the held-out (most recent) labelled days
    y_pred_temp = reg_temp.predict(X_test)
    y_pred_precip = reg_precip.predict(X_test)
    y_pred_cond = cls_cond.predict(X_test)

    rmse_temp = np.sqrt(mean_squared_error(y_test_temp, y_pred_temp))
    rmse_precip = np.sqrt(mean_squared_error(y_test_precip, y_pred_precip))
    acc_cond = accuracy_score(y_test_cond, y_pred_cond)

    # Explicit labels keep the report/matrix aligned with le.classes_ even if a
    # class happens to be absent from the test fold.
    class_ids = np.arange(len(le.classes_))

    print(f"\n🌡️ Temperature Prediction RMSE: {rmse_temp:.2f} °C")
    print(f"🌧️ Precipitation Probability RMSE: {rmse_precip:.3f}")
    print(f"🌤️ Weather Condition Accuracy: {acc_cond:.2%}")
    print(classification_report(y_test_cond, y_pred_cond, labels=class_ids, target_names=le.classes_))

    # Confusion Matrix
    cm = confusion_matrix(y_test_cond, y_pred_cond, labels=class_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)
    disp.plot()
    plt.title("Confusion Matrix - XGBoost Classifier")
    plt.show()

    # Export CSV
    df.to_csv("pune_weather_forecast_with_predictions.csv", index=False)
    print("\n📁 CSV exported: pune_weather_forecast_with_predictions.csv")
    print("✅ Job finished at", datetime.now())

    # Hand the newest row back to the caller; a bare expression inside a
    # function is discarded and would never be displayed.
    return df.tail(1)

In [6]:
# Schedule job daily at 7:05 AM
# This only registers run_weather_forecast with the `schedule` library; nothing
# runs until schedule.run_pending() is called.
# NOTE(review): re-running this cell presumably registers a second, duplicate
# job on the same default scheduler -- confirm, and clear jobs first if so.
schedule.every().day.at("07:05").do(run_weather_forecast)

Every 1 day at 07:05:00 do run_weather_forecast() (last run: [never], next run: 2025-04-13 07:05:00)

In [None]:
# Blocking scheduler loop: once a minute, run whatever registered jobs are due.
# The loop has no exit condition, so this cell occupies the kernel until it is
# interrupted manually.
print("📅 Scheduler running... Waiting for 7:05 AM daily job...")
while True:
    schedule.run_pending()  # executes any job whose scheduled time has passed
    time.sleep(60)  # poll interval in seconds

📅 Scheduler running... Waiting for 7:05 AM daily job...
