#### Importing the necessary libraries

In [2]:
# 1. Install Dependencies (Run once)
!pip install meteostat scikit-learn imbalanced-learn pandas numpy matplotlib



In [2]:
from meteostat import Point, Daily, Hourly
from datetime import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from imblearn.over_sampling import RandomOverSampler
import seaborn as sns
import schedule
import time
import warnings

In [3]:
#initialising warnings
warnings.filterwarnings("ignore", message="Support for nested sequences for 'parse_dates'")
warnings.filterwarnings("ignore", message="'H' is deprecated")

### RANDOM FOREST REGRESSION AND CLASSIFICATION MODELS

In [5]:
def _load_weather_frame(location, start, end):
    """Fetch daily observations and daily-mean humidity and merge them by date.

    Args:
        location: Meteostat ``Point`` to query.
        start: First ``datetime`` of the requested period.
        end: Last ``datetime`` of the requested period.

    Returns:
        DataFrame ordered by ``date_time`` with the columns ``temperature``,
        ``tmin``, ``tmax``, ``precipitation``, ``wind_speed``, ``pressure``
        and ``humidity``. Humidity is NaN for dates without hourly records.
    """
    # Daily data
    daily_data = Daily(location, start, end).fetch().reset_index()
    daily_data = daily_data.rename(columns={
        'time': 'date_time',
        'tavg': 'temperature',
        'prcp': 'precipitation',
        'wspd': 'wind_speed',
        'pres': 'pressure'
    })
    daily_data = daily_data[['date_time', 'temperature', 'tmin', 'tmax',
                             'precipitation', 'wind_speed', 'pressure']]
    # Impute gaps with each column's median. Assigning the result (instead of
    # filling in place on a column subset) avoids chained-assignment warnings.
    daily_data = daily_data.fillna(daily_data.median(numeric_only=True))

    # Hourly humidity, averaged per calendar date
    hourly_data = Hourly(location, start, end).fetch().reset_index()
    hourly_data['date_time'] = hourly_data['time'].dt.date
    humidity = hourly_data.groupby('date_time')['rhum'].mean().reset_index()
    humidity = humidity.rename(columns={'rhum': 'humidity'})
    humidity['date_time'] = pd.to_datetime(humidity['date_time'])

    merged = pd.merge(daily_data, humidity, on='date_time', how='left')
    # The next-row targets and the chronological split both rely on date order.
    return merged.sort_values('date_time').reset_index(drop=True)


def _add_targets_and_features(df):
    """Return a copy of ``df`` with next-row targets and calendar features added.

    Targets are the following row's values (``shift(-1)``), so the final row's
    targets are NaN.
    """
    df = df.copy()

    # Targets
    df['forecasted_temperature'] = df['temperature'].shift(-1)
    # Precipitation scaled by the largest value in the data; the small epsilon
    # avoids a 0/0 division when no precipitation was recorded at all.
    max_precip = df['precipitation'].max()
    df['precipitation_probability'] = df['precipitation'] / (max_precip + 0.001)
    df['forecasted_precip_prob'] = df['precipitation_probability'].shift(-1)
    df['weather_condition'] = np.where(df['precipitation'] > 1.0, 'rainy', 'clear')
    df['forecasted_condition'] = df['weather_condition'].shift(-1)

    # Time features
    df['month'] = df['date_time'].dt.month
    df['day_of_year'] = df['date_time'].dt.dayofyear
    # Maps Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
    df['season'] = df['month'] % 12 // 3 + 1
    df['is_monsoon'] = df['month'].isin([6, 7, 8, 9]).astype(int)
    return df


def _plot_diagnostics(features, importances, y_test_temp, y_pred_temp,
                      y_test_precip, y_pred_precip, y_test_cond, y_pred_cond, classes):
    """Show feature importances, actual-vs-predicted plots and the confusion matrix."""
    # Feature importances for temperature
    plt.figure(figsize=(10, 6))
    plt.barh(features, importances)
    plt.title("Feature Importance for Temperature Prediction")
    plt.xlabel("Importance")
    plt.show()

    # -----------------------------
    # 1. Predicted vs Actual: Temperature
    # -----------------------------
    plt.figure(figsize=(8, 5))
    plt.scatter(y_test_temp, y_pred_temp, alpha=0.6, color='dodgerblue')
    # Dashed diagonal marks a perfect prediction.
    plt.plot([y_test_temp.min(), y_test_temp.max()], [y_test_temp.min(), y_test_temp.max()], 'r--')
    plt.title("Temperature: Actual vs Predicted")
    plt.xlabel("Actual Temperature (°C)")
    plt.ylabel("Predicted Temperature (°C)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # -----------------------------
    # 2. Predicted vs Actual: Precipitation Probability
    # -----------------------------
    plt.figure(figsize=(8, 5))

    # Plot actual values (blue)
    plt.scatter(range(len(y_test_precip)), y_test_precip, alpha=0.6, color='blue', label='Actual Probability')

    # Plot predicted values (red)
    plt.scatter(range(len(y_pred_precip)), y_pred_precip, alpha=0.6, color='red', label='Predicted Probability')

    plt.title("Precipitation Probability: Actual vs Predicted")
    plt.xlabel("Sample Index")
    plt.ylabel("Probability")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # -----------------------------
    # 3. Confusion Matrix: Weather Condition
    # -----------------------------
    cm = confusion_matrix(y_test_cond, y_pred_cond, labels=classes)

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=classes,
                yticklabels=classes)
    plt.title("Confusion Matrix: Weather Condition")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()


def run_weather_forecast():
    """Train random-forest weather models for Pune, evaluate them and forecast the next day.

    Steps:
        1. Download daily observations and hourly humidity from 2015-01-01 to now.
        2. Build next-row targets (temperature, scaled precipitation, rainy/clear)
           and calendar features.
        3. Hold out the most recent 20% of rows as the test set. The split is
           chronological because neighbouring days are strongly correlated and a
           shuffled split would leak them between train and test.
        4. Fit two regressors and one classifier (the classifier on a randomly
           oversampled training set), then print RMSE, accuracy and a
           classification report.
        5. Print a forecast from the most recent row that has complete features.
        6. Write predictions for all labelled rows to
           ``pune_rf_weather_predictions.csv`` and show diagnostic plots.

    Raises:
        ValueError: If no complete rows remain after dropping missing values.
    """
    # 1. Fetch Weather Data (Pune)
    location = Point(18.5204, 73.8567)  # Pune
    start = datetime(2015, 1, 1)
    end = datetime.now()

    features = ['temperature', 'tmin', 'tmax', 'wind_speed', 'pressure',
                'precipitation', 'humidity', 'month', 'day_of_year', 'season', 'is_monsoon']

    df = _add_targets_and_features(_load_weather_frame(location, start, end))

    # Keep the newest row with complete features before dropna() removes it:
    # the final row has no next-row target, yet it is the one to forecast from.
    latest = df.dropna(subset=features).tail(1)

    df = df.dropna().copy()
    if df.empty:
        raise ValueError("No complete rows left after dropping missing values; cannot train the models.")

    X = df[features]

    # One chronological split shared by all three targets.
    train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)
    X_train, X_test = train_df[features], test_df[features]

    y_test_temp = test_df['forecasted_temperature']
    y_test_precip = test_df['forecasted_precip_prob']
    y_test_cond = test_df['forecasted_condition']

    # Balance the condition classes in the training data only.
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X_train, train_df['forecasted_condition'])

    # Models
    reg_temp = RandomForestRegressor(n_estimators=100, random_state=42)
    reg_temp.fit(X_train, train_df['forecasted_temperature'])

    reg_precip = RandomForestRegressor(n_estimators=100, random_state=42)
    reg_precip.fit(X_train, train_df['forecasted_precip_prob'])

    cls_cond = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
    cls_cond.fit(X_resampled, y_resampled)

    # Evaluate
    y_pred_temp = reg_temp.predict(X_test)
    y_pred_precip = reg_precip.predict(X_test)
    y_pred_cond = cls_cond.predict(X_test)

    rmse_temp = np.sqrt(mean_squared_error(y_test_temp, y_pred_temp))
    rmse_precip = np.sqrt(mean_squared_error(y_test_precip, y_pred_precip))
    acc_cond = accuracy_score(y_test_cond, y_pred_cond)

    print(f"\n✅ Regression - Temperature: RMSE = {rmse_temp:.2f} °C")
    print(f"✅ Regression - Precipitation: RMSE = {rmse_precip:.3f}")
    print(f"✅ Classification - Weather Condition Accuracy: {acc_cond:.2%}")
    print(classification_report(y_test_cond, y_pred_cond))

    # Next-day forecast from the newest complete observation.
    if not latest.empty:
        latest_X = latest[features]
        observed_on = latest['date_time'].iloc[0]
        forecast_day = observed_on + pd.Timedelta(days=1)
        print(f"\n🔮 Forecast for {forecast_day:%Y-%m-%d} "
              f"(based on observations from {observed_on:%Y-%m-%d}):")
        print(f"   Temperature: {reg_temp.predict(latest_X)[0]:.1f} °C")
        # This value is precipitation scaled by the historical maximum.
        print(f"   Precipitation probability: {reg_precip.predict(latest_X)[0]:.3f}")
        print(f"   Weather condition: {cls_cond.predict(latest_X)[0]}")

    # Save predictions for every labelled row. Most of these rows were also
    # used for training, so the saved values are largely in-sample.
    df['predicted_temperature'] = reg_temp.predict(X)
    df['predicted_precip_prob'] = reg_precip.predict(X)
    df['predicted_condition'] = cls_cond.predict(X)

    df.to_csv("pune_rf_weather_predictions.csv", index=False)
    print("\n📁 Predictions saved to pune_rf_weather_predictions.csv")

    _plot_diagnostics(features, reg_temp.feature_importances_,
                      y_test_temp, y_pred_temp,
                      y_test_precip, y_pred_precip,
                      y_test_cond, y_pred_cond,
                      cls_cond.classes_)

In [6]:
# Scheduler - Run every day at 07:05 AM
# Clear any job left over from an earlier execution of this cell first; otherwise
# every re-run registers one more job and the forecast runs several times a day.
FORECAST_JOB_TAG = "weather-forecast"
schedule.clear(FORECAST_JOB_TAG)
schedule.every().day.at("07:05").do(run_weather_forecast).tag(FORECAST_JOB_TAG)

Every 1 day at 07:05:00 do run_weather_forecast() (last run: [never], next run: 2025-04-13 07:05:00)

In [None]:
print("🕖 Scheduler is running. Waiting for 07:05 AM every day...")
# This loop never ends on its own and keeps the kernel busy; interrupt the
# kernel to stop it. Errors raised by a job are deliberately not swallowed.
try:
    while True:
        schedule.run_pending()  # run any job whose scheduled time has passed
        time.sleep(60)          # poll once a minute
except KeyboardInterrupt:
    print("🛑 Scheduler stopped.")

🕖 Scheduler is running. Waiting for 07:05 AM every day...
