1. DATA COLLECTION

In [4]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta

# OpenWeatherMap credential and the city whose weather is collected.
# SECURITY: a credential should not live in a notebook. Set the
# OPENWEATHER_API_KEY environment variable to supply your own key. The literal
# below is kept only so existing runs keep working; it has been exposed in
# source, so it should be rotated and this fallback removed.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "9618b15b404c16cb81c2bcf7608ca142")
CITY = "DELHI"

def fetch_weather_data():
    """Fetch weather readings for ``CITY`` and append them to the processed CSV.

    One request is sent to the OpenWeatherMap ``/data/2.5/weather`` endpoint
    for each hourly offset from 5 down to 1. Every successful response becomes
    one row whose ``date_time`` is "now minus that offset". The rows are
    appended to ``data/processed/weather_data.csv``; the header row is written
    only when the file does not exist yet.

    A request that raises a network error or returns a non-200 status is
    reported on stdout and skipped. If no request succeeds, nothing is written.

    NOTE(review): the request carries no time parameter, so all rows of one
    batch are built from identical queries and differ only in the
    ``date_time`` label. If real hourly history was intended, a different
    endpoint is needed -- confirm with the data owner.

    Returns:
        None
    """
    url = "https://api.openweathermap.org/data/2.5/weather"
    # Let requests encode the query string instead of interpolating raw values.
    params = {"q": CITY, "appid": API_KEY, "units": "metric"}
    # Single reference instant so the rows of a batch are exactly one hour apart.
    now = datetime.now()

    records = []
    for hour_offset in range(5, 0, -1):
        try:
            # A timeout keeps a stalled connection from hanging the pipeline.
            response = requests.get(url, params=params, timeout=10)
        except requests.RequestException as exc:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f"Failed to fetch for hour offset {hour_offset}: {type(exc).__name__}")
            continue
        if response.status_code != 200:
            print(f"Failed to fetch for hour offset {hour_offset}")
            continue

        data = response.json()
        record = {
            "city": CITY,
            "date_time": now - timedelta(hours=hour_offset),
            "temperature": data["main"]["temp"],
            "humidity": data["main"]["humidity"],
            "wind_speed": data["wind"]["speed"],
            # Wind direction and rain are optional in the payload; default to 0.
            "wind_direction": data["wind"].get("deg", 0),
            "pressure": data["main"]["pressure"],
            "precipitation": data.get("rain", {}).get("1h", 0),
            "cloud_coverage": data["clouds"]["all"],
            "weather_condition": data["weather"][0]["main"]
        }
        records.append(record)

    if records:
        df = pd.DataFrame(records)
        os.makedirs("data/processed", exist_ok=True)
        csv_file = "data/processed/weather_data.csv"
        # Append mode: emit the header only for a brand-new file.
        write_header = not os.path.exists(csv_file)
        df.to_csv(csv_file, mode='a', index=False, header=write_header)
        print(f"Weather data for {CITY} is saved.")
    
# Collect a batch of readings when this cell (or exported script) is executed directly.
if __name__ == "__main__":
    fetch_weather_data()

Weather data for DELHI is saved.


2. MODEL TRAINING

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import os

def train_model():
    """Train a random-forest classifier on the collected data and save it.

    Reads ``data/processed/weather_data.csv``, drops rows containing missing
    values, holds out 20% of the rows to print an accuracy score, and dumps the
    fitted model to ``models/weather_model.pkl``.

    When the CSV is missing or empty, or fewer than five complete rows remain,
    a message is printed and the function returns without training, so a
    scheduled pipeline run is not aborted by an exception.

    NOTE(review): if the CSV contains repeated rows, identical samples can fall
    on both sides of the split and inflate the printed accuracy -- confirm the
    data before relying on that number.

    Returns:
        None
    """
    csv_file = "data/processed/weather_data.csv"
    feature_cols = ["temperature", "humidity", "wind_speed", "wind_direction",
                    "pressure", "precipitation", "cloud_coverage"]

    try:
        df = pd.read_csv(csv_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No dataset yet is treated the same as "too little data".
        print("Not enough data to train the model. Please check your dataset.")
        return

    # Non-mutating form; rows with any missing value are discarded.
    df = df.dropna()

    if len(df) < 5:
        print("Not enough data to train the model. Please check your dataset.")
        return

    X = df[feature_cols]
    y = df["weather_condition"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    os.makedirs("models", exist_ok=True)
    joblib.dump(model, "models/weather_model.pkl")
    print("Model trained and saved.")

# Train and persist the model when this cell (or exported script) is executed directly.
if __name__ == "__main__":
    train_model()

Accuracy: 1.0
Model for trained and saved.


3. PREDICTION SCRIPT




In [9]:
import os
import pandas as pd
import joblib
from datetime import datetime


def predict_new():
    """Predict the weather condition from the most recent readings and log it.

    The feature columns of the last five rows of
    ``data/processed/weather_data.csv`` are averaged into a single sample,
    which is passed to the model stored at ``models/weather_model.pkl``. The
    prediction is printed and appended, with the module-level ``CITY`` and the
    current timestamp, to ``data/predictions/forecast.csv``. The header row is
    written only when that file is first created.

    When the data file is missing or has no rows, or the model file is missing,
    a message is printed and the function returns without predicting.

    Returns:
        None
    """
    csv_file = "data/processed/weather_data.csv"
    model_file = "models/weather_model.pkl"
    forecast_file = "data/predictions/forecast.csv"
    feature_cols = ["temperature", "humidity", "wind_speed", "wind_direction",
                    "pressure", "precipitation", "cloud_coverage"]

    # Training may have been skipped, so neither input is guaranteed to exist.
    if not os.path.exists(csv_file):
        print("No weather data found. Run data collection first.")
        return
    if not os.path.exists(model_file):
        print("No trained model found. Run model training first.")
        return

    df = pd.read_csv(csv_file)
    if df.empty:
        print("No weather data found. Run data collection first.")
        return

    # Collapse the last five readings into one row, in the training column order.
    latest = df.tail(5).mean(numeric_only=True).to_frame().T[feature_cols]

    # NOTE: joblib files are pickle-based; only load a model file this pipeline wrote.
    model = joblib.load(model_file)
    prediction = model.predict(latest)[0]

    print("Predicted Weather Condition:", prediction)
    result = pd.DataFrame({"city": [CITY], "date_time": [datetime.now()], "forecasted_weather": [prediction]})
    os.makedirs("data/predictions", exist_ok=True)
    # Append mode: emit the header only for a brand-new file.
    write_header = not os.path.exists(forecast_file)
    result.to_csv(forecast_file, mode='a', index=False, header=write_header)


# Produce and log one forecast when this cell (or exported script) is executed directly.
if __name__ == "__main__":
    predict_new()

Predicted Weather Condition: Clouds


4. SCHEDULER

In [None]:
import schedule
import time

def run_pipeline():
    """Run one full pass: collect data, retrain the model, then forecast."""
    stages = (fetch_weather_data, train_model, predict_new)
    for stage in stages:
        stage()

def start_scheduler(interval_minutes=1, poll_seconds=60):
    """Run ``run_pipeline`` on a fixed interval until interrupted.

    The job is registered with the ``schedule`` module, then the function loops
    forever, running pending jobs and sleeping between checks. It only returns
    by raising, for example ``KeyboardInterrupt`` when the kernel is
    interrupted.

    The job is cancelled on the way out. Without that, calling this function
    again in the same kernel would leave the earlier job registered and the
    pipeline would run more than once per interval.

    Args:
        interval_minutes: Minutes between pipeline runs. Defaults to 1.
        poll_seconds: Seconds to sleep between checks for due jobs.
            Defaults to 60.
    """
    job = schedule.every(interval_minutes).minutes.do(run_pipeline)
    print(f"Scheduler started. Running every {interval_minutes} minute(s).\n")

    try:
        while True:
            schedule.run_pending()
            time.sleep(poll_seconds)
    finally:
        # Always unregister, whether the loop ended by interrupt or by error.
        schedule.cancel_job(job)

# Guarded like the other cells' entry points, so importing an exported copy of
# this notebook does not start the blocking loop. Inside a notebook
# __name__ is "__main__", so the cell still starts the scheduler when run.
if __name__ == '__main__':
    start_scheduler()

Scheduler started. Running every minutes.

Weather data for DELHI is saved.
Accuracy: 1.0
Model for trained and saved.
Predicted Weather Condition: Clouds
Weather data for DELHI is saved.
Accuracy: 1.0
Model for trained and saved.
Predicted Weather Condition: Clouds
