In [40]:
# 🌦️ City-Wise Weather Predictor using ML + Real-Time API
# This notebook forecasts the next 7 days of temperature based on historical city weather data, and compares today's forecast with real-time temperature from OpenWeatherMap.

# 🔽 Enter the city name (must match a CSV filename in the "data" folder)
city_name = "chennai"  # e.g., Bangalore, Chennai


In [41]:
# 📍 Weather Forecasting Tool (Advanced Model)

# 👋 Welcome! This tool predicts the average temperature for the next 7 days for any Indian city.
# 🔍 It uses historical weather data + machine learning to generate a forecast.

# ✅ Instructions:
# 1. Place your city CSV file inside a folder named `data/`
# 2. The file name should be the city name in lowercase (e.g., `chennai.csv`, `delhi.csv`)
# 3. Run all cells below to get your forecast


In [42]:
import pandas as pd
import os

# 📂 Set up data path
data_path = "data"
file_path = os.path.join(data_path, f"{city_name}.csv")

# ✅ Load data or show error
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
    print(f"\n✅ Loaded data for '{city_name}' successfully!")
    print("\n📊 Preview of the dataset:")
    print(df.head())
else:
    print(f"❌ File not found: {file_path}")



✅ Loaded data for 'chennai' successfully!

📊 Preview of the dataset:
         time  tavg  tmin  tmax  prcp
0  01-01-1990  25.2  22.8  28.4   0.5
1  02-01-1990  24.9  21.7  29.1   0.0
2  03-01-1990  25.6  21.4  29.8   0.0
3  04-01-1990  25.7   NaN  28.7   0.0
4  05-01-1990  25.5  20.7  28.4   0.0


In [43]:
# 📊 Clean and prepare data

# Drop rows where 'tavg' (target variable) is missing
df = df.dropna(subset=['tavg'])

# Fill missing values in predictors
df['tmin'] = df['tmin'].fillna(df['tmin'].mean())
df['tmax'] = df['tmax'].fillna(df['tmax'].mean())
df['prcp'] = df['prcp'].fillna(0.0)  # Assume 0 if rain data missing

# Convert time to datetime format
df['time'] = pd.to_datetime(df['time'], dayfirst=True)

# Create features
df['month'] = df['time'].dt.month
df['day_of_year'] = df['time'].dt.dayofyear

# 🎯 Define features and label
features = ['month', 'day_of_year', 'tmin', 'tmax', 'prcp']
X = df[features]
y = df['tavg']

# Preview
print("✅ Data preprocessing complete. Sample features:")
X.head()


✅ Data preprocessing complete. Sample features:


Unnamed: 0,month,day_of_year,tmin,tmax,prcp
0,1,1,22.8,28.4,0.5
1,1,2,21.7,29.1,0.0
2,1,3,21.4,29.8,0.0
3,1,4,24.38277,28.7,0.0
4,1,5,20.7,28.4,0.0


In [44]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# 🧪 Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🌲 Train Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 📈 Predict and evaluate
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"✅ Model trained. RMSE: {round(rmse, 2)} °C")


✅ Model trained. RMSE: 0.78 °C


In [45]:
# 📅 Generate future dates
from datetime import timedelta

# Create future dates (next 7 days)
future_dates = [datetime.now().date() + timedelta(days=i) for i in range(0, 7)]

# Build future features DataFrame
future_df = pd.DataFrame({
    'date': future_dates,
    'month': [date.month for date in future_dates],
    'day_of_year': [date.timetuple().tm_yday for date in future_dates],
    'tmin': [df['tmin'].mean()] * 7,
    'tmax': [df['tmax'].mean()] * 7,
    'prcp': [0.0] * 7
})

# Predict
future_df['predicted_tavg'] = model.predict(future_df.drop(columns=['date']))


# 📋 Show forecast
# 📅 Pretty-print the forecast
from tabulate import tabulate

forecast_table = []

for i in range(7):
    date_str = future_dates[i].strftime("%A, %d %b %Y")  # e.g., Monday, 29 Jul 2025
    forecast_table.append([f"Day {i+1}", date_str, f"{future_predictions[i]:.2f} °C"])

# Use tabulate for a clean table output
print("\n🌤️  7-Day Weather Forecast:\n")
print(tabulate(forecast_table, headers=["Day", "Date", "Predicted Avg Temp"], tablefmt="fancy_grid"))




🌤️  7-Day Weather Forecast:

╒═══════╤════════════════════════╤══════════════════════╕
│ Day   │ Date                   │ Predicted Avg Temp   │
╞═══════╪════════════════════════╪══════════════════════╡
│ Day 1 │ Tuesday, 29 Jul 2025   │ 30.18 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 2 │ Wednesday, 30 Jul 2025 │ 30.08 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 3 │ Thursday, 31 Jul 2025  │ 29.86 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 4 │ Friday, 01 Aug 2025    │ 29.92 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 5 │ Saturday, 02 Aug 2025  │ 29.85 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 6 │ Sunday, 03 Aug 2025    │ 29.37 °C             │
├───────┼────────────────────────┼──────────────────────┤
│ Day 7 │ Monday, 04 Aug 2025    │ 29.49 °C             │
╘═══════╧════════════════════════╧════════

In [46]:
import requests

# Replace with your actual API key
API_KEY = "0f9cfae2d33a54218206cb6e42040f11"

# Use the city name entered earlier
def get_real_time_weather(city, api_key):
    url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={api_key}&units=metric"
    response = requests.get(url)

    if response.status_code == 200:
        data = response.json()
        weather = {
            "description": data["weather"][0]["description"].title(),
            "temperature": data["main"]["temp"],
            "humidity": data["main"]["humidity"],
            "wind_speed": data["wind"]["speed"]
        }
        return weather
    else:
        return {"error": f"Failed to fetch data: {response.status_code}"}

# Example usage
real_time_weather = get_real_time_weather(city_name, API_KEY)

# Display the result
if "error" in real_time_weather:
    print(real_time_weather["error"])
else:
    print(f"\n🌤️ Real-Time Weather in {city_name}:")
    print(f"Temperature: {real_time_weather['temperature']}°C")
    print(f"Description: {real_time_weather['description']}")
    print(f"Humidity: {real_time_weather['humidity']}%")
    print(f"Wind Speed: {real_time_weather['wind_speed']} m/s")



🌤️ Real-Time Weather in chennai:
Temperature: 31.15°C
Description: Overcast Clouds
Humidity: 50%
Wind Speed: 6.85 m/s


In [47]:
from datetime import datetime

# Get today's date in the prediction
today = datetime.now().date()

# Find predicted value for today
today_prediction_row = future_df[future_df['date'] == today]

if not today_prediction_row.empty and "error" not in real_time_weather:
    predicted_temp = round(today_prediction_row['predicted_tavg'].values[0], 2)
    actual_temp = real_time_weather['temperature']
    
    print(f"\n🔍 Comparison for {today.strftime('%B %d, %Y')}:")
    print(f"Predicted Temperature: {predicted_temp}°C")
    print(f"Real-Time Temperature: {actual_temp}°C")
    print(f"Difference: {round(abs(predicted_temp - actual_temp), 2)}°C")
else:
    print("❗ Unable to compare because today's prediction or real-time data is unavailable.")



🔍 Comparison for July 29, 2025:
Predicted Temperature: 30.5°C
Real-Time Temperature: 31.15°C
Difference: 0.65°C


In [48]:
print("\n✅ All steps completed successfully!")
print(f"📉 Model trained using Random Forest Regressor with an RMSE of {round(rmse, 2)} °C")
print("📅 Forecast generated for the next 7 calendar days based on historical weather patterns.")
print("\n📂 To test with a different city, just change the 'city_name' at the top and re-run all cells.")



✅ All steps completed successfully!
📉 Model trained using Random Forest Regressor with an RMSE of 0.78 °C
📅 Forecast generated for the next 7 calendar days based on historical weather patterns.

📂 To test with a different city, just change the 'city_name' at the top and re-run all cells.
