# Weather Prediction with Machine Learning

In [1]:
#Import libraries
import pandas as pd

# Read the weather history CSV (path is relative to the working directory)
df = pd.read_csv("weatherHistory.csv")

# Summarise the raw table: dimensions, column labels, and a short preview
print("Shape of dataset:", df.shape)
print("\nColumn names:", list(df.columns))
print("\nFirst 5 rows:", df.head(5), sep="\n")

# Count the null entries in every column
print("\nMissing values per column:", df.isna().sum(), sep="\n")

Shape of dataset: (96453, 12)

Column names: ['Formatted Date', 'Summary', 'Precip Type', 'Temperature (C)', 'Apparent Temperature (C)', 'Humidity', 'Wind Speed (km/h)', 'Wind Bearing (degrees)', 'Visibility (km)', 'Loud Cover', 'Pressure (millibars)', 'Daily Summary']

First 5 rows:
                  Formatted Date        Summary Precip Type  Temperature (C)  \
0  2006-04-01 00:00:00.000 +0200  Partly Cloudy        rain         9.472222   
1  2006-04-01 01:00:00.000 +0200  Partly Cloudy        rain         9.355556   
2  2006-04-01 02:00:00.000 +0200  Mostly Cloudy        rain         9.377778   
3  2006-04-01 03:00:00.000 +0200  Partly Cloudy        rain         8.288889   
4  2006-04-01 04:00:00.000 +0200  Mostly Cloudy        rain         8.755556   

   Apparent Temperature (C)  Humidity  Wind Speed (km/h)  \
0                  7.388889      0.89            14.1197   
1                  7.227778      0.86            14.2646   
2                  9.377778      0.89             3.92

In [2]:
#Data Cleaning and Feature Selection
import numpy as np

# Drop columns that are not used as predictors.
# 'Loud Cover' is the spelling used by the CSV itself (see the column listing above).
# errors='ignore' keeps this cell re-runnable: `df` is overwritten here, so on a
# second run the columns are already gone and a strict drop would raise KeyError.
df = df.drop(columns=['Formatted Date', 'Summary', 'Daily Summary', 'Loud Cover'], errors='ignore')

# Handle missing values in 'Precip Type' by filling with the most frequent value
df['Precip Type'] = df['Precip Type'].fillna(df['Precip Type'].mode()[0])

# Convert categorical column 'Precip Type' into numerical form (rain -> 0, snow -> 1).
# Skip the step when the column is already numeric: mapping the 0/1 codes through
# the label dictionary again would turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df['Precip Type']):
    precip_encoded = df['Precip Type'].map({'rain': 0, 'snow': 1})
    # .map() yields NaN for any label missing from the dictionary; fail loudly
    # here instead of passing NaN on to the models.
    if precip_encoded.isna().any():
        unknown_labels = sorted(df.loc[precip_encoded.isna(), 'Precip Type'].astype(str).unique())
        raise ValueError(f"Unexpected 'Precip Type' values: {unknown_labels}")
    df['Precip Type'] = precip_encoded

# Define features (X) and target (y)
# NOTE(review): 'Apparent Temperature (C)' stays in X; it is presumably derived
# from the temperature itself, which may explain the near-perfect R² reported
# below -- confirm this is intended.
X = df.drop(columns=['Temperature (C)'])
y = df['Temperature (C)']

print("Shape of features (X):", X.shape)
print("Shape of target (y):", y.shape)

# Display first few rows of processed data
print("\nProcessed data sample:")
print(X.head())


Shape of features (X): (96453, 7)
Shape of target (y): (96453,)

Processed data sample:
   Precip Type  Apparent Temperature (C)  Humidity  Wind Speed (km/h)  \
0            0                  7.388889      0.89            14.1197   
1            0                  7.227778      0.86            14.2646   
2            0                  9.377778      0.89             3.9284   
3            0                  5.944444      0.83            14.1036   
4            0                  6.977778      0.83            11.0446   

   Wind Bearing (degrees)  Visibility (km)  Pressure (millibars)  
0                   251.0          15.8263               1015.13  
1                   259.0          15.8263               1015.63  
2                   204.0          14.9569               1015.94  
3                   269.0          15.8263               1016.41  
4                   259.0          15.8263               1016.51  


In [3]:
#Train and Evaluate Models
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create and train each estimator in a single step (fit() hands back the estimator)
lr = LinearRegression().fit(X_train, y_train)
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Generate predictions for the held-out rows
y_pred_lr = lr.predict(X_test)
y_pred_rf = rf.predict(X_test)

# Evaluate models
def evaluate_model(y_true, y_pred, name):
    """Print MAE, RMSE and R² for one model's predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        name: Label used in the report heading.

    Returns:
        None. The report is written to stdout, each metric to three decimals.
    """
    # RMSE is the square root of the mean squared error
    squared_error = mean_squared_error(y_true, y_pred)
    report_lines = [
        f"\n{name} Model Performance:",
        f"MAE: {mean_absolute_error(y_true, y_pred):.3f}",
        f"RMSE: {np.sqrt(squared_error):.3f}",
        f"R² Score: {r2_score(y_true, y_pred):.3f}",
    ]
    for report_line in report_lines:
        print(report_line)

# Report the same metrics for each fitted model, in training order
for model_label, model_predictions in (
    ("Linear Regression", y_pred_lr),
    ("Random Forest Regressor", y_pred_rf),
):
    evaluate_model(y_test, model_predictions, model_label)



Linear Regression Model Performance:
MAE: 0.742
RMSE: 0.948
R² Score: 0.990

Random Forest Regressor Model Performance:
MAE: 0.012
RMSE: 0.044
R² Score: 1.000


In [None]:
#Build Gradio Interface for Weather Prediction
import gradio as gr
import numpy as np

def predict_temperature(precip_type, apparent_temp, humidity, wind_speed, wind_bearing, visibility, pressure):
    """Predict the temperature (°C) for one set of weather readings.

    Args:
        precip_type: "rain" or "snow" (case and surrounding whitespace are
            ignored). May be None when no option has been chosen in the UI.
        apparent_temp, humidity, wind_speed, wind_bearing, visibility, pressure:
            Numeric readings, passed to the model in this order after the
            encoded precipitation type.

    Returns:
        "Predicted Temperature: <value> °C" with the value rounded to two
        decimals, or a prompt string when ``precip_type`` is missing or is not
        one of the two known labels.
    """
    # Same encoding the training data used: rain -> 0, snow -> 1
    precip_codes = {"rain": 0, "snow": 1}

    # Validate instead of calling .lower() blindly: None would raise
    # AttributeError, and an unknown label used to be treated as snow silently.
    precip_key = precip_type.strip().lower() if isinstance(precip_type, str) else None
    if precip_key not in precip_codes:
        return "Please select a precipitation type: rain or snow."

    # Build a one-row frame labelled with the columns the model was fitted on,
    # so predict() does not warn about missing feature names. Estimators that
    # do not expose feature_names_in_ get default integer column labels.
    input_data = pd.DataFrame(
        [[precip_codes[precip_key], apparent_temp, humidity, wind_speed, wind_bearing, visibility, pressure]],
        columns=getattr(rf, "feature_names_in_", None),
    )
    predicted_temp = rf.predict(input_data)[0]
    return f"Predicted Temperature: {predicted_temp:.2f} °C"

# Define Gradio interface: one input component per model feature, listed in the
# same order as predict_temperature's parameters.
interface = gr.Interface(
    fn=predict_temperature,
    inputs=[
        # Pre-select "rain" so an untouched radio group does not submit None
        gr.Radio(["rain", "snow"], value="rain", label="Precipitation Type"),
        gr.Slider(-20, 50, step=0.1, label="Apparent Temperature (°C)"),
        gr.Slider(0, 1, step=0.01, label="Humidity"),
        gr.Slider(0, 100, step=0.1, label="Wind Speed (km/h)"),
        gr.Slider(0, 360, step=1, label="Wind Bearing (degrees)"),
        gr.Slider(0, 20, step=0.1, label="Visibility (km)"),
        gr.Slider(900, 1100, step=0.5, label="Pressure (millibars)")
    ],
    outputs="text",
    title="Weather Prediction with Machine Learning",
    description="Enter weather parameters to predict temperature using a trained Random Forest model."
)

# share=True asks Gradio for a public link in addition to the local server
interface.launch(share=True)


Running on local URL:  http://127.0.0.1:7860


--------



Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




