In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


In [5]:
df = pd.read_csv("cleaned_weather_data.csv")
df["date_time"] = pd.to_datetime(df["date_time"])
df.head()


Unnamed: 0,date_time,temperature,humidity,wind_speed,wind_direction,pressure,precipitation,cloud_coverage,weather_condition,forecasted_weather,year,month,day
0,2012-01-01,8.9,88,4.7,63,1008.2,0.0,33,drizzle,drizzle,2012,1,1
1,2012-01-02,6.7,78,4.5,129,1020.1,10.9,25,rain,rain,2012,1,2
2,2012-01-03,9.45,64,2.3,309,1014.7,0.8,46,rain,drizzle,2012,1,3
3,2012-01-04,8.9,92,4.7,192,1011.3,20.3,4,rain,rain,2012,1,4
4,2012-01-05,5.85,57,6.1,306,1014.3,1.3,89,rain,sun,2012,1,5


In [6]:
df = df.drop(columns=["date_time", "weather_condition"])
label_encoder = LabelEncoder()
df["forecasted_weather_encoded"] = label_encoder.fit_transform(df["forecasted_weather"])

X = df.drop(columns=["forecasted_weather", "forecasted_weather_encoded"])
y = df["forecasted_weather_encoded"]


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [8]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report, accuracy_score
import numpy as np

# Load data
df = pd.read_csv("cleaned_weather_data.csv")

# Create binary rain/no-rain column
df["rain_tomorrow"] = (df["precipitation"] > 0.5).astype(int)

# Drop unnecessary or duplicated columns
df.drop(columns=["date_time", "forecasted_weather"], inplace=True)

# Encode weather_condition
le = LabelEncoder()
df["weather_condition_encoded"] = le.fit_transform(df["weather_condition"])
df.drop(columns=["weather_condition"], inplace=True)

# Features and targets
features = ['humidity', 'wind_speed', 'wind_direction', 'pressure', 'precipitation', 
            'cloud_coverage', 'year', 'month', 'day']

X = df[features]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#  1. Temperature Prediction 
y_temp = df["temperature"]
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_temp, test_size=0.2, random_state=42)

temp_model = LinearRegression()
temp_model.fit(X_train, y_train)
temp_preds = temp_model.predict(X_test)
print("\n[Temperature Prediction]")
print("RMSE:", np.sqrt(mean_squared_error(y_test, temp_preds)))

#  2. Rain Chance Prediction
y_rain = df["rain_tomorrow"]
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_rain, test_size=0.2, random_state=42)

rain_model = LogisticRegression()
rain_model.fit(X_train, y_train)
rain_preds = rain_model.predict(X_test)
print("\n[Rain Prediction]")
print("Accuracy:", accuracy_score(y_test, rain_preds))
print(classification_report(y_test, rain_preds))

# 3. Weather Condition Classification 
y_condition = df["weather_condition_encoded"]
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_condition, test_size=0.2, random_state=42)

condition_model = RandomForestClassifier(n_estimators=100, random_state=42)
condition_model.fit(X_train, y_train)
condition_preds = condition_model.predict(X_test)
print("\n[Weather Condition Prediction]")
print("Accuracy:", accuracy_score(y_test, condition_preds))
print(classification_report(y_test, condition_preds, target_names=le.classes_))



[Temperature Prediction]
RMSE: 5.850432543236666

[Rain Prediction]
Accuracy: 0.9453924914675768
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       189
           1       1.00      0.85      0.92       104

    accuracy                           0.95       293
   macro avg       0.96      0.92      0.94       293
weighted avg       0.95      0.95      0.94       293


[Weather Condition Prediction]
Accuracy: 0.8361774744027304
              precision    recall  f1-score   support

     drizzle       0.00      0.00      0.00         9
         fog       0.57      0.16      0.25        25
        rain       0.93      0.93      0.93       120
        snow       0.00      0.00      0.00         8
         sun       0.78      0.98      0.87       131

    accuracy                           0.84       293
   macro avg       0.46      0.42      0.41       293
weighted avg       0.78      0.84      0.79       293



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
