In [103]:
# ------------------- ML MODEL PREP & SAVE ENCODER -------------------
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import pickle, joblib



In [105]:
# Load dataset
df = pd.read_csv("seattle-weather.csv")
df


Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


In [107]:
df.isnull().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

In [109]:
df.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind
count,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136
std,6.680194,7.349758,5.023004,1.437825
min,0.0,-1.6,-7.1,0.4
25%,0.0,10.6,4.4,2.2
50%,0.0,15.6,8.3,3.0
75%,2.8,22.2,12.2,4.0
max,55.9,35.6,18.3,9.5


In [111]:
df.info

<bound method DataFrame.info of             date  precipitation  temp_max  temp_min  wind  weather
0     2012-01-01            0.0      12.8       5.0   4.7  drizzle
1     2012-01-02           10.9      10.6       2.8   4.5     rain
2     2012-01-03            0.8      11.7       7.2   2.3     rain
3     2012-01-04           20.3      12.2       5.6   4.7     rain
4     2012-01-05            1.3       8.9       2.8   6.1     rain
...          ...            ...       ...       ...   ...      ...
1456  2015-12-27            8.6       4.4       1.7   2.9     rain
1457  2015-12-28            1.5       5.0       1.7   1.3     rain
1458  2015-12-29            0.0       7.2       0.6   2.6      fog
1459  2015-12-30            0.0       5.6      -1.0   3.4      sun
1460  2015-12-31            0.0       5.6      -2.1   3.5      sun

[1461 rows x 6 columns]>

In [112]:
# Encode weather labels
le = LabelEncoder()
df['weather_encoded'] = le.fit_transform(df['weather'])
joblib.dump(le, "label_encoder.pkl")  # Save encoder




['label_encoder.pkl']

In [113]:
le

In [114]:
# Binary classification: rain or not rain
df['weather_binary'] = df['weather'].apply(lambda x: 1 if x == 'rain' else 0)



In [119]:
df['weather_binary']

0       0
1       1
2       1
3       1
4       1
       ..
1456    1
1457    1
1458    0
1459    0
1460    0
Name: weather_binary, Length: 1461, dtype: int64

In [121]:
# Features & target
X = df[['precipitation', 'temp_max', 'temp_min', 'wind']]
y = df['weather_binary']



In [123]:
# Split & train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier()
model.fit(X_train, y_train)



In [124]:
X_train


Unnamed: 0,precipitation,temp_max,temp_min,wind
1066,0.0,5.6,-3.2,5.7
638,18.5,13.9,10.0,6.3
799,18.8,12.2,6.1,2.2
380,0.0,6.7,-0.6,2.3
303,34.5,15.0,12.2,2.8
...,...,...,...,...
1130,8.4,10.6,4.4,1.7
1294,0.0,33.3,17.8,3.4
860,0.5,15.6,7.2,2.1
1459,0.0,5.6,-1.0,3.4


In [125]:
y_pred

array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0], dtype=int64)

In [129]:
# Evaluate
print(classification_report(y_test, model.predict(X_test)))



              precision    recall  f1-score   support

           0       0.95      0.97      0.96       173
           1       0.95      0.93      0.94       120

    accuracy                           0.95       293
   macro avg       0.95      0.95      0.95       293
weighted avg       0.95      0.95      0.95       293



In [131]:
# Save model
with open("weather_model.pkl", "wb") as f:
    pickle.dump(model, f)



In [133]:
# Save predictions to CSV
df['predicted_weather'] = model.predict(X)
df['predicted_weather']

0       0
1       1
2       1
3       1
4       1
       ..
1456    1
1457    1
1458    0
1459    0
1460    0
Name: predicted_weather, Length: 1461, dtype: int64

In [135]:
df['predicted_label'] = df['predicted_weather'].apply(lambda x: "Rain" if x == 1 else "No Rain")
df['predicted_label'] 


0       No Rain
1          Rain
2          Rain
3          Rain
4          Rain
         ...   
1456       Rain
1457       Rain
1458    No Rain
1459    No Rain
1460    No Rain
Name: predicted_label, Length: 1461, dtype: object

In [139]:
df.to_csv("weather_with_predict.csv", index=False)
print(" Model and encoder saved. Predictions file generated.")

 Model and encoder saved. Predictions file generated.


In [141]:
df =pd.read_csv("weather_with_predict.csv")
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,weather_encoded,weather_binary,predicted_weather,predicted_label
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle,0,0,0,No Rain
1,2012-01-02,10.9,10.6,2.8,4.5,rain,2,1,1,Rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain,2,1,1,Rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain,2,1,1,Rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain,2,1,1,Rain
...,...,...,...,...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain,2,1,1,Rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain,2,1,1,Rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog,1,0,0,No Rain
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun,4,0,0,No Rain
