# Load Dataset and Create Train/Test Split

In [17]:
import os
import pickle

import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Read the engineered feature table produced upstream of this notebook.
df = pd.read_csv('../artifacts/engineered_data.csv')

# Separate the predictors from the label: every column except
# 'Weather Type' is a feature, and 'Weather Type' is the target.
x = df.drop(columns=['Weather Type'])
y = df['Weather Type']

# Hold out 20% of the rows for evaluation. The seed is fixed so that every
# model cell below is trained and scored on exactly the same split.
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# class balance of the held-out set matters -- confirm before changing, since
# it alters the reported metrics.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# XGBoost

In [18]:
from xgboost import XGBClassifier

# Fit an XGBoost classifier with library-default hyperparameters; only the
# seed is pinned so that reruns are reproducible.
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(x_train, y_train)

# Predict on the held-out split and print per-class precision/recall/F1.
xgb_pred = xgb_model.predict(x_test)

print("XGBoost Classification Report:")
print(classification_report(y_test, xgb_pred))

# Persist the fitted model. Make sure the output directory exists first:
# without this, a fresh checkout that lacks ../data fails with
# FileNotFoundError only after the model has already been trained.
# NOTE(review): a pickle is tied to the library versions that produced it;
# XGBoost's own save_model() format is the documented option for long-term
# storage -- confirm what the consumers of this file expect before switching.
os.makedirs('../data', exist_ok=True)
with open('../data/xgb_model.pkl', 'wb') as xgb_file:
    pickle.dump(xgb_model, xgb_file)

XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.97      0.96       581
           1       0.98      0.98      0.98       569
           2       1.00      0.99      0.99       558
           3       0.98      0.98      0.98       593

    accuracy                           0.98      2301
   macro avg       0.98      0.98      0.98      2301
weighted avg       0.98      0.98      0.98      2301



# Random Forest Classifier

In [19]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest with scikit-learn's default hyperparameters; only the
# seed is pinned so that reruns are reproducible.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(x_train, y_train)

# Predict on the held-out split and print per-class precision/recall/F1.
rf_pred = rf_model.predict(x_test)

print("Random Forest Classification Report:")
print(classification_report(y_test, rf_pred))

# Persist the fitted model. Make sure the output directory exists first:
# without this, a fresh checkout that lacks ../data fails with
# FileNotFoundError only after the model has already been trained.
# NOTE(review): a pickle should be loaded with the same scikit-learn version
# that wrote it.
os.makedirs('../data', exist_ok=True)
with open('../data/rf_model.pkl', 'wb') as rf_file:
    pickle.dump(rf_model, rf_file)

Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.97      0.97       581
           1       0.98      0.98      0.98       569
           2       1.00      0.99      0.99       558
           3       0.99      0.98      0.99       593

    accuracy                           0.98      2301
   macro avg       0.98      0.98      0.98      2301
weighted avg       0.98      0.98      0.98      2301



# Support Vector Machine (SVM)

In [20]:
from sklearn.svm import SVC

# Fit a support vector classifier with scikit-learn's default settings; only
# the seed is pinned.
# NOTE(review): SVC is sensitive to feature scale; this assumes the columns in
# engineered_data.csv were already scaled upstream -- confirm.
svm_model = SVC(random_state=42)
svm_model.fit(x_train, y_train)

# Predict on the held-out split and print per-class precision/recall/F1.
svm_pred = svm_model.predict(x_test)

print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

# Persist the fitted model. Make sure the output directory exists first:
# without this, a fresh checkout that lacks ../data fails with
# FileNotFoundError only after the model has already been trained.
# NOTE(review): a pickle should be loaded with the same scikit-learn version
# that wrote it.
os.makedirs('../data', exist_ok=True)
with open('../data/svm_model.pkl', 'wb') as svm_file:
    pickle.dump(svm_model, svm_file)

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.97      0.95       581
           1       0.97      0.97      0.97       569
           2       1.00      0.99      0.99       558
           3       0.98      0.96      0.97       593

    accuracy                           0.97      2301
   macro avg       0.97      0.97      0.97      2301
weighted avg       0.97      0.97      0.97      2301

