In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [26]:
# Read the weather observations from the CSV in the working directory
# and preview the first rows to check the column layout.
dataset = pd.read_csv(
    'weather_classification_data.csv',
)
dataset.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy


In [27]:
# Target: the 'Weather Type' column (the last column in the preview above).
y = dataset['Weather Type']
# Features: every column except the last one, as a NumPy array so that the
# encoder below can address columns by position.
X = dataset.iloc[:, :-1].to_numpy()

In [28]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical feature columns at positions 4, 7 and 9
# (going by the head() preview these look like Cloud Cover, Season and
# Location -- TODO confirm against the CSV) and pass all other columns
# through unchanged. In the result the encoded columns come first, followed
# by the passthrough columns.
#
# sparse_threshold=0 forces a dense result. With the default threshold the
# transformer may return a scipy sparse matrix when the output is sparse
# enough, and np.array() on a sparse matrix silently produces a 0-d object
# array instead of a 2-D feature matrix.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [4, 7, 9])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.array(ct.fit_transform(X))

In [29]:
from sklearn.preprocessing import LabelEncoder

# Replace the class-name strings in y with integer codes. The fitted
# encoder is kept in `le` so the codes can be mapped back to names.
le = LabelEncoder().fit(y)
y = le.transform(y)

In [30]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [31]:
from sklearn.preprocessing import StandardScaler

# Standardize every column of the feature matrix (the one-hot columns are
# scaled as well, not only the numeric ones). The scaler is fitted on the
# training split only and then applied to both splits, so the test split
# does not influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Logistic Regression

In [32]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the scaled training split;
# max_iter is raised to 1000 to give the solver room to converge.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model

LogisticRegression(max_iter=1000)

In [33]:
# Predict the encoded class labels for the held-out test split with the
# fitted logistic-regression model.
y_pred = model.predict(X_test)

In [34]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Confusion matrix: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:", cm, sep="\n")

Accuracy: 0.87
Confusion Matrix:
[[557  65  12  25]
 [ 44 572  53  13]
 [ 21   8 619  16]
 [ 53  15  15 552]]


In [35]:
# Per-class precision, recall and F1 for the logistic-regression predictions.
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:", report, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.85      0.84       659
           1       0.87      0.84      0.85       682
           2       0.89      0.93      0.91       664
           3       0.91      0.87      0.89       635

    accuracy                           0.87      2640
   macro avg       0.87      0.87      0.87      2640
weighted avg       0.87      0.87      0.87      2640



## Training the K-NN model on the Training set

In [36]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier; the Minkowski metric with p=2 is the
# Euclidean distance.
model1 = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
).fit(X_train, y_train)
model1

KNeighborsClassifier()

In [37]:
# Predict the encoded class labels for the test split with the fitted K-NN model.
y_pred1 = model1.predict(X_test)

In [38]:
# K-NN evaluation: confusion matrix (rows = true class, columns = predicted
# class), then the overall test accuracy as the cell's displayed value.
cm1 = confusion_matrix(y_true=y_test, y_pred=y_pred1)
print(cm1)
accuracy_score(y_true=y_test, y_pred=y_pred1)

[[572  55  13  19]
 [ 32 614  27   9]
 [ 17  22 613  12]
 [ 27  34   6 568]]


0.8965909090909091

In [39]:
# Per-class precision, recall and F1 for the K-NN predictions.
# Note: this rebinds `report`, replacing the logistic-regression report.
report = classification_report(y_true=y_test, y_pred=y_pred1)
print("Classification Report:", report, sep="\n")

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.87      0.88       659
           1       0.85      0.90      0.87       682
           2       0.93      0.92      0.93       664
           3       0.93      0.89      0.91       635

    accuracy                           0.90      2640
   macro avg       0.90      0.90      0.90      2640
weighted avg       0.90      0.90      0.90      2640



## Training the SVM model on the Training set

In [40]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel, fitted on the scaled
# training split.
model2 = SVC(kernel='linear', random_state=0).fit(X_train, y_train)
model2

SVC(kernel='linear', random_state=0)

In [41]:
# Predict the encoded class labels for the test split with the linear SVM.
y_pred2 = model2.predict(X_test)

In [42]:
# Linear-SVM evaluation: confusion matrix (rows = true class, columns =
# predicted class), then the overall test accuracy as the displayed value.
cm2 = confusion_matrix(y_true=y_test, y_pred=y_pred2)
print(cm2)
accuracy_score(y_true=y_test, y_pred=y_pred2)

[[566  53  12  28]
 [ 23 599  40  20]
 [ 17   8 622  17]
 [ 30  12  20 573]]


0.8939393939393939

## Training the Kernel SVM model on the Training set

In [43]:
from sklearn.svm import SVC

# Support-vector classifier with an RBF (non-linear) kernel, fitted on the
# scaled training split.
model3 = SVC(kernel='rbf', random_state=0).fit(X_train, y_train)
model3

SVC(random_state=0)

In [44]:
# Predict the encoded class labels for the test split with the RBF-kernel SVM.
y_pred3 = model3.predict(X_test)

In [45]:
# RBF-SVM evaluation: confusion matrix (rows = true class, columns =
# predicted class), then the overall test accuracy as the displayed value.
cm3 = confusion_matrix(y_true=y_test, y_pred=y_pred3)
print(cm3)
accuracy_score(y_true=y_test, y_pred=y_pred3)

[[586  40  11  22]
 [ 29 627  19   7]
 [ 13  17 615  19]
 [ 28  19   7 581]]


0.9125

## Training the Naive Bayes model on the Training set

In [46]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, fitted on the scaled
# training split.
model4 = GaussianNB().fit(X_train, y_train)
model4

GaussianNB()

In [47]:
# Predict the encoded class labels for the test split with the Naive Bayes model.
y_pred4 = model4.predict(X_test)

In [48]:
# Naive-Bayes evaluation: confusion matrix (rows = true class, columns =
# predicted class), then the overall test accuracy as the displayed value.
cm4 = confusion_matrix(y_true=y_test, y_pred=y_pred4)
print(cm4)
accuracy_score(y_true=y_test, y_pred=y_pred4)

[[524  88  46   1]
 [ 59 520 103   0]
 [ 35  15 613   1]
 [195  24   7 409]]


0.7825757575757576

## Decision Tree Classification 

In [49]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree that splits on information gain (entropy); the
# fixed random_state keeps the fitted tree reproducible.
model5 = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
model5

DecisionTreeClassifier(criterion='entropy', random_state=0)

In [50]:
# Predict the encoded class labels for the test split with the decision tree.
y_pred5 = model5.predict(X_test)

In [51]:
# Decision-tree evaluation: confusion matrix (rows = true class, columns =
# predicted class), then the overall test accuracy as the displayed value.
cm5 = confusion_matrix(y_true=y_test, y_pred=y_pred5)
print(cm5)
accuracy_score(y_true=y_test, y_pred=y_pred5)

[[587  32  15  25]
 [ 38 613  15  16]
 [ 17  15 616  16]
 [ 23  11  11 590]]


0.9113636363636364

## Random Forest Classification

In [52]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 10 entropy-based trees; the fixed random_state keeps
# the fitted ensemble reproducible.
model6 = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(X_train, y_train)
model6

RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)

In [53]:
# Predict the encoded class labels for the test split with the random forest.
y_pred6 = model6.predict(X_test)

In [54]:
# Random-forest evaluation: confusion matrix (rows = true class, columns =
# predicted class), then the overall test accuracy as the displayed value.
cm6 = confusion_matrix(y_true=y_test, y_pred=y_pred6)
print(cm6)
accuracy_score(y_true=y_test, y_pred=y_pred6)

[[601  31  12  15]
 [ 35 626  12   9]
 [ 14  23 617  10]
 [ 28  10   9 588]]


0.9212121212121213