In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv("weather_classification_data.csv")
df.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy


In [3]:
df.isnull().sum()

Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64

In [4]:
df1=df.drop(['Cloud Cover', 'UV Index', 'Visibility (km)'],axis="columns")
df1.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Atmospheric Pressure,Season,Location,Weather Type
0,14.0,73,9.5,82.0,1010.82,Winter,inland,Rainy
1,39.0,96,8.5,71.0,1011.43,Spring,inland,Cloudy
2,30.0,64,7.0,16.0,1018.72,Spring,mountain,Sunny
3,38.0,83,1.5,82.0,1026.25,Spring,coastal,Sunny
4,27.0,74,17.0,66.0,990.67,Winter,mountain,Rainy


In [5]:
from sklearn.preprocessing import LabelEncoder

Season_encoder = LabelEncoder()
Location_encoder = LabelEncoder()
Weather_Type_encoder = LabelEncoder()

df1['Season'] = Season_encoder.fit_transform(df1['Season'])
df1['Location'] = Location_encoder.fit_transform(df1['Location'])
df1['Weather Type'] = Weather_Type_encoder.fit_transform(df1['Weather Type'])

print("Season mapping:")
print(dict(zip(Season_encoder.classes_, range(len(Season_encoder.classes_)))))

print("Location mapping:")
print(dict(zip(Location_encoder.classes_, range(len(Location_encoder.classes_)))))

print("Weather Type mapping:")
print(dict(zip(Weather_Type_encoder.classes_, range(len(Weather_Type_encoder.classes_)))))

Season mapping:
{'Autumn': 0, 'Spring': 1, 'Summer': 2, 'Winter': 3}
Location mapping:
{'coastal': 0, 'inland': 1, 'mountain': 2}
Weather Type mapping:
{'Cloudy': 0, 'Rainy': 1, 'Snowy': 2, 'Sunny': 3}


In [20]:
df1.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Atmospheric Pressure,Season,Location,Weather Type
0,14.0,73,9.5,82.0,1010.82,3,1,1
1,39.0,96,8.5,71.0,1011.43,1,1,0
2,30.0,64,7.0,16.0,1018.72,1,2,3
3,38.0,83,1.5,82.0,1026.25,1,0,3
4,27.0,74,17.0,66.0,990.67,3,2,1


In [7]:
X = df1.drop(['Weather Type', 'Atmospheric Pressure','Location'], axis='columns')
y = df1['Weather Type']

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
len(X)

13200

In [10]:
len(y)

13200

In [11]:
from sklearn.svm import SVC
model=SVC()
model.fit(X_train,y_train)

In [12]:
model.score(X_train,y_train)

0.8526515151515152

In [13]:
model.score(X_test,y_test)

0.8553030303030303

In [14]:
from sklearn.model_selection import cross_val_score

# Perform cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=10)
print("Cross-validation scores: ", cv_scores)
print("Mean CV score: ", cv_scores.mean())

Cross-validation scores:  [0.84185606 0.86931818 0.84659091 0.84659091 0.84753788 0.83901515
 0.84848485 0.85700758 0.84280303 0.86742424]
Mean CV score:  0.8506628787878789


In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Make predictions on the test set
test_predictions = model.predict(X_test)

# Calculate precision, recall, and F1 score
precision = precision_score(y_test, test_predictions, average='weighted')
recall = recall_score(y_test, test_predictions, average='weighted')
f1 = f1_score(y_test, test_predictions, average='weighted')

# Print the results
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Precision: 0.86
Recall: 0.86
F1 Score: 0.85


In [16]:
import pickle

In [18]:
with open('newweather.pkl', 'wb') as file:
    pickle.dump(model, file)