In [116]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import warnings
import os

# Silence every library warning for the rest of the session so cell output
# stays readable.  NOTE(review): this also hides pandas / Keras deprecation
# notices, so consider narrowing the filter when debugging.
warnings.filterwarnings("ignore")

# Location of the weather CSV.  Set the WEATHER_DATA_CSV environment variable
# to run the notebook on another machine; the original author's path remains
# the fallback so an existing setup keeps working unchanged.
DATA_PATH = os.environ.get(
    "WEATHER_DATA_CSV",
    r"C:\Users\USER\Desktop\Ann2\weather_classification_data.csv",
)
dataset = pd.read_csv(DATA_PATH, header=0)
dataset.head(2)

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy


In [117]:
# Column dtypes and non-null counts: shows which columns are text and whether
# any values are missing.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13200 entries, 0 to 13199
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Temperature           13200 non-null  float64
 1   Humidity              13200 non-null  int64  
 2   Wind Speed            13200 non-null  float64
 3   Precipitation (%)     13200 non-null  float64
 4   Cloud Cover           13200 non-null  object 
 5   Atmospheric Pressure  13200 non-null  float64
 6   UV Index              13200 non-null  int64  
 7   Season                13200 non-null  object 
 8   Visibility (km)       13200 non-null  float64
 9   Location              13200 non-null  object 
 10  Weather Type          13200 non-null  object 
dtypes: float64(5), int64(2), object(4)
memory usage: 1.1+ MB


In [118]:
# Strip spaces and unit suffixes from the headers so every column name is a
# plain identifier (usable as an attribute, e.g. dataset.WindSpeed).
compact_names = {
    "Wind Speed": "WindSpeed",
    "Precipitation (%)": "Precipitation",
    "Cloud Cover": "CloudCover",
    "Atmospheric Pressure": "AtmosphericPressure",
    "UV Index": "UVIndex",
    "Visibility (km)": "Visibility",
    "Weather Type": "WeatherType",
}
dataset = dataset.rename(columns=compact_names)

In [119]:
# Confirm the headers after renaming.
dataset.columns

Index(['Temperature', 'Humidity', 'WindSpeed', 'Precipitation', 'CloudCover',
       'AtmosphericPressure', 'UVIndex', 'Season', 'Visibility', 'Location',
       'WeatherType'],
      dtype='object')

In [120]:
# Number of missing values in each column.
dataset.isna().sum()

Temperature            0
Humidity               0
WindSpeed              0
Precipitation          0
CloudCover             0
AtmosphericPressure    0
UVIndex                0
Season                 0
Visibility             0
Location               0
WeatherType            0
dtype: int64

In [121]:
from sklearn.preprocessing import LabelEncoder

# Features are every column except the last; the target is the last column
# (WeatherType), kept as a one-column DataFrame.
X = dataset.iloc[:, :-1]
# .copy() gives y its own data, so the assignment below is not a write into a
# slice of `dataset` (which pandas reports as SettingWithCopyWarning).
y = dataset.iloc[:, -1:].copy()

# Integer-encode the class names; LabelEncoder numbers the classes in sorted
# order of their names.
target_encoder = LabelEncoder()
y["WeatherType"] = target_encoder.fit_transform(y["WeatherType"])

# `label` is rebound to a different encoder later in the notebook, so
# `target_encoder` keeps the fitted target mapping reachable (e.g. for
# inverse_transform on predictions).  `label` is still set for compatibility.
label = target_encoder
y


Unnamed: 0,WeatherType
0,1
1,0
2,3
3,3
4,1
...,...
13195,1
13196,2
13197,0
13198,2


In [122]:
# Class balance of the encoded target.
print(y.value_counts())

WeatherType
0              3300
1              3300
2              3300
3              3300
Name: count, dtype: int64


In [123]:
# Frequency of each level in the three text-valued feature columns.
for column_name in ("CloudCover", "Location", "Season"):
    print(dataset[column_name].value_counts())

CloudCover
overcast         6090
partly cloudy    4560
clear            2139
cloudy            411
Name: count, dtype: int64
Location
inland      4816
mountain    4813
coastal     3571
Name: count, dtype: int64
Season
Winter    5610
Spring    2598
Autumn    2500
Summer    2492
Name: count, dtype: int64


In [124]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder

# Hold out 30 % of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Unfitted preprocessing objects used by the following cells.
# Note: `label` is rebound to a brand-new encoder here, so it no longer refers
# to the encoder that was fitted on WeatherType.
onehot = OneHotEncoder(drop="first")  # drop the first level of each category
label = LabelEncoder()
minmax = MinMaxScaler()

In [125]:
# Integer-encode Season: the mapping is learned from the training split only
# and then applied unchanged to the test split.
X_train = X_train.assign(Season=label.fit_transform(X_train["Season"]))
X_test = X_test.assign(Season=label.transform(X_test["Season"]))

In [126]:
# Text columns that will be one-hot encoded.  Season is excluded because it is
# integer-encoded separately with `label`.
catcols = X.select_dtypes(include="object").columns.drop("Season").tolist()
catcols

['CloudCover', 'Location']

In [127]:
# Every non-object column of X is treated as a numeric feature to be scaled.
non_text_features = X.select_dtypes(exclude="object")
numcols = non_text_features.columns
numcols

Index(['Temperature', 'Humidity', 'WindSpeed', 'Precipitation',
       'AtmosphericPressure', 'UVIndex', 'Visibility'],
      dtype='object')

In [128]:
# One raw row for reference; `dataset` itself still holds the original text
# values (only y and the train/test splits were encoded).
dataset.head(1)

Unnamed: 0,Temperature,Humidity,WindSpeed,Precipitation,CloudCover,AtmosphericPressure,UVIndex,Season,Visibility,Location,WeatherType
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy


In [129]:
from sklearn.compose import ColumnTransformer

# One-hot encode the nominal columns, min-max scale the numeric ones, and let
# every remaining column (the already integer-encoded Season) pass through.
feature_steps = [
    ("OneHotEncoder", onehot, catcols),
    ("MinMaxScaler", minmax, numcols),
]
preprocess = ColumnTransformer(feature_steps, remainder="passthrough")

# Fit on the training split only; the test split reuses the fitted transformer.
# Wrapping in DataFrame gives positional integer column labels.
X_train_sc = pd.DataFrame(preprocess.fit_transform(X_train))
X_test_sc = pd.DataFrame(preprocess.transform(X_test))

In [130]:
import pickle

# Persist the encoder currently bound to `label` (at this point the one fitted
# on Season), then read it straight back so later cells use the round-tripped
# object.
with open("label.pkl", "wb") as sink:
    pickle.dump(label, sink)
with open("label.pkl", "rb") as source:
    label = pickle.load(source)
              

In [131]:
# Persist the fitted ColumnTransformer, then reload it to confirm that the
# pickle round-trips.
with open("preprocess.pkl", "wb") as sink:
    pickle.dump(preprocess, sink)
with open("preprocess.pkl", "rb") as source:
    preprocess = pickle.load(source)

In [132]:
# First transformed training row: one-hot columns first, then the scaled
# numeric columns, then the passed-through Season code last.
X_train_sc.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.0,1.0,0.0,0.0,1.0,0.360902,0.617978,0.360825,0.770642,0.500539,0.214286,0.25,2.0


ANN implementation


In [133]:
import keras
import tensorflow as tf
from tensorflow.keras.models import load_model,Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

In [134]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable, matching the seeded train/test split.
tf.keras.utils.set_random_seed(42)

# Fully connected classifier: three ReLU hidden layers of decreasing width and
# a softmax output with one unit per WeatherType class (4 classes).
# The input width is declared with an explicit Input object instead of the
# `input_shape=` argument on the first Dense layer.
n_features = X_train_sc.shape[1]
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(4, activation="softmax"),
])


In [135]:
# Layer-by-layer overview of the network and its parameter counts.
model.summary()

In [136]:
# Adam optimizer with an explicit learning rate of 0.01.
# NOTE(review): val_loss in the training log below moves up and down between
# epochs; a smaller learning rate may be worth trying.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)


In [137]:
# Sparse categorical cross-entropy is used because the target holds integer
# class codes rather than one-hot vectors.
model.compile(
    optimizer=opt,
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [138]:
import datetime

# One TensorBoard run directory per execution, grouped inside logs/fit/.
# The separator after "fit" matters: without it the runs are created as
# logs/fit<timestamp>, i.e. as siblings of logs/fit rather than inside it.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logs = "logs/fit/" + run_stamp
callback = TensorBoard(log_dir=logs, histogram_freq=1)

# Stop once val_loss has not improved for 10 consecutive epochs and restore
# the weights from the best epoch.
stoppage = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

In [139]:
# Train for at most 100 epochs; EarlyStopping may end the run sooner.
# NOTE(review): the held-out test split doubles as the validation set here, so
# early stopping is tuned on the same data later used to judge the model.
history = model.fit(
    x=X_train_sc,
    y=y_train,
    validation_data=(X_test_sc, y_test),
    epochs=100,
    callbacks=[callback, stoppage],
)

Epoch 1/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.7548 - loss: 0.6784 - val_accuracy: 0.8876 - val_loss: 0.3397
Epoch 2/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8902 - loss: 0.3087 - val_accuracy: 0.8755 - val_loss: 0.3327
Epoch 3/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8922 - loss: 0.2802 - val_accuracy: 0.8664 - val_loss: 0.4029
Epoch 4/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9009 - loss: 0.2637 - val_accuracy: 0.8929 - val_loss: 0.2908
Epoch 5/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9037 - loss: 0.2416 - val_accuracy: 0.8917 - val_loss: 0.2698
Epoch 6/100
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9068 - loss: 0.2399 - val_accuracy: 0.8899 - val_loss: 0.2763
Epoch 7/100
[1m289/28

In [140]:
# Write the trained network to disk.
# NOTE(review): the .h5 suffix selects the HDF5 format; recent Keras releases
# recommend the native .keras format — confirm what the consumer of this file
# expects before changing it.
model.save("model.h5")



In [141]:
# Load the TensorBoard notebook extension (a no-op message is printed if it is
# already loaded in this kernel).
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [142]:
# First ten raw rows, for picking sample inputs to try against the model.
dataset.head(10)

Unnamed: 0,Temperature,Humidity,WindSpeed,Precipitation,CloudCover,AtmosphericPressure,UVIndex,Season,Visibility,Location,WeatherType
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy
5,32.0,55,3.5,26.0,overcast,1010.03,2,Summer,5.0,inland,Cloudy
6,-2.0,97,8.0,86.0,overcast,990.87,1,Winter,4.0,inland,Snowy
7,3.0,85,6.0,96.0,partly cloudy,984.46,1,Winter,3.5,inland,Snowy
8,3.0,83,6.0,66.0,overcast,999.44,0,Winter,1.0,mountain,Snowy
9,28.0,74,8.5,107.0,clear,1012.13,8,Winter,7.5,coastal,Sunny


In [143]:
# Last ten raw rows.
dataset.tail(10)

Unnamed: 0,Temperature,Humidity,WindSpeed,Precipitation,CloudCover,AtmosphericPressure,UVIndex,Season,Visibility,Location,WeatherType
13190,30.0,24,3.5,16.0,partly cloudy,1017.54,11,Summer,6.5,mountain,Sunny
13191,27.0,48,6.5,14.0,clear,1029.37,8,Summer,8.0,inland,Sunny
13192,31.0,24,8.0,5.0,clear,1029.61,8,Summer,9.0,inland,Sunny
13193,-5.0,65,15.5,50.0,overcast,982.57,1,Winter,5.0,inland,Snowy
13194,29.0,62,13.0,17.0,overcast,1002.81,2,Spring,5.0,coastal,Cloudy
13195,10.0,74,14.5,71.0,overcast,1003.15,1,Summer,1.0,mountain,Rainy
13196,-1.0,76,3.5,23.0,cloudy,1067.23,1,Winter,6.0,coastal,Snowy
13197,30.0,77,5.5,28.0,overcast,1012.69,3,Autumn,9.0,coastal,Cloudy
13198,3.0,76,10.0,94.0,overcast,984.27,0,Winter,2.0,inland,Snowy
13199,-5.0,38,0.0,92.0,overcast,1015.37,5,Autumn,10.0,mountain,Rainy


In [144]:
# Feature column order expected by the fitted `preprocess` transformer.
X.columns

Index(['Temperature', 'Humidity', 'WindSpeed', 'Precipitation', 'CloudCover',
       'AtmosphericPressure', 'UVIndex', 'Season', 'Visibility', 'Location'],
      dtype='object')

In [145]:
# Distinct temperature readings; the values span -25 to 109.
# NOTE(review): readings above ~60 look implausible as air temperatures if the
# unit is Celsius — confirm the unit and whether these are synthetic outliers.
dataset.Temperature.unique()

array([ 14.,  39.,  30.,  38.,  27.,  32.,  -2.,   3.,  28.,  35.,  12.,
       -10.,  24.,  10.,  33.,  43.,  13.,  -7.,  26.,   4.,  17.,  40.,
         2.,  15.,  29.,  11.,  -9.,  36.,  42.,  21.,  22.,  25.,  -4.,
        -1.,  -5.,  41.,  31.,  16.,  34.,  49.,  19.,  23.,  20.,  -3.,
        18.,   1.,   0.,  46.,  44., -13.,  -6.,  78.,  63.,  73.,   8.,
       -12., -24.,  -8.,  60.,  48.,   5.,  51., -14.,  50.,  37.,  54.,
        47.,  70.,   9.,  66., -16., -15.,  59.,  80., -19.,  52.,  45.,
         6., -18., -11.,  74.,  76.,  55., -20.,  57.,  91.,  82., -17.,
        61.,   7.,  53.,  65.,  77.,  67.,  64.,  58.,  68.,  72.,  62.,
        71.,  56., 107., -22.,  75.,  85.,  97.,  84., -21.,  92., -25.,
        81., 109.,  98.,  94.,  90., -23.,  88.,  99.,  69., 100.,  89.,
       102.,  86., 108.,  87.,  95.])