# Artifical Neural Nets to Predict the Weather

## Importing Libraries

In [23]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from tensorflow.keras.utils import to_categorical

## Data Pre-Processing

### Importing Data

In [24]:
data = pd.read_csv("../Datasets/weather_classification_data.csv")

data.head()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy


In [25]:
data.shape

(13200, 11)

In [26]:
data.dtypes

Temperature             float64
Humidity                  int64
Wind Speed              float64
Precipitation (%)       float64
Cloud Cover              object
Atmospheric Pressure    float64
UV Index                  int64
Season                   object
Visibility (km)         float64
Location                 object
Weather Type             object
dtype: object

### Null Value Analysis

In [27]:
data.isnull().sum()

Temperature             0
Humidity                0
Wind Speed              0
Precipitation (%)       0
Cloud Cover             0
Atmospheric Pressure    0
UV Index                0
Season                  0
Visibility (km)         0
Location                0
Weather Type            0
dtype: int64

### General Overview of the Data

In [28]:
data.describe()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Atmospheric Pressure,UV Index,Visibility (km)
count,13200.0,13200.0,13200.0,13200.0,13200.0,13200.0,13200.0
mean,19.127576,68.710833,9.832197,53.644394,1005.827896,4.005758,5.462917
std,17.386327,20.194248,6.908704,31.946541,37.199589,3.8566,3.371499
min,-25.0,20.0,0.0,0.0,800.12,0.0,0.0
25%,4.0,57.0,5.0,19.0,994.8,1.0,3.0
50%,21.0,70.0,9.0,58.0,1007.65,3.0,5.0
75%,31.0,84.0,13.5,82.0,1016.7725,7.0,7.5
max,109.0,109.0,48.5,109.0,1199.21,14.0,20.0


Outliers do exist in this dataset. Temperature in °C is above 100°C and the Precipitation and the Humidity Percentage being above 100%. This shows that outliers are existing and they need to be removed from this dataset

### Outlier Detection

In [29]:
cleaned_df = data.copy()

for column in data.columns:
    if data[column].dtype != 'object':
        lq = round(data[column].quantile(0.25), 2)
        up = round(data[column].quantile(0.75), 2)
        iqr = round(up - lq, 2)
        
        upper_limit = round(up + 1.5 * iqr, 2)
        lower_limit = round(lq - 1.5 * iqr, 2)
        
        cleaned_df = cleaned_df[(cleaned_df[column] >= lower_limit) & (cleaned_df[column] <= upper_limit)]
        
cleaned_df = cleaned_df[(cleaned_df['Humidity'] <= 100) & (cleaned_df['Precipitation (%)'] <= 100)]

cleaned_df.reset_index(drop = True)

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy
...,...,...,...,...,...,...,...,...,...,...,...
11164,29.0,62,13.0,17.0,overcast,1002.81,2,Spring,5.0,coastal,Cloudy
11165,10.0,74,14.5,71.0,overcast,1003.15,1,Summer,1.0,mountain,Rainy
11166,30.0,77,5.5,28.0,overcast,1012.69,3,Autumn,9.0,coastal,Cloudy
11167,3.0,76,10.0,94.0,overcast,984.27,0,Winter,2.0,inland,Snowy


In [30]:
cleaned_df

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location,Weather Type
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland,Rainy
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland,Cloudy
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain,Sunny
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal,Sunny
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain,Rainy
...,...,...,...,...,...,...,...,...,...,...,...
13194,29.0,62,13.0,17.0,overcast,1002.81,2,Spring,5.0,coastal,Cloudy
13195,10.0,74,14.5,71.0,overcast,1003.15,1,Summer,1.0,mountain,Rainy
13197,30.0,77,5.5,28.0,overcast,1012.69,3,Autumn,9.0,coastal,Cloudy
13198,3.0,76,10.0,94.0,overcast,984.27,0,Winter,2.0,inland,Snowy


In [31]:
cleaned_df.describe()

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Atmospheric Pressure,UV Index,Visibility (km)
count,11169.0,11169.0,11169.0,11169.0,11169.0,11169.0,11169.0
mean,18.730773,68.227952,9.045035,49.967052,1006.227645,3.447399,5.133226
std,15.489244,18.752368,5.483878,31.238422,12.953355,3.475577,2.578344
min,-24.0,20.0,0.0,0.0,962.88,0.0,0.0
25%,5.0,58.0,5.0,18.0,995.78,1.0,3.0
50%,21.0,69.0,8.5,54.0,1007.79,2.0,5.0
75%,30.0,82.0,13.0,78.0,1016.28,5.0,7.5
max,71.0,100.0,26.0,100.0,1049.2,14.0,14.0


### Variable Selection

In [32]:
X = cleaned_df.drop('Weather Type', axis = 1)
y = cleaned_df['Weather Type']

display(X.head())
display(y.head())

Unnamed: 0,Temperature,Humidity,Wind Speed,Precipitation (%),Cloud Cover,Atmospheric Pressure,UV Index,Season,Visibility (km),Location
0,14.0,73,9.5,82.0,partly cloudy,1010.82,2,Winter,3.5,inland
1,39.0,96,8.5,71.0,partly cloudy,1011.43,7,Spring,10.0,inland
2,30.0,64,7.0,16.0,clear,1018.72,5,Spring,5.5,mountain
3,38.0,83,1.5,82.0,clear,1026.25,7,Spring,1.0,coastal
4,27.0,74,17.0,66.0,overcast,990.67,1,Winter,2.5,mountain


0     Rainy
1    Cloudy
2     Sunny
3     Sunny
4     Rainy
Name: Weather Type, dtype: object

### Encoding

In [33]:
categorical_columns = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)
X = np.array(X)
y = np.array(y)

le = LabelEncoder()
y = le.fit_transform(y)
y = to_categorical(y)

display(X.shape)

display(y.shape)

y

(11169, 15)

(11169, 4)

array([[0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       ...,
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.]])

### Train-Test Split

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Feature Scaling

In [35]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Creating the ANN

### Initializing the ANN

In [36]:
ann = tf.keras.models.Sequential()

### Adding Input Layer and Hidden Layers

In [37]:
ann.add(tf.keras.layers.Dense(units = 20, activation = 'relu'))
ann.add(tf.keras.layers.Dense(units = 20, activation = 'relu'))
ann.add(tf.keras.layers.Dense(units = 20, activation = 'relu'))

### Adding Output Layer

In [38]:
ann.add(tf.keras.layers.Dense(units = 4, activation = 'softmax'))

## Training the ANN

### Compiling the ANN

In [39]:
ann.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

### Training the ANN

In [40]:
ann.fit(X_train, y_train, batch_size = 32, epochs = 100)

Epoch 1/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6490 - loss: 0.8896
Epoch 2/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9510 - loss: 0.1866
Epoch 3/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9666 - loss: 0.1317
Epoch 4/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9698 - loss: 0.1233
Epoch 5/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9690 - loss: 0.1135
Epoch 6/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9738 - loss: 0.0894
Epoch 7/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9714 - loss: 0.0916
Epoch 8/100
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9718 - loss: 0.0867
Epoch 9/100
[1m280/280[0m [32

<keras.src.callbacks.history.History at 0x1f8d4c9d250>

## Making Predictions and Evaluating the ANN Model

### Predicting the Test Set