In [1]:
#!pip install tensorflow

In [20]:
import numpy as np
import pandas as pd
import pickle
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

input_processed_path = 'Preprocessed data/inputs/'
target_processed_path = 'Preprocessed data/targets/'

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow import keras

## Importing variables

In [21]:
with open('Preprocessed data/preprocess_vars.pkl', 'rb') as file:
    preprocess_vars = pickle.load(file)

input_cols = preprocess_vars['input_cols']
numeric_cols = preprocess_vars['numeric_cols']
categorical_cols = preprocess_vars['categorical_cols']
encoded_cols = preprocess_vars['encoded_cols']
target_col = preprocess_vars['target_col']

## Importing the inputs and targets

In [22]:
train_inputs = pd.read_parquet(input_processed_path + 'train_inputs.parquet')
val_inputs = pd.read_parquet(input_processed_path + 'val_inputs.parquet')
test_inputs = pd.read_parquet(input_processed_path + 'test_inputs.parquet')

train_targets = pd.read_parquet(target_processed_path + 'train_targets.parquet')[target_col]
val_targets = pd.read_parquet(target_processed_path + 'val_targets.parquet')[target_col]
test_targets = pd.read_parquet(target_processed_path + 'test_targets.parquet')[target_col]

In [23]:
print(f'Train inputs shape: {train_inputs.shape}')
print(f'Validation inputs shape: {val_inputs.shape}')
print(f'Test inputs shape: {test_inputs.shape}')

print(f'Train targets shape: {train_targets.shape}')
print(f'Validation targets shape: {val_targets.shape}')
print(f'Test targets shape: {test_targets.shape}')

Train inputs shape: (9838, 123)
Validation inputs shape: (1669, 123)
Test inputs shape: (2572, 123)
Train targets shape: (9838,)
Validation targets shape: (1669,)
Test targets shape: (2572,)


In [24]:
X_train = train_inputs[numeric_cols + encoded_cols]
X_val = val_inputs[numeric_cols + encoded_cols]
X_test = test_inputs[numeric_cols + encoded_cols]

In [25]:
print(f'X_train dtypes: {X_train.dtypes.unique()}')
print(f'X_val dtypes: {X_val.dtypes.unique()}')
print(f'X_test dtypes: {X_test.dtypes.unique()}')

X_train dtypes: [dtype('float64')]
X_val dtypes: [dtype('float64')]
X_test dtypes: [dtype('float64')]


In [26]:
print(f'Unique train_targets: {train_targets.unique()}')
print(f'Unique val targets: {val_targets.unique()}')
print(f'Unique test targets: {test_targets.unique()}')

Unique train_targets: ['No' 'Yes']
Unique val targets: ['Yes' 'No']
Unique test targets: ['Yes' 'No']


## Target mapping to integer

Since the targets are 'Yes' and 'No' which are strings and cannot be used in neural networks, we have to map them to 0 and 1 respectively.

In [27]:
target_mapping = {train_targets.unique()[i] : i for i in range(len(train_targets.unique()))}
target_mapping

{'No': 0, 'Yes': 1}

In [28]:
train_targets_mapped = train_targets.copy()
val_targets_mapped = val_targets.copy()
test_targets_mapped = test_targets.copy()

print(f'Type of train_targets: {type(train_targets)}\n')
print(f'Type of train_targetst_mapped: {type(train_targets_mapped)}')

Type of train_targets: <class 'pandas.core.series.Series'>

Type of train_targetst_mapped: <class 'pandas.core.series.Series'>


In [29]:
train_targets

130666     No
107539     No
525        No
142472     No
19533     Yes
         ... 
3402       No
66039     Yes
64773      No
7692       No
142497     No
Name: RainTomorrow, Length: 9838, dtype: object

In [30]:
train_targets_mapped = train_targets_mapped.replace(target_mapping)
val_targets_mapped = val_targets_mapped.replace(target_mapping)
test_targets_mapped = test_targets_mapped.replace(target_mapping)
train_targets_mapped

130666    0
107539    0
525       0
142472    0
19533     1
         ..
3402      0
66039     1
64773     0
7692      0
142497    0
Name: RainTomorrow, Length: 9838, dtype: int64

## Preparing the neural network

In [31]:
keras.optimizers

<module 'keras.api.optimizers' from 'C:\\Users\\NILADRI\\AppData\\Roaming\\Python\\Python310\\site-packages\\keras\\api\\optimizers\\__init__.py'>

In [43]:
model = Sequential()
# model.add(keras.Input(shape = train_inputs.shape[1],))
model.add(Dense(128, activation = 'relu', input_shape = (X_train.shape[1],)))
model.add(Dense(80, activation = 'relu'))
# model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activation = 'relu'))
# model.add(Dense(4, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [44]:
model.compile(loss = 'binary_crossentropy', 
              metrics = ['accuracy'],
             optimizer = 'sgd')

In [45]:
model.fit(train_inputs[numeric_cols + encoded_cols], 
          train_targets_mapped, epochs = 20, batch_size = 1,
         validation_data = (X_val, val_targets_mapped))

Epoch 1/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2ms/step - accuracy: 0.8051 - loss: 0.4465 - val_accuracy: 0.8484 - val_loss: 0.3439
Epoch 2/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 3ms/step - accuracy: 0.8317 - loss: 0.3756 - val_accuracy: 0.8514 - val_loss: 0.3368
Epoch 3/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3ms/step - accuracy: 0.8453 - loss: 0.3624 - val_accuracy: 0.8610 - val_loss: 0.3282
Epoch 4/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 2ms/step - accuracy: 0.8436 - loss: 0.3550 - val_accuracy: 0.8580 - val_loss: 0.3325
Epoch 5/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.8529 - loss: 0.3403 - val_accuracy: 0.8688 - val_loss: 0.3150
Epoch 6/20
[1m9838/9838[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 3ms/step - accuracy: 0.8524 - loss: 0.3362 - val_accuracy: 0.8676 - val_loss: 0.3176
Epoch 7/20

<keras.src.callbacks.history.History at 0x2936824f220>

In [46]:
loss, accuracy = model.evaluate(test_inputs[numeric_cols + encoded_cols], test_targets_mapped)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8270 - loss: 0.4568
Loss: 0.4335860013961792
Accuracy: 0.8359253406524658
