In [17]:
import tensorflow as tf
from keras import layers, models
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight

# Reading data
data = pd.read_csv("tnst_matched.csv")

# Loading Inputs
scaler = StandardScaler()
input_features = ["wind_dir", "wind_speed", "ceiling", "visibility", "temp", "dew_pnt", "pressure", "congestion_score"]
X = scaler.fit_transform(data[input_features]) # scaling inputs

# Loading Outputs (5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, inf)
y = np.zeros((len(data), 25)) # one-hot encoding for 26 classes
for i, delay in enumerate(data["weather_delay"].values):
    if delay >= 120:
        y[i][24] = 1
    else:
        y[i][int(delay // 5) + 1] = 1

print("Input and Output shapes:")
print(X.shape, y.shape)
print(y[0:5])  # Print first 5 one-hot encoded outputs

Input and Output shapes:
(348403, 8) (348403, 25)
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0.]]


In [19]:
# Splitting data into training and testing sets (75% train, 25% test)
X_train, X_test = X[:int(len(data)*0.75)], X[int(len(data)*0.75):]
y_train, y_test = y[:int(len(data)*0.75)], y[int(len(data)*0.75):]

# Building classification weights
classes = np.unique(np.argmax(y_train, axis=1))
class_weights = compute_class_weight('balanced', classes=classes, y=np.argmax(y_train, axis=1))
exp_weights = np.exp(3*(class_weights/np.max(class_weights))) # making weights exponential
exp_weights = exp_weights/np.mean(exp_weights) # normalizing weights
class_weight_dict = dict(enumerate(class_weights))
print("Class weights:", class_weight_dict)

# Building the model
model = models.Sequential([
    layers.Input(shape=(X.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(25, activation='softmax')
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Training the model
model.fit(X_train, y_train,
          epochs=5,
          batch_size=32,
          validation_split=0.2,
          verbose=2,
          class_weight=class_weight_dict)

# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('\nTest accuracy:', test_acc)
print(classification_report(np.argmax(y_test, axis=1), 
                            np.argmax(model.predict(X_test), axis=1)))

Class weights: {0: np.float64(0.042020128339714295), 1: np.float64(31.742225461613216), 2: np.float64(51.3565251572327), 3: np.float64(43.550333333333334), 4: np.float64(62.57231800766284), 5: np.float64(74.57248858447488), 6: np.float64(89.24248633879782), 7: np.float64(103.69126984126984), 8: np.float64(126.5998062015504), 9: np.float64(141.39718614718615), 10: np.float64(151.2164351851852), 11: np.float64(194.42113095238096), 12: np.float64(209.37660256410257), 13: np.float64(253.1996124031008), 14: np.float64(388.8422619047619), 15: np.float64(272.18958333333336), 16: np.float64(388.8422619047619), 17: np.float64(544.3791666666667), 18: np.float64(435.50333333333333), 19: np.float64(494.8901515151515), 20: np.float64(494.8901515151515), 21: np.float64(518.4563492063492), 22: np.float64(725.8388888888888), 23: np.float64(45.5547419804742)}


Epoch 1/5
6533/6533 - 13s - 2ms/step - accuracy: 0.9908 - loss: 8.6166 - val_accuracy: 0.9927 - val_loss: 0.0992
Epoch 2/5
6533/6533 - 12s - 2ms/step - accuracy: 0.9913 - loss: 7.8243 - val_accuracy: 0.9927 - val_loss: 0.0830
Epoch 3/5
6533/6533 - 12s - 2ms/step - accuracy: 0.9913 - loss: 7.7523 - val_accuracy: 0.9927 - val_loss: 0.0777
Epoch 4/5
6533/6533 - 21s - 3ms/step - accuracy: 0.9913 - loss: 7.6309 - val_accuracy: 0.9927 - val_loss: 0.0876
Epoch 5/5
6533/6533 - 11s - 2ms/step - accuracy: 0.9913 - loss: 7.5418 - val_accuracy: 0.9927 - val_loss: 0.0787
2722/2722 - 4s - 1ms/step - accuracy: 0.9914 - loss: 0.0882

Test accuracy: 0.9914007782936096
[1m2722/2722[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 988us/step
              precision    recall  f1-score   support

           1       0.99      1.00      1.00     86352
           2       0.00      0.00      0.00       100
           3       0.00      0.00      0.00        79
           4       0.00      0.00      0.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
