In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

# It suggest sklearn.ensemble.HistGradientBoostingClassifier and Regressor, for NaN handlers, or preprocessing data

In [2]:
# Read the CSV file into a Pandas DataFrame
try:
    df = pd.read_csv("CFA_dataset_clean.csv")
except Exception as e:
    print("An error occured:", e)

In [3]:
df.isnull().values.sum()

0

In [4]:
df.dtypes

incident_datetime                          object
Incident_count                              int64
Ball_Direction of maximum wind gust        object
Ball_Maximum temperature (°C)             float64
Ball_Minimum temperature (°C)             float64
Ball_Speed of maximum wind gust (km/h)    float64
Fern_Direction of maximum wind gust        object
Fern_Maximum temperature (°C)             float64
Fern_Minimum temperature (°C)             float64
Fern_Speed of maximum wind gust (km/h)    float64
Wang_Direction of maximum wind gust        object
Wang_Maximum temperature (°C)             float64
Wang_Minimum temperature (°C)             float64
Wang_Speed of maximum wind gust (km/h)    float64
dtype: object

In [5]:
df.drop(columns=['incident_datetime'], inplace=True)

In [6]:
enc = OneHotEncoder(handle_unknown='ignore')
enc_arr = enc.fit_transform(df[['Ball_Direction of maximum wind gust']])
column_names = enc.get_feature_names_out(input_features=['Ball_Direction of maximum wind gust'])

enc_df = pd.DataFrame(enc_arr.toarray(), columns=column_names)

df = df.join(enc_df)
df

Unnamed: 0,Incident_count,Ball_Direction of maximum wind gust,Ball_Maximum temperature (°C),Ball_Minimum temperature (°C),Ball_Speed of maximum wind gust (km/h),Fern_Direction of maximum wind gust,Fern_Maximum temperature (°C),Fern_Minimum temperature (°C),Fern_Speed of maximum wind gust (km/h),Wang_Direction of maximum wind gust,...,Ball_Direction of maximum wind gust_NW,Ball_Direction of maximum wind gust_None,Ball_Direction of maximum wind gust_S,Ball_Direction of maximum wind gust_SE,Ball_Direction of maximum wind gust_SSE,Ball_Direction of maximum wind gust_SSW,Ball_Direction of maximum wind gust_SW,Ball_Direction of maximum wind gust_W,Ball_Direction of maximum wind gust_WNW,Ball_Direction of maximum wind gust_WSW
0,204,NNW,30.5,2.9,78.0,E,18.9,11.8,15.0,N,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,132,NW,16.4,9.0,61.0,NW,21.2,10.1,19.0,NNW,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,162,S,28.9,8.0,46.0,NW,10.7,7.2,24.0,WNW,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,140,SE,26.0,13.3,43.0,WNW,10.7,8.6,26.0,NNE,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,135,SE,32.8,12.3,43.0,SW,11.4,8.5,43.0,WSW,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,63,SW,10.0,5.6,43.0,NW,7.8,6.7,30.0,WNW,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1267,68,SSW,9.9,-0.5,28.0,WSW,7.3,4.2,17.0,NE,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1268,56,N,10.1,-0.1,43.0,N,9.9,3.9,39.0,SE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,63,N,7.4,2.2,46.0,N,7.7,4.8,33.0,S,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
enc = OneHotEncoder(handle_unknown='ignore')
enc_arr = enc.fit_transform(df[['Fern_Direction of maximum wind gust']])
column_names = enc.get_feature_names_out(input_features=['Fern_Direction of maximum wind gust'])

enc_df = pd.DataFrame(enc_arr.toarray(), columns=column_names)

df = df.join(enc_df)
df

Unnamed: 0,Incident_count,Ball_Direction of maximum wind gust,Ball_Maximum temperature (°C),Ball_Minimum temperature (°C),Ball_Speed of maximum wind gust (km/h),Fern_Direction of maximum wind gust,Fern_Maximum temperature (°C),Fern_Minimum temperature (°C),Fern_Speed of maximum wind gust (km/h),Wang_Direction of maximum wind gust,...,Fern_Direction of maximum wind gust_NW,Fern_Direction of maximum wind gust_None,Fern_Direction of maximum wind gust_S,Fern_Direction of maximum wind gust_SE,Fern_Direction of maximum wind gust_SSE,Fern_Direction of maximum wind gust_SSW,Fern_Direction of maximum wind gust_SW,Fern_Direction of maximum wind gust_W,Fern_Direction of maximum wind gust_WNW,Fern_Direction of maximum wind gust_WSW
0,204,NNW,30.5,2.9,78.0,E,18.9,11.8,15.0,N,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,132,NW,16.4,9.0,61.0,NW,21.2,10.1,19.0,NNW,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,162,S,28.9,8.0,46.0,NW,10.7,7.2,24.0,WNW,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,140,SE,26.0,13.3,43.0,WNW,10.7,8.6,26.0,NNE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,135,SE,32.8,12.3,43.0,SW,11.4,8.5,43.0,WSW,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,63,SW,10.0,5.6,43.0,NW,7.8,6.7,30.0,WNW,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1267,68,SSW,9.9,-0.5,28.0,WSW,7.3,4.2,17.0,NE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1268,56,N,10.1,-0.1,43.0,N,9.9,3.9,39.0,SE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,63,N,7.4,2.2,46.0,N,7.7,4.8,33.0,S,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
enc = OneHotEncoder(handle_unknown='ignore')
enc_arr = enc.fit_transform(df[['Wang_Direction of maximum wind gust']])
column_names = enc.get_feature_names_out(input_features=['Wang_Direction of maximum wind gust'])

enc_df = pd.DataFrame(enc_arr.toarray(), columns=column_names)

df = df.join(enc_df)
df

Unnamed: 0,Incident_count,Ball_Direction of maximum wind gust,Ball_Maximum temperature (°C),Ball_Minimum temperature (°C),Ball_Speed of maximum wind gust (km/h),Fern_Direction of maximum wind gust,Fern_Maximum temperature (°C),Fern_Minimum temperature (°C),Fern_Speed of maximum wind gust (km/h),Wang_Direction of maximum wind gust,...,Wang_Direction of maximum wind gust_NW,Wang_Direction of maximum wind gust_None,Wang_Direction of maximum wind gust_S,Wang_Direction of maximum wind gust_SE,Wang_Direction of maximum wind gust_SSE,Wang_Direction of maximum wind gust_SSW,Wang_Direction of maximum wind gust_SW,Wang_Direction of maximum wind gust_W,Wang_Direction of maximum wind gust_WNW,Wang_Direction of maximum wind gust_WSW
0,204,NNW,30.5,2.9,78.0,E,18.9,11.8,15.0,N,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,132,NW,16.4,9.0,61.0,NW,21.2,10.1,19.0,NNW,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,162,S,28.9,8.0,46.0,NW,10.7,7.2,24.0,WNW,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,140,SE,26.0,13.3,43.0,WNW,10.7,8.6,26.0,NNE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,135,SE,32.8,12.3,43.0,SW,11.4,8.5,43.0,WSW,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,63,SW,10.0,5.6,43.0,NW,7.8,6.7,30.0,WNW,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1267,68,SSW,9.9,-0.5,28.0,WSW,7.3,4.2,17.0,NE,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1268,56,N,10.1,-0.1,43.0,N,9.9,3.9,39.0,SE,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,63,N,7.4,2.2,46.0,N,7.7,4.8,33.0,S,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# df.to_csv('df_encoded.csv', sep=',', index=False)

In [10]:
df.drop(columns=['Ball_Minimum temperature (°C)','Fern_Minimum temperature (°C)','Wang_Minimum temperature (°C)','Ball_Direction of maximum wind gust','Fern_Direction of maximum wind gust', 'Wang_Direction of maximum wind gust'], inplace=True)

In [12]:
df.tail()

Unnamed: 0,Incident_count,Ball_Maximum temperature (°C),Ball_Speed of maximum wind gust (km/h),Fern_Maximum temperature (°C),Fern_Speed of maximum wind gust (km/h),Wang_Maximum temperature (°C),Wang_Speed of maximum wind gust (km/h),Ball_Direction of maximum wind gust_E,Ball_Direction of maximum wind gust_ENE,Ball_Direction of maximum wind gust_ESE,...,Wang_Direction of maximum wind gust_NW,Wang_Direction of maximum wind gust_None,Wang_Direction of maximum wind gust_S,Wang_Direction of maximum wind gust_SE,Wang_Direction of maximum wind gust_SSE,Wang_Direction of maximum wind gust_SSW,Wang_Direction of maximum wind gust_SW,Wang_Direction of maximum wind gust_W,Wang_Direction of maximum wind gust_WNW,Wang_Direction of maximum wind gust_WSW
1266,63,10.0,43.0,7.8,30.0,18.4,26.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1267,68,9.9,28.0,7.3,17.0,19.4,24.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1268,56,10.1,43.0,9.9,39.0,21.1,39.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1269,63,7.4,46.0,7.7,33.0,24.1,24.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1270,60,11.9,31.0,11.7,22.0,22.9,19.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
y = df.Incident_count.values
X = df.drop(columns="Incident_count").values

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [23]:
# Preview the features data
print(X)

[[30.5 78.  18.9 ...  0.   0.   0. ]
 [16.4 61.  21.2 ...  0.   0.   0. ]
 [28.9 46.  10.7 ...  0.   1.   0. ]
 ...
 [10.1 43.   9.9 ...  0.   0.   0. ]
 [ 7.4 46.   7.7 ...  0.   0.   0. ]
 [11.9 31.  11.7 ...  0.   0.   0. ]]


In [24]:
# Instantiate a StandardScaler instance
scaler = StandardScaler()

# Fit the training data to the standard scaler
X_scaler = scaler.fit(X_train)

# Transform the training data using the scaler
X_train_scaled = X_scaler.transform(X_train)

# Transform the testing data using the scaler
X_test_scaled = X_scaler.transform(X_test)

In [25]:
print(X_train_scaled)

[[-0.67004137 -0.19249948  0.5706772  ... -0.35188173 -0.29014923
  -0.37773781]
 [-0.58513541 -0.95307993  0.17298838 ... -0.35188173 -0.29014923
   2.64733892]
 [ 0.81581293  0.33990683  0.90473581 ... -0.35188173 -0.29014923
  -0.37773781]
 ...
 [ 0.82996393 -0.34461557  0.79338294 ... -0.35188173 -0.29014923
   2.64733892]
 [-0.98136322 -0.19249948 -0.71783458 ... -0.35188173 -0.29014923
  -0.37773781]
 [ 1.24034273 -0.11644143  1.28651708 ... -0.35188173 -0.29014923
  -0.37773781]]


In [27]:
# Define the deep learning model 
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=1, activation="relu", input_dim=57))
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=20)
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
10/10 - 0s - loss: 365.1306 - accuracy: 0.0000e+00 - 103ms/epoch - 10ms/step
Loss: 365.1305847167969, Accuracy: 0.0


In [None]:
# First run, softmax x 2 # 10/10 - 0s - loss: -1.3612e+02 - accuracy: 0.0000e+00 - 148ms/epoch - 15ms/step
# Loss: -136.12448120117188, Accuracy: 0.0