In [1]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import numpy as np
import pmdarima as pm




In [2]:
def corriger_encodage(df):
    # Fonction pour corriger l'encodage d'une chaîne de caractères
    def corriger_chaine(chaine):
        if isinstance(chaine, str):
            try:
                return chaine.encode('latin1').decode('utf-8')
            except UnicodeEncodeError:
                return chaine
        return chaine

    # Corriger les valeurs dans le DataFrame
    for col in df.columns:
        df[col] = df[col].apply(corriger_chaine)

    # Corriger les noms de colonnes
    df.columns = [corriger_chaine(col) for col in df.columns]

    return df

In [3]:
df = corriger_encodage(pd.read_csv('./data/traffic_to_ml.csv', encoding='unicode_escape').drop(columns=["Unnamed: 0"]))

In [4]:
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns

In [5]:
num_features = len(numerical_cols) - 1

In [6]:
model = Sequential()
model.add(Dense(128, input_dim=num_features, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # Output layer for regression

# Output layer - adjust according to your problem
# For regression: No activation function
# For binary classification: 1 neuron, 'sigmoid' activation
# For multi-class classification: n neurons (n = number of classes), 'softmax' activation
# Compile the model
model.compile(optimizer = 'rmsprop',
                loss = 'mse',
                metrics = ['mae'])
# Model summary
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               6528      
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 16897 (66.00 KB)
Trainable params: 16897

## Fatalties

In [7]:
data       = df[numerical_cols].sample(frac=1., axis=0)
data_train = data.sample(frac=0.7, axis=0)
data_test  = data.drop(data_train.index)

# ---- Split => x,y (medv is price)
#
x_train = data_train.drop('Fatalties',  axis=1)
y_train = data_train['Fatalties']
x_test  = data_test.drop('Fatalties',   axis=1)
y_test  = data_test['Fatalties']

In [8]:
mean = x_train.mean()
std  = x_train.std()
x_train = (x_train - mean)/std
x_test  = (x_test - mean)/std

x_train, y_train = np.array(x_train), np.array(y_train)
x_test,  y_test  = np.array(x_test), np.array(y_test)

In [9]:
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=60,
    validation_data = (x_test, y_test)
)

Epoch 1/60


Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [10]:
score = history.history["mae"]

print('x_test / loss      : {:5.4f}'.format(score[0]))
print('x_test / mae       : {:5.4f}'.format(score[1]))

x_test / loss      : 0.2177
x_test / mae       : 0.2396


In [11]:
predictions = model.predict(x_test)



In [12]:
print("Prediction : {:.4f}".format(predictions[0][0]))

Prediction : 0.1258


## Serious Injuries

In [13]:
data       = df[numerical_cols].sample(frac=1., axis=0)
data_train = data.sample(frac=0.7, axis=0)
data_test  = data.drop(data_train.index)

# ---- Split => x,y (medv is price)
#
x_train = data_train.drop('Serious Injuries',  axis=1)
y_train = data_train['Serious Injuries']
x_test  = data_test.drop('Serious Injuries',   axis=1)
y_test  = data_test['Serious Injuries']

In [14]:
mean = x_train.mean()
std  = x_train.std()
x_train = (x_train - mean)/std
x_test  = (x_test - mean)/std

x_train, y_train = np.array(x_train), np.array(y_train)
x_test,  y_test  = np.array(x_test), np.array(y_test)

In [15]:
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=60,
    validation_data = (x_test, y_test)
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [16]:
score = history.history["mae"]

print('x_test / loss      : {:5.4f}'.format(score[0]))
print('x_test / mae       : {:5.4f}'.format(score[1]))

x_test / loss      : 0.7149
x_test / mae       : 0.3699


In [17]:
predictions = model.predict(x_test)



In [18]:
print("Prediction : {:.4f}".format(predictions[0][0]))

Prediction : 0.9954


## Total Victims

In [19]:
data       = df[numerical_cols].sample(frac=1., axis=0)
data_train = data.sample(frac=0.7, axis=0)
data_test  = data.drop(data_train.index)

# ---- Split => x,y (medv is price)
#
x_train = data_train.drop('Serious Injuries',  axis=1)
y_train = data_train['Serious Injuries']
x_test  = data_test.drop('Serious Injuries',   axis=1)
y_test  = data_test['Serious Injuries']

In [20]:
mean = x_train.mean()
std  = x_train.std()
x_train = (x_train - mean)/std
x_test  = (x_test - mean)/std

x_train, y_train = np.array(x_train), np.array(y_train)
x_test,  y_test  = np.array(x_test), np.array(y_test)

In [21]:
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=60,
    validation_data = (x_test, y_test)
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [22]:
score = history.history["mae"]

print('x_test / loss      : {:5.4f}'.format(score[0]))
print('x_test / mae       : {:5.4f}'.format(score[1]))

x_test / loss      : 0.2013
x_test / mae       : 0.2006


In [23]:
predictions = model.predict(x_test)



In [24]:
print("Prediction : {:.4f}".format(predictions[0][0]))

Prediction : 0.9900


## Date

In [25]:
df['Date'] = pd.to_datetime(df['Date'])  # Ensure 'Date' is a datetime object
df.set_index('Date', inplace=True)  # Set 'Date' as the index

monthly_accidents = df.resample('M').size()

In [26]:
train = monthly_accidents[:'2018-01-01']  # Replace with your split date
test = monthly_accidents['2018-01-01':]  # Replace with your split date

# Build and fit the ARIMA model
model = pm.auto_arima(monthly_accidents, seasonal=True, m=1,
                      start_p=0, start_q=0, 
                      max_p=10, max_q=10, 
                      start_P=0, start_Q=0, 
                      max_P=10, max_Q=10, 
                      d=None, D=None, 
                      trace=True,
                      error_action='ignore',  
                      suppress_warnings=True, 
                      stepwise=True)

Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=1320.489, Time=0.06 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=1308.285, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=1300.348, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=1318.505, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.19 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=1289.273, Time=0.09 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.24 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=1285.273, Time=0.14 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=inf, Time=0.32 sec
 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=inf, Time=0.26 sec
 ARIMA(1,1,4)(0,0,0)[0] intercept   : AIC=inf, Time=0.36 sec
 ARIMA(0,1,3)(0,0,0)[0]             : AIC=1286.173, Time=0.07 sec

Best model:  ARIMA(0,1,3)(0,0,0)[0] intercept
Total fit time: 1.831 seconds


In [27]:
print(model.summary())

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                  144
Model:               SARIMAX(0, 1, 3)   Log Likelihood                -637.637
Date:                Thu, 25 Jan 2024   AIC                           1285.273
Time:                        12:29:11   BIC                           1300.088
Sample:                    01-31-2010   HQIC                          1291.293
                         - 12-31-2021                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     -0.3423      0.135     -2.537      0.011      -0.607      -0.078
ma.L1         -0.5489      0.064     -8.571      0.000      -0.674      -0.423
ma.L2         -0.1981      0.093     -2.133      0.0

In [28]:
model.predict()

2022-01-31    111.831703
2022-02-28    117.149975
2022-03-31    117.420747
2022-04-30    117.078425
2022-05-31    116.736103
2022-06-30    116.393780
2022-07-31    116.051458
2022-08-31    115.709135
2022-09-30    115.366813
2022-10-31    115.024490
Freq: M, dtype: float64