In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout

In [11]:
columns = ['Registration State','Plate Type','Violation Code','Vehicle Body Type','Vehicle Make','Violation Precinct',
            'Violation Time','Violation County','Violation Day','Violation Hour']
df = pd.read_csv(filepath_or_buffer= 'filtered_data2.csv', delimiter=',', header=None, names=columns, low_memory=False)

In [12]:
#target feature = Violation Code
y = df['Violation Code']
X = df.drop('Violation Code', axis = 1)

df = df[columns]

y = pd.get_dummies(y, columns=['Violation Code'])

#one-hot-encoding on raw data w/o ordinalencoder
X = pd.get_dummies(df,columns=columns)

In [13]:
x_train, x_test, y_train, y_test = train_test_split (X,y, test_size=0.3)
print("Length of train set x:",x_train.shape[0],"y:",y_train.shape[0])
print("Length of test set x:",x_test.shape[0],"y:",y_test.shape[0])
print(f"Train shape : {x_train.shape}, Y Train : {y_train.shape}")
print(x_train.shape[1:])
print("Shape of y_train",y_train.shape)
print("Shape of y_test",y_test.shape)
print("Shape of x_train",x_train.shape)
print("Shape of x_test",x_test.shape)
print(y_train.shape[1:])

Length of train set x: 2311597 y: 2311597
Length of test set x: 990685 y: 990685
Train shape : (2311597, 2086), Y Train : (2311597, 9)
(2086,)
Shape of y_train (2311597, 9)
Shape of y_test (990685, 9)
Shape of x_train (2311597, 2086)
Shape of x_test (990685, 2086)
(9,)


In [14]:
model = tf.keras.models.Sequential()
model.add(keras.layers.Input(shape=x_train.shape[1:]))
model.add(keras.layers.Dense(512,activation='relu'))
model.add(Dropout(0.2))
model.add(keras.layers.Dense(100,activation='relu'))
model.add(Dropout(0.2))
model.add(keras.layers.Dense(9,activation='softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               1068544   
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_4 (Dense)             (None, 100)               51300     
                                                                 
 dropout_3 (Dropout)         (None, 100)               0         
                                                                 
 dense_5 (Dense)             (None, 9)                 909       
                                                                 
Total params: 1,120,753
Trainable params: 1,120,753
Non-trainable params: 0
_________________________________________________________________


In [15]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), 
              loss=keras.losses.CategoricalCrossentropy(),
             metrics=['accuracy'])

In [16]:
history = model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test), verbose=2)

Epoch 1/5
72238/72238 - 362s - loss: 0.0010 - accuracy: 0.9997 - val_loss: 6.1096e-06 - val_accuracy: 1.0000 - 362s/epoch - 5ms/step
Epoch 2/5
72238/72238 - 335s - loss: 5.0435e-11 - accuracy: 1.0000 - val_loss: 5.8284e-06 - val_accuracy: 1.0000 - 335s/epoch - 5ms/step
Epoch 3/5
72238/72238 - 344s - loss: 5.5682e-10 - accuracy: 1.0000 - val_loss: 5.2835e-06 - val_accuracy: 1.0000 - 344s/epoch - 5ms/step
Epoch 4/5
72238/72238 - 328s - loss: 3.0839e-11 - accuracy: 1.0000 - val_loss: 5.5091e-06 - val_accuracy: 1.0000 - 328s/epoch - 5ms/step
Epoch 5/5
72238/72238 - 199s - loss: 7.1167e-12 - accuracy: 1.0000 - val_loss: 5.7541e-06 - val_accuracy: 1.0000 - 199s/epoch - 3ms/step


In [17]:
model.evaluate(x_test, y_test)



[5.754063295171363e-06, 0.9999989867210388]