In [5]:
# TensorFlow and tf.keras
import tensorflowjs as tfjs
import tensorflow as tf
# Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import math
from sklearn.model_selection import train_test_split


# Import data

In [8]:
dataset = pd.read_csv("fatal-police-shootings-data.csv")

# Data processing

In [11]:
# Number of row and columns
dataset.shape

(5416, 14)

In [14]:
# Missing values
index_drop = []
column_name = ["armed","age", "gender", "race", "signs_of_mental_illness", "threat_level",  "flee", "body_camera"]


for i in column_name:
    drop_column = list(dataset.loc[(dataset[i] == "undetermined")].index)
    index_drop += drop_column
    
    
index_drop
index_drop = list(dict.fromkeys(index_drop))

# New dataset without missing values
dataset = dataset.drop(labels=None, axis=0, index=index_drop, columns=None, level=None, inplace=False, errors='raise')

In [17]:
# Drop NaN
dataset = dataset.reset_index().dropna().set_index('index')

In [20]:
# Replace with White weapon 
dataset_1 = dataset.replace(list(dataset["armed"].unique())[2:len(list(dataset["armed"].unique()))], "White weapon")
    
dataset_1 

Unnamed: 0_level_0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,2015-01-04,shot,White weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,2015-01-04,shot,White weapon,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5408,5916,Rayshard Brooks,2020-06-12,shot,White weapon,27.0,M,B,Atlanta,GA,False,attack,Foot,True
5409,5925,Caine Van Pelt,2020-06-12,shot,gun,23.0,M,B,Crown Point,IN,False,attack,Car,False
5410,5918,Hannah Fizer,2020-06-13,shot,unarmed,25.0,F,W,Sedalia,MO,False,other,Not fleeing,False
5411,5921,William Slyter,2020-06-13,shot,gun,22.0,M,W,Kansas City,MO,False,other,Other,False


In [23]:
# Target
target = dataset_1["armed"]
print(target)
# Features 
features_names = ["age", "gender", "race", "signs_of_mental_illness", "threat_level",  "flee", "body_camera"]
features_row = dataset_1[features_names]

index
0                gun
1                gun
2            unarmed
3       White weapon
4       White weapon
            ...     
5408    White weapon
5409             gun
5410         unarmed
5411             gun
5413             gun
Name: armed, Length: 4181, dtype: object


In [26]:
target

index
0                gun
1                gun
2            unarmed
3       White weapon
4       White weapon
            ...     
5408    White weapon
5409             gun
5410         unarmed
5411             gun
5413             gun
Name: armed, Length: 4181, dtype: object

In [29]:
# One-hot encoding for features
features = pd.get_dummies(features_row, columns=['gender', 
                                             'race', 
                                             'signs_of_mental_illness', 
                                             'threat_level',  
                                             'flee', 
                                             'body_camera'])

In [32]:
features

Unnamed: 0_level_0,age,gender_F,gender_M,race_A,race_B,race_H,race_N,race_O,race_W,signs_of_mental_illness_False,signs_of_mental_illness_True,threat_level_attack,threat_level_other,flee_Car,flee_Foot,flee_Not fleeing,flee_Other,body_camera_False,body_camera_True
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,53.0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0
1,47.0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0
2,23.0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0
3,32.0,0,1,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,0
4,39.0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5408,27.0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1
5409,23.0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0
5410,25.0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0
5411,22.0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0


In [35]:
# transform target 
target = target.replace(["gun", "unarmed", "White weapon"], [0, 1, 2])

target

index
0       0
1       0
2       1
3       2
4       2
       ..
5408    2
5409    0
5410    1
5411    0
5413    0
Name: armed, Length: 4181, dtype: int64

In [38]:
# Target occurance
target.value_counts()

0    2553
2    1328
1     300
Name: armed, dtype: int64

In [41]:
# Model structure
model = tf.keras.Sequential([
    # First HL
    tf.keras.layers.Dense(15, activation='relu'),
    # Second HL
    tf.keras.layers.Dense(30, activation='relu'),
    # Thired HL
    tf.keras.layers.Dense(15, activation='relu'),
    # Output
    tf.keras.layers.Dense(3, activation='softmax')
]) 


# Compile
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])



# Data
model.fit(features, target, epochs=20,batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fec41f88c10>

In [44]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=0)

# Function NN

# Train

In [47]:
def train_NN(features_train, target_train, activation, epochs, batch_size, nb_HL, nb_neurones):
    
    
    # Def HL
    
    stock_HL_output = []
    for i in range(0, nb_HL):
        HL = tf.keras.layers.Dense(nb_neurones, activation=activation)
        stock_HL_output.append(HL)
        
        
    # Output
    output = tf.keras.layers.Dense(3, activation='softmax')
    stock_HL_output.append(output)
        
        

    # Model structure
    model = tf.keras.Sequential(stock_HL_output) 
        


    # Compile
    model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


    # Data
    model.fit(features_train, target_train, epochs=epochs, batch_size=batch_size,verbose=0)
    
    return model 

In [50]:
model_train = train_NN(features_train = features, target_train = target,batch_size = int(X_train.shape[0]/10), 
                       activation = 'sigmoid', 
                       epochs = 20, 
                       nb_HL = 4, 
                       nb_neurones = 10)

# Test

In [53]:
def test_NN(features_test, target_test, model):
    
    test_loss, test_acc = model.evaluate(features_test,  target_test, verbose = 0)

    
    return {"Loss":test_loss, "Accuracy":test_acc}

In [56]:
model_tested = test_NN(features_test = X_test , target_test = y_test, model = model_train)

In [59]:
model_tested

{'Loss': 0.8508785367012024, 'Accuracy': 0.6199203133583069}

In [62]:
model_tested.get("Accuracy")

0.6199203133583069

# Grid search

In [65]:
# Define our grid
grid_activation = ["relu", "sigmoid"]
grid_epochs = [10]
grid_nb_HL = [1,2]
nb_neurones =  [5]

In [68]:
# define Grid search
def grid_search(features_train, target_train, features_test, target_test, grid_activation, grid_epochs, grid_nb_HL, nb_neurones):
    # Create the np ndarray in order to stock meta-parameters associated to its accuracy

    stock_activation = []
    stock_epochs = []
    stock_nb_HL = []
    stock_neurones = []
    stock_accuracy = []

    increment = -1
    for activation in grid_activation:
        for nb_epoch in grid_epochs:
            for nb_HL in grid_nb_HL:
                for neurone in nb_neurones:

                    increment += 1

                    # train the model
                    model_trained = train_NN(features_train = features_train, target_train = target_train, batch_size = int(X_train.shape[0]/10), 
                           activation = activation, 
                           epochs = nb_epoch, 
                           nb_HL = nb_HL, 
                           nb_neurones = neurone)



                    # test the modle on the test set
                    model_tested = test_NN(features_test = features_test , target_test = target_test, model = model_trained)



                    stock_activation.append(activation) 
                    stock_epochs.append(nb_epoch)
                    stock_nb_HL.append(nb_HL) 
                    stock_neurones.append(neurone) 
                    stock_accuracy.append(model_tested.get("Accuracy")) 
                    
                    
    # trnasform list into np array
    stock_activation = np.array([stock_activation]).T
    stock_epochs = np.array([stock_epochs]).T
    stock_nb_HL = np.array([stock_nb_HL]).T
    stock_neurones = np.array([stock_neurones]).T
    stock_accuracy = np.array([stock_accuracy]).T
    
    result_grid_search = np.concatenate((stock_activation, stock_epochs,stock_nb_HL,stock_neurones,stock_accuracy), axis=1)
    
                    
                    
                    
    return result_grid_search

In [71]:
result_grid_search = grid_search(features_train = X_train, target_train = y_train, features_test = X_test, 
                                 target_test = y_test, grid_activation = grid_activation, grid_epochs = grid_epochs, 
            grid_nb_HL = grid_nb_HL, nb_neurones = nb_neurones)

In [74]:
result_grid_search

array([['relu', '10', '1', '5', '0.572908341884613'],
       ['relu', '10', '2', '5', '0.6199203133583069'],
       ['sigmoid', '10', '1', '5', '0.6199203133583069'],
       ['sigmoid', '10', '2', '5', '0.6199203133583069']], dtype='<U32')

In [77]:
# Transform np array to a pd dataframe
pandas_df = pd.DataFrame(result_grid_search)
pandas_df

Unnamed: 0,0,1,2,3,4
0,relu,10,1,5,0.572908341884613
1,relu,10,2,5,0.6199203133583069
2,sigmoid,10,1,5,0.6199203133583069
3,sigmoid,10,2,5,0.6199203133583069


In [80]:
# Sort the pd dataframe by the accuracy colmun in a ascending order.
pandas_df.sort_values(by=[4], inplace=True, ascending=False)

In [83]:
pandas_df

Unnamed: 0,0,1,2,3,4
1,relu,10,2,5,0.6199203133583069
2,sigmoid,10,1,5,0.6199203133583069
3,sigmoid,10,2,5,0.6199203133583069
0,relu,10,1,5,0.572908341884613


# Test other grid search

In [86]:
# Define our grid
grid_activation1 = ["relu", "sigmoid"]
grid_epochs1 = [30]
grid_nb_HL1 = [3,4]
nb_neurones1 =  [7]

In [89]:
# define Grid search
def grid_search(features_train, target_train, features_test, target_test, grid_activation1, grid_epochs1, grid_nb_HL1, nb_neurones1):
    # Create the np ndarray in order to stock meta-parameters associated to its accuracy

    stock_activation = []
    stock_epochs = []
    stock_nb_HL = []
    stock_neurones = []
    stock_accuracy = []

    increment = -1
    for activation in grid_activation1:
        for nb_epoch in grid_epochs1:
            for nb_HL in grid_nb_HL1:
                for neurone in nb_neurones1:

                    increment += 1

                    # train the model
                    model_trained = train_NN(features_train = features_train, target_train = target_train, batch_size = int(X_train.shape[0]/10), 
                           activation = activation, 
                           epochs = nb_epoch, 
                           nb_HL = nb_HL, 
                           nb_neurones = neurone)



                    # test the modle on the test set
                    model_tested = test_NN(features_test = features_test , target_test = target_test, model = model_trained)



                    stock_activation.append(activation) 
                    stock_epochs.append(nb_epoch)
                    stock_nb_HL.append(nb_HL) 
                    stock_neurones.append(neurone) 
                    stock_accuracy.append(model_tested.get("Accuracy")) 
                    
                    
    # trnasform list into np array
    stock_activation = np.array([stock_activation]).T
    stock_epochs = np.array([stock_epochs]).T
    stock_nb_HL = np.array([stock_nb_HL]).T
    stock_neurones = np.array([stock_neurones]).T
    stock_accuracy = np.array([stock_accuracy]).T
    
    result_grid_search = np.concatenate((stock_activation, stock_epochs,stock_nb_HL,stock_neurones,stock_accuracy), axis=1)
    
                    
                    
                    
    return result_grid_search

In [92]:
result_grid_search1 = grid_search(features_train = X_train, target_train = y_train, features_test = X_test, 
                                 target_test = y_test, grid_activation1 = grid_activation1, grid_epochs1 = grid_epochs1, 
            grid_nb_HL1 = grid_nb_HL1, nb_neurones1 = nb_neurones1)

In [95]:
# Transform np array to a pd dataframe
pandas_df1 = pd.DataFrame(result_grid_search1)
pandas_df

Unnamed: 0,0,1,2,3,4
1,relu,10,2,5,0.6199203133583069
2,sigmoid,10,1,5,0.6199203133583069
3,sigmoid,10,2,5,0.6199203133583069
0,relu,10,1,5,0.572908341884613


In [98]:
# Sort the pd dataframe by the accuracy colmun in a ascending order.
pandas_df1.sort_values(by=[4], inplace=True, ascending=False)

In [101]:
pandas_df1

Unnamed: 0,0,1,2,3,4
1,relu,30,4,7,0.6199203133583069
2,sigmoid,30,3,7,0.6199203133583069
3,sigmoid,30,4,7,0.6199203133583069
0,relu,30,3,7,0.6079681515693665


# Train NN based on "Best" meta-parameters

In [104]:
best_meta_parameters = list(pandas_df1.iloc[0,0:4])
best_meta_parameters

['relu', '30', '4', '7']

In [107]:
# Train our final NN with our "best" meta-parameters
model_train = train_NN(features_train = features, target_train = target, batch_size = int(X_train.shape[0]/10), 
                       activation = best_meta_parameters[0], 
                       epochs = int(best_meta_parameters[1]), 
                       nb_HL = int(best_meta_parameters[2]), 
                       nb_neurones = int(best_meta_parameters[3]))

In [110]:
model_train

<tensorflow.python.keras.engine.sequential.Sequential at 0x7fec28690890>

In [113]:
lst = [[25, "M", "H", False, "other", "Car", False], [55, "F", "W", True, "attack", "Not fleeing", False]]
    
new_instance = pd.DataFrame(lst, columns = ["age", "gender", "race", "signs_of_mental_illness", "threat_level",  "flee", "body_camera"])
new_instance

# Add new instances to features_row
new_and_trained = pd.concat([new_instance, features_row], ignore_index=True)


In [116]:
new_and_trained

Unnamed: 0,age,gender,race,signs_of_mental_illness,threat_level,flee,body_camera
0,25.0,M,H,False,other,Car,False
1,55.0,F,W,True,attack,Not fleeing,False
2,53.0,M,A,True,attack,Not fleeing,False
3,47.0,M,W,False,attack,Not fleeing,False
4,23.0,M,H,False,other,Not fleeing,False
...,...,...,...,...,...,...,...
4178,27.0,M,B,False,attack,Foot,True
4179,23.0,M,B,False,attack,Car,False
4180,25.0,F,W,False,other,Not fleeing,False
4181,22.0,M,W,False,other,Other,False


In [119]:
new_instance_dummies = pd.get_dummies(new_and_trained, columns=['gender', 
                                             'race', 
                                             'signs_of_mental_illness', 
                                             'threat_level',  
                                             'flee', 
                                             'body_camera'])

new_instance_dummies

Unnamed: 0,age,gender_F,gender_M,race_A,race_B,race_H,race_N,race_O,race_W,signs_of_mental_illness_False,signs_of_mental_illness_True,threat_level_attack,threat_level_other,flee_Car,flee_Foot,flee_Not fleeing,flee_Other,body_camera_False,body_camera_True
0,25.0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,0
1,55.0,1,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,1,0
2,53.0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0
3,47.0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,1,0,1,0
4,23.0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4178,27.0,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,1
4179,23.0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0
4180,25.0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,1,0
4181,22.0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,1,1,0


In [122]:
new_instance_dummies = new_instance_dummies.iloc[:2,]
new_instance_dummies.shape

(2, 19)

In [125]:
model_train.predict(new_instance_dummies)

array([[0.30986184, 0.16447537, 0.5256628 ],
       [0.57417977, 0.03253129, 0.39328897]], dtype=float32)

In [129]:
tfjs.converters.save_keras_model(model_train, '.keras_model')

In [1]:
import tensorflow as tf

In [2]:
print(tf.__version__)

2.4.1
