In [2]:
# Dependencies
import numpy as np
import pandas as pd

## Data Pre-Processing

In [4]:
# Load the binary FBI-category training set and discard the "mocodes" column.
crime = pd.read_csv('../training_data/train_data_binary_fbi_cat.csv')
crime = crime.drop("mocodes", axis=1)

# Keep only fully populated rows; every later cell works from crime_cln.
crime_cln = crime.dropna()

# Preview the cleaned frame (the one actually used for modelling) rather than
# the raw frame, so the displayed rows match what the model is trained on.
crime_cln.head()

Unnamed: 0,dr_no,area_id,date_occ,date_rptd,longitude,latitude,premis_cd,rpt_dist_no,hour_occ,minute_occ,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,9999
0,210,10,20170808,20170808,-118.48,34.1903,301.0,1039,2,0,...,0,0,0,0,0,0,0,0,0,0
1,214,14,20170801,20170801,-118.4262,34.0106,104.0,1435,2,0,...,0,0,0,0,0,0,0,0,0,0
2,405,5,20181031,20181102,-118.2468,33.7926,101.0,519,21,0,...,0,0,0,0,0,0,0,0,0,0
3,415,15,20170822,20170822,-118.4137,34.1867,108.0,1523,13,45,...,0,0,0,0,0,0,0,0,0,0
4,418,18,20130318,20130319,-118.2717,33.942,101.0,1823,20,30,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Split the cleaned frame into the target series and the feature matrix.
# NOTE(review): the preview shows "Unnamed: 0" and "dr_no" columns, which are
# still part of X here -- confirm they are intended model inputs.
binary_target = "fbi_part_1"
y = crime_cln[binary_target]
X = crime_cln.drop(binary_target, axis=1)

# Sanity check: row counts of features and target must match.
print(X.shape, y.shape)

(870154, 579) (870154,)


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# Hold out a stratified test partition so train and test keep the same class
# balance. No test_size is given, so scikit-learn's default applies;
# random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
# The encoder is fitted on the training labels and reused for the test labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
# Pass the class count explicitly: without it to_categorical sizes each matrix
# from the largest label it happens to see, so train and test could end up
# with different widths if a class were missing from one partition.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

Using TensorFlow backend.


## Create a Deep Learning Model

In [32]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Take the layer sizes from the prepared arrays instead of hard-coding them,
# so the network keeps matching the data if the feature set or the number of
# classes changes upstream.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

# Create model and add layers
# Three 64-unit ReLU hidden layers, each followed by 10% dropout, feeding a
# softmax output with one unit per class.
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=n_features))
model.add(Dropout(0.1))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(units=n_classes, activation='softmax'))

In [33]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs on the scaled training data, shuffling between epochs.
# The call is left as the cell's final expression, so its return value is
# echoed as the cell output.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=10,
    verbose=2,
    shuffle=True,
)

Epoch 1/10
 - 46s - loss: 0.0582 - acc: 0.9828
Epoch 2/10
 - 45s - loss: 0.0502 - acc: 0.9853
Epoch 3/10
 - 45s - loss: 0.0485 - acc: 0.9858
Epoch 4/10
 - 45s - loss: 0.0477 - acc: 0.9859
Epoch 5/10
 - 45s - loss: 0.0470 - acc: 0.9863
Epoch 6/10
 - 45s - loss: 0.0467 - acc: 0.9863
Epoch 7/10
 - 45s - loss: 0.0464 - acc: 0.9864
Epoch 8/10
 - 48s - loss: 0.0465 - acc: 0.9865
Epoch 9/10
 - 47s - loss: 0.0462 - acc: 0.9866
Epoch 10/10
 - 48s - loss: 0.0460 - acc: 0.9866


<keras.callbacks.History at 0x21cfc5b0eb8>

## Quantify our Trained Model

In [34]:
# Score the trained network on the held-out test partition; evaluate() yields
# the loss followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_scores

print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Deep Neural Network - Loss: 0.04955753476648347, Accuracy: 0.9856991160205756


## Make Predictions

In [35]:
# Predict the first five test rows. Sequential.predict_classes() is no longer
# available in current Keras/TensorFlow releases, so take the arg-max over the
# softmax probabilities instead; this yields the same class indices.
class_probabilities = model.predict(X_test_scaled[:5])
encoded_predictions = np.argmax(class_probabilities, axis=-1)

# Map the integer class indices back to the original string labels.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

  if diff:


In [36]:
# Show the predictions next to the true labels for the same five test rows.
actual_labels = list(y_test.iloc[:5])
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {actual_labels}")

Predicted classes: ['Property' 'Violent' 'Property' 'Property' 'Property']
Actual Labels: ['Property', 'Violent', 'Property', 'Property', 'Property']


## Save the Trained Model

In [37]:
# Persist the trained binary classifier to an .h5 file in the working directory.
binary_model_path = "violent_crime_model_trained.h5"
model.save(binary_model_path)

## Load FBI Crime Category Data

In [38]:
# Load the FBI-category training set and discard the "mocodes" column in a
# single chained expression.
fbi_crime = (
    pd.read_csv('../training_data/train_data_8_fbi_cat.csv')
    .drop("mocodes", axis=1)
)

# Keep only fully populated rows and preview the cleaned frame.
fbi_crime_cln = fbi_crime.dropna()
fbi_crime_cln.head()

Unnamed: 0,dr_no,area_id,date_occ,date_rptd,longitude,latitude,premis_cd,rpt_dist_no,hour_occ,minute_occ,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,9999
0,210,10,20170808,20170808,-118.48,34.1903,301.0,1039,2,0,...,0,0,0,0,0,0,0,0,0,0
1,214,14,20170801,20170801,-118.4262,34.0106,104.0,1435,2,0,...,0,0,0,0,0,0,0,0,0,0
2,405,5,20181031,20181102,-118.2468,33.7926,101.0,519,21,0,...,0,0,0,0,0,0,0,0,0,0
3,415,15,20170822,20170822,-118.4137,34.1867,108.0,1523,13,45,...,0,0,0,0,0,0,0,0,0,0
4,418,18,20130318,20130319,-118.2717,33.942,101.0,1823,20,30,...,0,0,0,0,0,0,0,0,0,0


In [39]:
# Split the cleaned frame into the target series and the feature matrix.
# NOTE(review): the preview shows "Unnamed: 0" and "dr_no" columns, which are
# still part of X here -- confirm they are intended model inputs.
category_target = "FBI_Category"
y = fbi_crime_cln[category_target]
X = fbi_crime_cln.drop(category_target, axis=1)

# Sanity check: row counts of features and target must match.
print(X.shape, y.shape)

(870154, 579) (870154,)


In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# Hold out a stratified test partition so train and test keep the same class
# balance. No test_size is given, so scikit-learn's default applies;
# random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Step 1: Label-encode data set
# The encoder is fitted on the training labels and reused for the test labels.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
# Pass the class count explicitly: without it to_categorical sizes each matrix
# from the largest label it happens to see, so train and test could end up
# with different widths if a class were missing from one partition.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

## Create a Deep Learning Model (fbi_crime)

In [41]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Take the layer sizes from the prepared arrays instead of hard-coding them,
# so the network keeps matching the data if the feature set or the number of
# FBI categories changes upstream.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

# Create model and add layers
# Three 64-unit ReLU hidden layers, each followed by 10% dropout, feeding a
# softmax output with one unit per class.
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=n_features))
model.add(Dropout(0.1))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(units=n_classes, activation='softmax'))

In [43]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs on the scaled training data, shuffling between epochs.
# The call is left as the cell's final expression, so its return value is
# echoed as the cell output.
model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=10,
    verbose=2,
    shuffle=True,
)

Epoch 1/10
 - 50s - loss: 0.3926 - acc: 0.8601
Epoch 2/10
 - 49s - loss: 0.3909 - acc: 0.8608
Epoch 3/10
 - 49s - loss: 0.3900 - acc: 0.8613
Epoch 4/10
 - 49s - loss: 0.3887 - acc: 0.8615
Epoch 5/10
 - 49s - loss: 0.3883 - acc: 0.8621
Epoch 6/10
 - 49s - loss: 0.3879 - acc: 0.8626
Epoch 7/10
 - 50s - loss: 0.3868 - acc: 0.8626
Epoch 8/10
 - 52s - loss: 0.3868 - acc: 0.8627
Epoch 9/10
 - 53s - loss: 0.3859 - acc: 0.8631
Epoch 10/10
 - 52s - loss: 0.3864 - acc: 0.8629


<keras.callbacks.History at 0x2220f5c7198>

## Quantify our Trained Model

In [44]:
# Score the trained FBI-category network on the held-out test partition;
# evaluate() yields the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)

# This model has the same multi-hidden-layer architecture as the binary model,
# so it is reported under the same "Deep Neural Network" label.
print(
    f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.372228353437544, Accuracy: 0.8672238081447464


## Make Predictions

In [45]:
# Predict the first five test rows. Sequential.predict_classes() is no longer
# available in current Keras/TensorFlow releases, so take the arg-max over the
# softmax probabilities instead; this yields the same class indices.
class_probabilities = model.predict(X_test_scaled[:5])
encoded_predictions = np.argmax(class_probabilities, axis=-1)

# Map the integer class indices back to the original FBI category names.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

  if diff:


In [46]:
# Show the predictions next to the true labels for the same five test rows.
actual_labels = list(y_test.iloc[:5])
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {actual_labels}")

Predicted classes: ['Aggravated Assault' 'BTFV' 'BTFV' 'BTFV' 'Aggravated Assault']
Actual Labels: ['Aggravated Assault', 'BTFV', 'BTFV', 'BTFV', 'Aggravated Assault']


In [47]:
# Persist the trained FBI-category classifier to an .h5 file in the working
# directory.
category_model_path = "fbi_category_model_trained.h5"
model.save(category_model_path)