In [1]:
# Dependencies
import pandas as pd
import numpy as np
import os

In [2]:
# Load the combined dataset from the Resources folder and preview the first rows.
csv_path = '../Resources/final_data.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,county,year,violent_crime,murder,rape,robbery,aggravated_assault,property_crime,burglary,larceny_theft,...,frm_15,points_15,median_hh_income,median_hh_inc_moe,poverty_count,poverty_count_moe,poverty_rate,poverty_rate_moe,county_fips,price
0,Atlantic,2019,0,0,0,0,0,0,0,0,...,3.391731,0.475,62678,2822,29057,4251,1.6,11.3,1,196067.42
1,Bergen,2019,2,0,2,0,0,46,2,44,...,3.391731,0.475,107971,3025,52980,7662,0.8,5.7,3,494018.42
2,Burlington,2019,0,0,0,0,0,0,0,0,...,3.391731,0.475,88443,3233,24961,4374,1.0,5.7,5,238593.67
3,Camden,2019,2,0,0,0,2,44,4,38,...,3.391731,0.475,73168,2374,53641,7048,1.4,10.7,7,181980.75
4,Cape May,2019,0,0,0,0,0,0,0,0,...,3.391731,0.475,66565,4753,8853,1981,2.2,9.8,9,389294.58


In [3]:
# Discard every row that has at least one missing value.
df = df.dropna(how='any')

In [4]:
# Feature matrix: every column except the county name and its FIPS code.
# Target vector: the county name itself.
non_feature_columns = ['county', 'county_fips']
X = df.drop(columns=non_feature_columns)
y = df["county"]
print(X.shape, y.shape)

(164, 22) (164,)


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

# Split data into training and testing groups (fixed random_state so the
# split is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Scale the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set.
# The encoder is fitted on the full target column rather than on y_train:
# the split is not stratified, so a county could end up only in the test
# split, and transforming a label the encoder has never seen raises
# ValueError. Labels carry no statistics, so this is not data leakage, and
# the integer codes are unchanged whenever y_train already held every county.
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding.
# 21 is the number of classes; it must equal the width of the network's
# softmax output layer.
y_train_categorical = to_categorical(encoded_y_train, 21)
y_test_categorical = to_categorical(encoded_y_test, 21)

In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed NumPy and TensorFlow before any layer is created so repeated runs
# start from the same random state (the train/test split is already seeded
# with the same value).
np.random.seed(1)
tf.random.set_seed(1)

# Derive the layer sizes from the prepared arrays instead of hard-coding
# them, so the network keeps matching the data if columns or classes change.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

# Define the model: three hidden ReLU layers of 50 units each, followed by a
# softmax layer with one unit per class.
deep_model = Sequential()
# Add first layer
deep_model.add(Dense(units=50, activation='relu', input_dim=n_features))
# Add second layer (deep)
deep_model.add(Dense(units=50, activation='relu'))
# Add third layer (deep)
deep_model.add(Dense(units=50, activation='relu'))
# Add output layer
deep_model.add(Dense(units=n_classes, activation='softmax'))

In [7]:
# Print the model summary: one row per layer with its output shape and
# parameter count, followed by the parameter totals.
deep_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                1150      
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_3 (Dense)              (None, 21)                1071      
Total params: 7,321
Trainable params: 7,321
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Compile the model: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded), and accuracy reported as the metric.
deep_model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Fit the model to the training data.
# The History object returned by fit() is kept in a variable so the per-epoch
# loss/accuracy can be inspected afterwards, instead of being discarded and
# echoed as an opaque object repr at the end of the cell output.
training_history = deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2)

Train on 123 samples
Epoch 1/100
123/123 - 0s - loss: 3.0490 - accuracy: 0.0244
Epoch 2/100
123/123 - 0s - loss: 3.0242 - accuracy: 0.0488
Epoch 3/100
123/123 - 0s - loss: 3.0018 - accuracy: 0.0732
Epoch 4/100
123/123 - 0s - loss: 2.9820 - accuracy: 0.0732
Epoch 5/100
123/123 - 0s - loss: 2.9605 - accuracy: 0.0976
Epoch 6/100
123/123 - 0s - loss: 2.9421 - accuracy: 0.1220
Epoch 7/100
123/123 - 0s - loss: 2.9183 - accuracy: 0.1463
Epoch 8/100
123/123 - 0s - loss: 2.8975 - accuracy: 0.1138
Epoch 9/100
123/123 - 0s - loss: 2.8697 - accuracy: 0.1382
Epoch 10/100
123/123 - 0s - loss: 2.8394 - accuracy: 0.1626
Epoch 11/100
123/123 - 0s - loss: 2.8033 - accuracy: 0.1707
Epoch 12/100
123/123 - 0s - loss: 2.7667 - accuracy: 0.1951
Epoch 13/100
123/123 - 0s - loss: 2.7232 - accuracy: 0.1951
Epoch 14/100
123/123 - 0s - loss: 2.6773 - accuracy: 0.1951
Epoch 15/100
123/123 - 0s - loss: 2.6252 - accuracy: 0.2033
Epoch 16/100
123/123 - 0s - loss: 2.5736 - accuracy: 0.2358
Epoch 17/100
123/123 - 0s - 

<tensorflow.python.keras.callbacks.History at 0x20b385d77b8>

In [9]:
# Score the trained network on the held-out test split; evaluate() yields the
# loss followed by the accuracy metric configured at compile time.
test_metrics = deep_model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

41/41 - 0s - loss: 0.1317 - accuracy: 0.9756
Deep Neural Network - Loss: 0.13173172204959682, Accuracy: 0.9756097793579102


In [10]:
# Take the most probable class for each test row, map the integer codes back
# to county names, and compare the first five against the true labels.
class_probabilities = deep_model.predict(X_test_scaled)
prediction = class_probabilities.argmax(axis=-1)
predicted_labels = label_encoder.inverse_transform(prediction)
print(f"Predicted Labels: {predicted_labels[:5]}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted Labels: ['Burlington' 'Cumberland' 'Union' 'Camden' 'Somerset']
Actual Labels: ['Burlington', 'Cumberland', 'Union', 'Camden', 'Morris']


In [11]:
# Save the model.
# The target directory is created first so the save does not fail on a fresh
# checkout where ../Models does not exist yet.
model_path = "../Models/NJ_deep_learning.h5"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
deep_model.save(model_path)

In [12]:
# List the feature columns of the test split, in the order the model sees them.
X_test.columns

Index(['year', 'violent_crime', 'murder', 'rape', 'robbery',
       'aggravated_assault', 'property_crime', 'burglary', 'larceny_theft',
       'motor_vehicle_theft', 'arson', 'frm_30', 'points_30', 'frm_15',
       'points_15', 'median_hh_income', 'median_hh_inc_moe', 'poverty_count',
       'poverty_count_moe', 'poverty_rate', 'poverty_rate_moe', 'price'],
      dtype='object')

In [13]:
# Keep only the crime / mortgage / income / poverty columns of the test split
# by removing 'year' and 'price', then show what remains.
input_data = X_test.drop(columns=['year', 'price'])
input_data.columns

Index(['violent_crime', 'murder', 'rape', 'robbery', 'aggravated_assault',
       'property_crime', 'burglary', 'larceny_theft', 'motor_vehicle_theft',
       'arson', 'frm_30', 'points_30', 'frm_15', 'points_15',
       'median_hh_income', 'median_hh_inc_moe', 'poverty_count',
       'poverty_count_moe', 'poverty_rate', 'poverty_rate_moe'],
      dtype='object')

In [14]:
# Best-case scenario built from input_data: the minimum of every column,
# except median_hh_income, for which the maximum is taken.
best_case_columns = [
    'violent_crime', 'murder', 'rape', 'robbery', 'aggravated_assault',
    'property_crime', 'burglary', 'larceny_theft', 'motor_vehicle_theft',
    'arson', 'frm_30', 'points_30', 'frm_15', 'points_15',
    'median_hh_income', 'median_hh_inc_moe', 'poverty_count',
    'poverty_count_moe', 'poverty_rate', 'poverty_rate_moe',
]
best_case = [
    input_data[col].max() if col == 'median_hh_income' else input_data[col].min()
    for col in best_case_columns
]

In [15]:
# Worst-case scenario built from input_data: the maximum of every column,
# except median_hh_income, for which the minimum is taken.
worst_case_columns = [
    'violent_crime', 'murder', 'rape', 'robbery', 'aggravated_assault',
    'property_crime', 'burglary', 'larceny_theft', 'motor_vehicle_theft',
    'arson', 'frm_30', 'points_30', 'frm_15', 'points_15',
    'median_hh_income', 'median_hh_inc_moe', 'poverty_count',
    'poverty_count_moe', 'poverty_rate', 'poverty_rate_moe',
]
worst_case = [
    input_data[col].min() if col == 'median_hh_income' else input_data[col].max()
    for col in worst_case_columns
]

In [16]:
# Show the best-case feature values as a plain list.
print(best_case)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.6540384615384616, 0.4846153846153846, 2.9307692307692315, 0.4499999999999999, 113083, 1623, 4793, 918, 0.8, 3.9]


In [17]:
# One-row DataFrame describing a best-case scenario: year fixed to 2020, the
# minimum of every input_data column (maximum for median_hh_income), and
# county_fips fixed to 1.
scenario_columns = [
    'violent_crime', 'murder', 'rape', 'robbery', 'aggravated_assault',
    'property_crime', 'burglary', 'larceny_theft', 'motor_vehicle_theft',
    'arson', 'frm_30', 'points_30', 'frm_15', 'points_15',
    'median_hh_income', 'median_hh_inc_moe', 'poverty_count',
    'poverty_count_moe', 'poverty_rate', 'poverty_rate_moe',
]
scenario_row = {
    'year': 2020,
    **{
        col: (input_data[col].max() if col == 'median_hh_income'
              else input_data[col].min())
        for col in scenario_columns
    },
    'county_fips': 1,
}
test_array = pd.DataFrame(scenario_row, index=[0])
print(test_array)

   year  violent_crime  murder  rape  robbery  aggravated_assault  \
0  2020              0       0     0        0                   0   

   property_crime  burglary  larceny_theft  motor_vehicle_theft  ...  \
0               0         0              0                    0  ...   

   points_30    frm_15  points_15  median_hh_income  median_hh_inc_moe  \
0   0.484615  2.930769       0.45            113083               1623   

   poverty_count  poverty_count_moe  poverty_rate  poverty_rate_moe  \
0           4793                918           0.8               3.9   

   county_fips  
0            1  

[1 rows x 22 columns]
