In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import requests
import json
import os
import glob
import plotly.express as px

In [2]:
# Install a blanket 'ignore' filter: from this cell onward no warning of any
# category is displayed in the notebook outputs.
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Load the per-state gun-law provisions table (2016 file) and preview the
# first five rows
df = pd.read_csv(filepath_or_buffer='../Resources/gun_law_provisions_by_state_2016.csv')
df.head()

Unnamed: 0,state,deaths_per_capita,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
0,Alabama,21.5,0,1,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,10
1,Alaska,23.3,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
2,Arizona,15.2,1,1,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
3,Arkansas,17.8,1,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
4,California,7.9,1,1,0,1,0,0,1,0,...,1,1,1,1,1,1,1,1,1,104


In [4]:
# Index the provisions table by state name.
# The guard keeps this cell re-runnable: once 'state' has become the index it
# is no longer a column, so a second set_index('state') would raise KeyError.
if df.index.name != 'state':
    df = df.set_index('state')

df.head(5)

Unnamed: 0_level_0,deaths_per_capita,felony,invcommitment,invoutpatient,danger,drugmisdemeanor,alctreatment,alcoholism,relinquishment,violent,...,expartedating,dvrosurrender,dvrosurrendernoconditions,dvrosurrenderdating,expartesurrender,expartesurrendernoconditions,expartesurrenderdating,dvroremoval,stalking,lawtotal
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,21.5,0,1,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,10
Alaska,23.3,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
Arizona,15.2,1,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,8
Arkansas,17.8,1,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
California,7.9,1,1,0,1,0,0,1,0,1,...,1,1,1,1,1,1,1,1,1,104


In [5]:
# Load the per-law summary table: one row per law with the number of states
# that have it and the average deaths per capita across those states.
# The file is read with the ISO-8859-1 (Latin-1) codec.
df2 = pd.read_csv(
    '../Resources/gun_laws_average_deaths_by_law.csv',
    encoding="ISO-8859-1",
)
df2.head()

Unnamed: 0,law_tag,Variable_Name,Average_deaths_per_capita,number_of_states_with_law,Category,Sub-Category,Brief_Description_of_Provision
0,felony,felony,12.85,37,Prohibitions for high-risk gun possession,Felony,Firearm possession is prohibited for all peopl...
1,age18longgunpossess,age18longgunpossess,9.92,12,Possession regulations,Age restrictions,No possession of long guns until age 18
2,age18longgunsale,age18longgunsale,11.77,20,Buyer regulations,Age restrictions,Purchase of long guns from licensed dealers an...
3,age21handgunpossess,age21handgunpossess,5.95,8,Possession regulations,Age restrictions,No possession of handguns until age 21
4,age21handgunsale,age21handgunsale,8.81,14,Buyer regulations,Age restrictions,Purchase of handguns from licensed dealers and...


In [6]:
# Keep only the laws adopted by more than 3 states (rows with 3 or fewer are
# dropped). Filtering is idempotent, so re-running the cell is safe.
df2 = df2.loc[df2['number_of_states_with_law'] > 3]
# Seed the preview so the same five rows are shown on every run; an unseeded
# sample makes the cell output differ after each Restart & Run All.
df2.sample(5, random_state=0)

Unnamed: 0,law_tag,Variable_Name,Average_deaths_per_capita,number_of_states_with_law,Category,Sub-Category,Brief_Description_of_Provision
2,age18longgunsale,age18longgunsale,11.77,20,Buyer regulations,Age restrictions,Purchase of long guns from licensed dealers an...
46,domestic_violence_surrendernoconditions,dvrosurrendernoconditions,9.37,13,Domestic violence,Restraining order,No additional finding is required before the f...
34,college_campas_ban_concealed,collegeconcealed,11.9,12,Possession regulations,Campus carry,"No gun carrying on college campuses, including..."
36,dealer,dealer,6.63,7,Dealer regulations,Licensing,State dealer license required for sale of all ...
42,domestic_violence_dating,dvrodating,9.86,16,Domestic violence,Restraining order,DVROs are automatically prohibiting if the sub...


In [7]:
# Build the modelling table from the two columns of interest:
#   X -> 'lawtotal' as a 2-D array (one feature column)
#   y -> 'deaths_per_capita' as a 1-D array (regression target)
ml_df = df.loc[:, ['lawtotal', 'deaths_per_capita']].copy()
X = ml_df[['lawtotal']].values
y = ml_df['deaths_per_capita'].values
ml_df

Unnamed: 0_level_0,lawtotal,deaths_per_capita
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,10,21.5
Alaska,3,23.3
Arizona,8,15.2
Arkansas,11,17.8
California,104,7.9
Colorado,30,14.3
Connecticut,88,4.6
Delaware,38,11.0
Florida,21,12.6
Georgia,6,15.0


In [8]:
# Hold out one third of the rows as the test set; the fixed random_state
# makes the shuffle (and therefore the split) repeatable between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=0,
)

In [9]:
# Fit a simple (single-feature) linear regression on the training split.
# The fit call is left as the last expression so the cell still displays
# the fitted estimator.
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

LinearRegression()

In [10]:
# Run the fitted regressor on the held-out test features
y_pred = regressor.predict(X=X_test)

In [11]:
# Save the fitted model to disk.
# The context manager closes (and therefore flushes) the file as soon as the
# dump finishes; a bare open() inside the call leaves the handle dangling,
# which can leave model.pkl incomplete when it is read back later.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [12]:
# Reload the pickled model and sanity-check it with a single prediction for
# a feature value of 60.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a model.pkl that this notebook (or another trusted source) wrote.
with open('model.pkl', 'rb') as model_file:
    # The context manager guarantees the read handle is closed after loading.
    model = pickle.load(model_file)
print(model.predict([[60]]))

[9.09755953]
