# RICE-VIRT-DATA-PT-05-2022-U-B-MW Final Project

## Code Summary
- **Purpose  :** Evaluation of Crime Trends Machine Learning Resampling Algorithms 
- **Created  :** 2022 Sept 29 22:25:12 UTC (Meghan E. Hull)

## Dependencies

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
import re
from sqlalchemy import create_engine
import psycopg2

In [80]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

## Version Check

In [4]:
!python --version

Python 3.7.13


In [5]:
!conda list | findstr numpy

numpy                     1.21.5           py37h7a0a035_1  
numpy-base                1.21.5           py37hca35cd5_1  
numpydoc                  1.2                pyhd3eb1b0_0  


In [6]:
!conda list | findstr pandas

pandas                    1.3.5            py37h6214cd6_0  


In [7]:
!conda list | findstr scipy

scipy                     1.7.3            py37h0a974cb_0  


In [8]:
!conda list | findstr scikit-learn

scikit-learn              1.0.2            py37hf11a4ad_1  
scikit-learn-intelex      2021.5.0         py37haa95532_0  


In [9]:
!conda list | findstr imbalanced-learn

imbalanced-learn          0.9.0                    pypi_0    pypi


## Report Tables

In [8]:
summary_df = pd.DataFrame(columns=['Balanced Accuracy Score', 
                                   'High Risk Precision Score', 
                                   'Low Risk Precision Score', 
                                   'High Risk Recall Score', 
                                   'Low Risk Recall Score',
                                   'High Risk F1 Score', 
                                   'Low Risk F1 Score'])

# 1. Import & Prep Client Data

## 1.1 Import & Initial Cleaning of Data

In [9]:
# Import database password
from config import db_password

# Define connection string
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/Crime_AnalysisDB"

In [10]:
 # Connect to DB engine
engine = create_engine(db_string)

In [52]:
# Import harris_county data
yearly_crime_df= pd.read_sql_table('yearly_crime',con=engine)
harris_county_df = pd.read_sql_table('harris_crime',con=engine)
outter_hou_df= pd.read_sql_table('outter_county',con=engine)

In [53]:
harris_county_df=harris_county_df.drop('index', axis=1)

In [54]:
harris_county_df.columns

Index(['County', 'Year', 'Agency_Count', 'Murder', 'Rape', 'Assault',
       'Burglary', 'Larceny', 'Auto_Theft', 'Violent_Offenses',
       'NonViolent_Offenses', 'Total_Crime', 'Population'],
      dtype='object')

In [55]:
outter_hou_df=outter_hou_df.drop('Unnamed: 0', axis=1)
outter_hou_df=outter_hou_df.drop('index', axis=1)

In [56]:
outter_hou_df.columns

Index(['County', 'Year', 'Agency_Count', 'Murder', 'Rape', 'Assault',
       'Burglary', 'Larceny', 'Auto_Theft', 'Violent_Offenses',
       'NonViolent_Offenses', 'Total_Crime', 'Population'],
      dtype='object')

## 1.2 Split Data for Harris County

In [68]:
# Create target
y_Harris = pd.DataFrame(harris_county_df["Violent_Offenses"])

# Create features
X_Harris = pd.DataFrame(harris_county_df[['Year', 'Agency_Count','Population']])


In [69]:
X_Harris.head()

Unnamed: 0,Year,Agency_Count,Population
0,2015,42,4564664
1,2016,44,4646508
2,2017,44,4702468
3,2018,45,4753437
4,2019,45,4776485


In [70]:
y_Harris.head()

Unnamed: 0,Violent_Offenses
0,18684
1,21126
2,23222
3,23709
4,23021


## 1.3 Split data for outer Houston counties

In [71]:
# Create target
y_Out = pd.DataFrame(outter_hou_df["Violent_Offenses"])

# Create features
X_Out = pd.DataFrame(outter_hou_df[['Year', 'Agency_Count','Population']])

In [72]:
X_Out.dtypes

Year            int64
Agency_Count    int64
Population      int64
dtype: object

In [73]:
y_Out.head()

Unnamed: 0,Violent_Offenses
0,61
1,49
2,45
3,43
4,43


# 2. Linear Regression

In [74]:
# Institate a Linear Regression Model
model_01 = LinearRegression()

In [75]:
# Curve-fit existing data
model_01.fit(X_Harris, y_Harris)

LinearRegression()

In [77]:
y_pred_01 = model_01.predict(X_Harris)
print(y_pred_01.shape)

(6, 1)


# 3. Bayesian Regression

In [81]:
# Institate a Linear BayesianRidge Model
model_02=BayesianRidge()

In [82]:
# Curve-fit existing data
model_02.fit(X_Harris, y_Harris)

BayesianRidge()

In [83]:
y_pred_02 = model_02.predict(X_Harris)
print(y_pred_01.shape)

(6, 1)
