# Import Libraries

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Setting up the Data

In [6]:
data = pd.read_csv('Data/RefinedFile.csv')
data = data[["DATE OCC", "TIME OCC", "AREA", "Crm Cd", "Vict Age", "Status Desc"]] # Only keep these columns

## Format Date to Day of the Week
data['DATE OCC'] = pd.to_datetime(data['DATE OCC'])
data['DATE OCC'] = data['DATE OCC'].dt.dayofweek

## Selecting features and target variable
independent_vars = data.drop('Status Desc', axis=1)  # Independent Variable
dependent_var = data['Status Desc'].str.contains('arrest', case=False)  # Dependent Variables

## Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(independent_vars, dependent_var, test_size=0.2)

# Regression Test Function

In [7]:
def regression_test(class_weights:dict):

    print(f'Weighting Scheme: {" : ".join(str(weight) for weight in class_weights.values())} (Not Arrested : Arrested)')

    ## Initialize and train the Logistic Regression model with class weights
    model = LogisticRegression(class_weight=class_weights)
    model.fit(X_train, y_train)

    ## Perform a prediction on a subset of our data
    y_pred = model.predict(X_test)

    ## Evaluate the Model's Accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    ## Reports of Model
    print(classification_report(y_test, y_pred, zero_division=0.0))
    print("\n\n\n")

# Execute Script

In [8]:
if __name__ == "__main__":
    ## Give a higher weight to the Arrested since they are of minority in the data
    ## 0 - Not Arrested ; 1 - Arrested
    class_weights = {0: 1.0, 1: 6.5} 
    regression_test(class_weights)

Weighting Scheme: 1.0 : 6.5 (Not Arrested : Arrested)
Accuracy: 0.9023514329168757
              precision    recall  f1-score   support

       False       0.91      0.99      0.95    113829
        True       0.22      0.02      0.03     11754

    accuracy                           0.90    125583
   macro avg       0.56      0.51      0.49    125583
weighted avg       0.84      0.90      0.86    125583





