In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import imblearn
from pandasql import sqldf

# Data Processessing and Cleaning

In [2]:
#Read Data

data = pd.read_csv('Open_Parking_and_Camera_Violations_20231203.csv')


In [3]:
#Clean data by droping nulls, dropping bad precinct data, and selection relevant features
#Drop rows where fine amount is less than equal to zero

data = data.dropna()
data = data[(data['Precinct'] >= 1) & (data['Precinct'] <= 124)]
data = data[['State', 'License Type', 'Violation', 'Fine Amount', 'Violation Time', 'Issue Date', 'Precinct']]
data['Issue Date'] = pd.to_datetime(data['Issue Date'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')
data = data[data['Fine Amount'] > 0]
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 746597 entries, 2 to 1048574
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   State           746597 non-null  object 
 1   License Type    746597 non-null  object 
 2   Violation       746597 non-null  object 
 3   Fine Amount     746597 non-null  float64
 4   Violation Time  746597 non-null  object 
 5   Issue Date      746597 non-null  object 
 6   Precinct        746597 non-null  float64
dtypes: float64(2), object(5)
memory usage: 45.6+ MB


In [4]:
data.head()

Unnamed: 0,State,License Type,Violation,Fine Amount,Violation Time,Issue Date,Precinct
2,NY,PAS,FIRE HYDRANT,115.0,07:04A,2021-12-16,49.0
4,NY,PAS,NO STANDING-DAY/TIME LIMITS,115.0,08:06A,2016-06-06,72.0
5,NJ,PAS,NO PARKING-DAY/TIME LIMITS,60.0,01:37P,2017-10-25,62.0
6,NY,PAS,NO PARKING-STREET CLEANING,45.0,09:36A,2017-10-26,70.0
7,NY,PAS,FAIL TO DSPLY MUNI METER RECPT,35.0,01:35P,2017-12-18,70.0


In [5]:
#Further clean up data by encoding features and renaming them through SQL
#All of the violations are encoded into features
#Precinct numbers are encoded to Boroughs features

SQL_Func = lambda q: sqldf(q, globals())

query = """
    select 
        case when lower(`State`) = 'ny' then 1 else 0 end as in_state,
        case when lower(`License Type`) = 'pas' then 1 else 0 end as passenger_car,
        
        case when 
            (case when substr(`Violation Time`, -1) = 'A' then substr(`Violation Time`, 1, 2)
            else cast(cast(substr(`Violation Time`, 1, 2) as int) + 12 as text) end) between '09' and '18' 
            then 1 else 0 end as business_hours,
            strftime('%Y', `Issue Date`) as year,
        case when strftime('%w', `Issue Date`) = '0' or strftime('%w', `Issue Date`) = '6' then 1 else 0 end as is_weekend,

        case when Precinct >= 1 AND Precinct <= 40 then 1 else 0 end as Manhattan,
        case when Precinct >= 40 AND Precinct <= 60 then 1 else 0 end as Bronx,
        case when Precinct >= 60 AND Precinct <= 100 then 1 else 0 end as Brooklyn,
        case when Precinct >= 100 AND Precinct <= 120 then 1 else 0 end as Queens,
        case when Precinct >= 120 AND Precinct <= 125 then 1 else 0 end as Staten_Island,

        case when Violation = 'NO PARKING-STREET CLEANING' then 1 else 0 end as NO_PARKING_STREET_CLEANING,
        case when Violation = 'INSP. STICKER-EXPIRED/MISSING' then 1 else 0 end as INSP_STICKER_EXPIRED_MISSING,
        case when Violation = 'FAIL TO DSPLY MUNI METER RECPT' then 1 else 0 end as FAIL_TO_DSPLY_MUNI_METER_RECPT,
        case when Violation = 'NO STANDING-DAY/TIME LIMITS' then 1 else 0 end as NO_STANDING_DAY_TIME_LIMITS,
        case when Violation = 'FIRE HYDRANT' then 1 else 0 end as FIRE_HYDRANT,
        case when Violation = 'REG. STICKER-EXPIRED/MISSING' then 1 else 0 end as REG_STICKER_EXPIRED_MISSING,
        case when Violation = 'NO PARKING-DAY/TIME LIMITS' then 1 else 0 end as NO_PARKING_DAY_TIME_LseIMITS,
        case when Violation = 'DOUBLE PARKING' then 1 else 0 end as DOUBLE_PARKING,
        case when Violation = 'EXPIRED MUNI METER' then 1 else 0 end as EXPIRED_MUNI_METER,
        case when Violation = 'FRONT OR BACK PLATE MISSING' then 1 else 0 end as FRONT_OR_BACK_PLATE_MISSING,
        case when Violation = 'NO STANDING-BUS STOP' then 1 else 0 end as NO_STANDING_BUS_STOP,
        case when Violation = 'NO STANDING-COMM METER ZONE' then 1 else 0 end as NO_STANDING_COMM_METER_ZONE,
        case when Violation = 'FAIL TO DISP. MUNI METER RECPT' then 1 else 0 end as FAIL_TO_DISP_MUNI_METER_RECPT,
        case when Violation = 'NO STANDING-EXC. TRUCK LOADING' then 1 else 0 end as NO_STANDING_EXC_TRUCK_LOADING,
        case when Violation = 'CROSSWALK' then 1 else 0 end as CROSSWALK,
        case when Violation = 'NO STANDING-EXC. AUTH. VEHICLE' then 1 else 0 end as NO_STANDING_EXC_AUTH_VEHICLE,
        case when Violation = 'DOUBLE PARKING-MIDTOWN COMML' then 1 else 0 end as DOUBLE_PARKING_MIDTOWN_COMML,
        case when Violation = 'SIDEWALK' then 1 else 0 end as SIDEWALK,
        case when Violation = 'BIKE LANE' then 1 else 0 end as BIKE_LANE,
        case when Violation = 'NO MATCH-PLATE/STICKER' then 1 else 0 end as NO_MATCH_PLATE_STICKER,
        case when Violation = 'PLTFRM LFTS LWRD POS COMM VEH' then 1 else 0 end as PLTFRM_LFTS_LWRD_POS_COMM_VEH,
        case when Violation = 'SAFETY ZONE' then 1 else 0 end as SAFETY_ZONE,
        case when Violation = 'OBSTRUCTING DRIVEWAY' then 1 else 0 end as OBSTRUCTING_DRIVEWAY,
        case when Violation = 'NO STOPPING-DAY/TIME LIMITS' then 1 else 0 end as NO_STOPPING_DAY_TIME_LIMITS,
        case when Violation = 'COMML PLATES-UNALTERED VEHICLE' then 1 else 0 end as COMML_PLATES_UNALTERED_VEHICLE,
        case when Violation = 'NGHT PKG ON RESID STR-COMM VEH' then 1 else 0 end as NGHT_PKG_ON_RESID_STR_COMM_VEH,
        case when Violation = 'EXPIRED MUNI MTR-COMM MTR ZN' then 1 else 0 end as EXPIRED_MUNI_MTR_COMM_MTR_ZN,
        case when Violation = 'OBSTRUCTING TRAFFIC/INTERSECT' then 1 else 0 end as OBSTRUCTING_TRAFFIC_INTERSECT,
        case when Violation = 'NO STANDING-BUS LANE' then 1 else 0 end as NO_STANDING_BUS_LANE,
        case when Violation = 'INSP STICKER-MUTILATED/CFEIT' then 1 else 0 end as INSP_STICKER_MUTILATED_C_FEIT,
        case when Violation = 'STORAGE-3HR COMMERCIAL' then 1 else 0 end as STORAGE_3HR_COMMERCIAL,
        case when Violation = 'NON-COMPLIANCE W/ POSTED SIGN' then 1 else 0 end as NON_COMPLIANCE_W_POSTED_SIGN,
        case when Violation = 'NO STANDING-TAXI STAND' then 1 else 0 end as NO_STANDING_TAXI_STAND,
        case when Violation = 'NO PARKING-EXC. AUTH. VEHICLE' then 1 else 0 end as NO_PARKING_EXC_AUTH_VEHICLE,
        case when Violation = 'PEDESTRIAN RAMP' then 1 else 0 end as PEDESTRIAN_RAMP,
        case when Violation = 'PARKED BUS-EXC. DESIG. AREA' then 1 else 0 end as PARKED_BUS_EXC_DESIG_AREA,
        case when Violation = 'WRONG WAY' then 1 else 0 end as WRONG_WAY,
        case when Violation = 'SELLING/OFFERING MCHNDSE-METER' then 1 else 0 end as SELLING_OFFERING_MCHNDSE_METER,
        case when Violation = 'EXPIRED METER' then 1 else 0 end as EXPIRED_METER,
        case when Violation = 'NO STANDING-HOTEL LOADING' then 1 else 0 end as NO_STANDING_HOTEL_LOADING,
        case when Violation = 'REG STICKER-MUTILATED/CFEIT' then 1 else 0 end as REG_STICKER_MUTILATED_C_FEIT,
        case when Violation = 'NO STANDING EXCP D/S' then 1 else 0 end as NO_STANDING_EXCP_D_S,
        case when Violation = 'ANGLE PARKING' then 1 else 0 end as ANGLE_PARKING,
        case when Violation = 'IMPROPER REGISTRATION' then 1 else 0 end as IMPROPER_REGISTRATION,
        case when Violation = 'NO PARKING-EXC. HNDICAP PERMIT' then 1 else 0 end as NO_PARKING_EXC_HNDICAP_PERMIT,
        case when Violation = 'TRAFFIC LANE' then 1 else 0 end as TRAFFIC_LANE,
        case when Violation = 'BEYOND MARKED SPACE' then 1 else 0 end as BEYOND_MARKED_SPACE,
        case when Violation = 'NO PARKING-EXC. HOTEL LOADING' then 1 else 0 end as NO_PARKING_EXC_HOTEL_LOADING,
        case when Violation = 'MISSING EQUIPMENT' then 1 else 0 end as MISSING_EQUIPMENT,
        case when Violation = 'FEEDING METER' then 1 else 0 end as FEEDING_METER,
        case when Violation = 'OVERTIME PKG-TIME LIMIT POSTED' then 1 else 0 end as OVERTIME_PKG_TIME_LIMIT_POSTED,
        case when Violation = 'DETACHED TRAILER' then 1 else 0 end as DETACHED_TRAILER,
        case when Violation = 'TUNNEL/ELEVATED/ROADWAY' then 1 else 0 end as TUNNEL_ELEVATED_ROADWAY,
        case when Violation = 'UNAUTHORIZED BUS LAYOVER' then 1 else 0 end as UNAUTHORIZED_BUS_LAYOVER,
        case when Violation = 'NO PARKING-TAXI STAND' then 1 else 0 end as NO_PARKING_TAXI_STAND,
        case when Violation = 'OTHER' then 1 else 0 end as OTHER,
        case when Violation = 'VIN OBSCURED' then 1 else 0 end as VIN_OBSCURED,
        case when Violation = 'IDLING' then 1 else 0 end as IDLING,
        case when Violation = 'NO STD(EXC TRKS/GMTDST NO-TRK)' then 1 else 0 end as NO_STD_EXC_TRKS_GMTDST_NO_TRK,
        case when Violation = 'OVERNIGHT TRACTOR TRAILER PKG' then 1 else 0 end as OVERNIGHT_TRACTOR_TRAILER_PKG,
        case when Violation = 'INTERSECTION' then 1 else 0 end as INTERSECTION,
        case when Violation = 'UNALTERED COMM VEHICLE' then 1 else 0 end as UNALTERED_COMM_VEHICLE,
        case when Violation = 'FAILURE TO DISPLAY BUS PERMIT' then 1 else 0 end as FAILURE_TO_DISPLAY_BUS_PERMIT,
        case when Violation = 'NO STANDING-FOR HIRE VEH STOP' then 1 else 0 end as NO_STANDING_FOR_HIRE_VEH_STOP,
        case when Violation = 'BUS PARKING IN LOWER MANHATTAN' then 1 else 0 end as BUS_PARKING_IN_LOWER_MANHATTAN,
        case when Violation = 'UNAUTHORIZED PASSENGER PICK-UP' then 1 else 0 end as UNAUTHORIZED_PASSENGER_PICK_UP,
        case when Violation = 'NIGHTTIME STD/ PKG IN A PARK' then 1 else 0 end as NIGHTTIME_STD_PKG_IN_A_PARK,
        case when Violation = 'EXCAVATION-VEHICLE OBSTR TRAFF' then 1 else 0 end as EXCAVATION_VEHICLE_OBSTR_TRAFF,
        case when Violation = 'NO STANDING-COMMUTER VAN STOP' then 1 else 0 end as NO_STANDING_COMMUTER_VAN_STOP,
        case when Violation = 'DIVIDED HIGHWAY' then 1 else 0 end as DIVIDED_HIGHWAY,
        case when Violation = 'NO STOP/STANDNG EXCEPT PAS P/U' then 1 else 0 end as NO_STOP_STANDNG_EXCEPT_PAS_P_U,
        case when Violation = 'OT PARKING-MISSING/BROKEN METR' then 1 else 0 end as OT_PARKING_MISSING_BROKEN_METR,
        case when Violation = 'VEHICLE FOR SALE(DEALERS ONLY)' then 1 else 0 end as VEHICLE_FOR_SALE_DEALERS_ONLY,
        case when Violation = 'NO OPERATOR NAM/ADD/PH DISPLAY' then 1 else 0 end as NO_OPERATOR_NAM_ADD_PH_DISPLAY,
        case when Violation = 'ELEVATED/DIVIDED HIGHWAY/TUNNL' then 1 else 0 end as ELEVATED_DIVIDED_HIGHWAY_TUNNL,
        case when Violation = 'ANGLE PARKING-COMM VEHICLE' then 1 else 0 end as ANGLE_PARKING_COMM_VEHICLE,
        case when Violation = 'RAILROAD CROSSING' then 1 else 0 end as RAILROAD_CROSSING,
        case when Violation = 'EXPIRED METER-COMM METER ZONE' then 1 else 0 end as EXPIRED_METER_COMM_METER_ZONE,
        case when Violation = 'ALTERING INTERCITY BUS PERMIT' then 1 else 0 end as ALTERING_INTERCITY_BUS_PERMIT,
        case when Violation = 'WASH/REPAIR VEHCL-REPAIR ONLY' then 1 else 0 end as WASH_REPAIR_VEHCL_REPAIR_ONLY,
        case when Violation = 'PKG IN EXC. OF LIM-COMM MTR ZN' then 1 else 0 end as PKG_IN_EXC_OF_LIM_COMM_MTR_ZN,
        case when Violation = 'VACANT LOT' then 1 else 0 end as VACANT_LOT,
        case when Violation = 'NO STANDING EXCP DP' then 1 else 0 end as NO_STANDING_EXCP_DP,
        case when Violation = 'MIDTOWN PKG OR STD-3HR LIMIT' then 1 else 0 end as MIDTOWN_PKG_OR_STD_3HR_LIMIT,
        case when Violation = 'VEH-SALE/WSHNG/RPRNG/DRIVEWAY' then 1 else 0 end as VEH_SALE_WSHNG_RPRNG_DRIVEWAY,
        case when Violation = 'MARGINAL STREET/WATER FRONT' then 1 else 0 end as MARGINAL_STREET_WATER_FRONT,
        case when Violation = 'OVERTIME STDG D/S' then 1 else 0 end as OVERTIME_STDG_D_S,
        case when Violation = 'BUS LANE VIOLATION' then 1 else 0 end as BUS_LANE_VIOLATION,
        case when Violation = 'UNALTERED COMM VEH-NME/ADDRESS' then 1 else 0 end as UNALTERED_COMM_VEH_NME_ADDRESS,
        case when Violation = 'NO STANDING-OFF-STREET LOT' then 1 else 0 end as NO_STANDING_OFF_STREET_LOT,
        case when Violation = 'NO STANDING-SNOW EMERGENCY' then 1 else 0 end as NO_STANDING_SNOW_EMERGENCY,
        case when Violation = 'REMOVE/REPLACE FLAT TIRE' then 1 else 0 end as REMOVE_REPLACE_FLAT_TIRE,
        case when Violation = 'OVERTIME STANDING DP' then 1 else 0 end as OVERTIME_STANDING_DP,
        `Fine Amount` as fine_amount
    from 
        data
    """
Clean_Data = SQL_Func(query)

In [6]:
Clean_Data.head()

Unnamed: 0,in_state,passenger_car,business_hours,year,is_weekend,Manhattan,Bronx,Brooklyn,Queens,Staten_Island,...,VEH_SALE_WSHNG_RPRNG_DRIVEWAY,MARGINAL_STREET_WATER_FRONT,OVERTIME_STDG_D_S,BUS_LANE_VIOLATION,UNALTERED_COMM_VEH_NME_ADDRESS,NO_STANDING_OFF_STREET_LOT,NO_STANDING_SNOW_EMERGENCY,REMOVE_REPLACE_FLAT_TIRE,OVERTIME_STANDING_DP,fine_amount
0,1,1,0,2021,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,115.0
1,1,1,0,2016,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,115.0
2,0,1,1,2017,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,60.0
3,1,1,1,2017,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,45.0
4,1,1,1,2017,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,35.0


In [7]:
#Create a subset with a fraction of the original dataset due to size and memory issues

Clean_Data = Clean_Data.sample(frac=0.25)


# Linear Regression

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

X = Clean_Data.drop('fine_amount', axis=1)
y = Clean_Data['fine_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

column_names = X_train.columns

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

linear_model = LinearRegression()

linear_model.fit(X_train, y_train)

y_pred = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, y_pred)
r2_linear = r2_score(y_test, y_pred)

linear_coefficients = linear_model.coef_
linear_feature_importance = pd.DataFrame({'Feature': column_names, 'Standardized Coefficient': linear_coefficients})
linear_feature_importance['Absolute Coefficient'] = linear_feature_importance['Standardized Coefficient'].abs()
linear_feature_importance = linear_feature_importance.sort_values(by='Absolute Coefficient', ascending=False)

print(f"Linear Regression Mean Squared Error: {mse_linear}")
print(f"Linear Regression R-squared: {r2_linear}")
linear_feature_importance.head(10)

Linear Regression Mean Squared Error: 41.612795093410696
Linear Regression R-squared: 0.9589289532871226


Unnamed: 0,Feature,Standardized Coefficient,Absolute Coefficient
39,INSP_STICKER_MUTILATED_C_FEIT,-1019583000000.0,1019583000000.0
13,NO_STANDING_DAY_TIME_LIMITS,14.21991,14.21991
14,FIRE_HYDRANT,11.89006,11.89006
17,DOUBLE_PARKING,9.786565,9.786565
12,FAIL_TO_DSPLY_MUNI_METER_RECPT,-8.779409,8.779409
20,NO_STANDING_BUS_STOP,8.642346,8.642346
10,NO_PARKING_STREET_CLEANING,-6.398149,6.398149
21,NO_STANDING_COMM_METER_ZONE,5.518653,5.518653
18,EXPIRED_MUNI_METER,-5.342601,5.342601
24,CROSSWALK,5.163139,5.163139


# Second Degree Polynomial Regression

In [9]:
from sklearn.preprocessing import PolynomialFeatures

X = Clean_Data.drop('fine_amount', axis=1)
y = Clean_Data['fine_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

poly_features = PolynomialFeatures(degree=2)
X_train = poly_features.fit_transform(X_train)
X_test = poly_features.transform(X_test)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

poly_model = LinearRegression()
poly_model.fit(X_train, y_train)

y_pred = poly_model.predict(X_test)

mse_poly = mean_squared_error(y_test, y_pred)
r2_poly = r2_score(y_test, y_pred)


In [10]:
feature_names_poly = poly_features.get_feature_names_out(column_names)

poly_coefficients = poly_model.coef_
poly_feature_importance = pd.DataFrame({
    'Feature': feature_names_poly,
    'Standardized Coefficient': poly_coefficients
    })
poly_feature_importance['Absolute Coefficient'] = poly_feature_importance['Standardized Coefficient'].abs()
poly_feature_importance = poly_feature_importance.sort_values(by='Absolute Coefficient', ascending=False)

print(f"2nd Degree Poly Regression Mean Squared Error: {mse_poly}")
print(f"2nd Degree Poly Regression R-squared: {r2_poly}")
print("Number of features:", len(poly_feature_importance))

2nd Degree Poly Regression Mean Squared Error: 1.0449546656622481e+27
2nd Degree Poly Regression R-squared: -1.0530264698306461e+24
Number of features: 5460


In [11]:
poly_feature_importance.head(10)

Unnamed: 0,Feature,Standardized Coefficient,Absolute Coefficient
4899,INTERSECTION^2,-14639550000000.0,14639550000000.0
1365,NO_STANDING_DAY_TIME_LIMITS^2,10195600000000.0,10195600000000.0
14,NO_STANDING_DAY_TIME_LIMITS,-9614952000000.0,9614952000000.0
5109,EXCAVATION_VEHICLE_OBSTR_TRAFF^2,-8602310000000.0,8602310000000.0
71,INTERSECTION,8480711000000.0,8480711000000.0
69,NO_STD_EXC_TRKS_GMTDST_NO_TRK,-6416175000000.0,6416175000000.0
70,OVERNIGHT_TRACTOR_TRAILER_PKG,6239650000000.0,6239650000000.0
97,OVERTIME_STDG_D_S,6079053000000.0,6079053000000.0
154,in_state REG_STICKER_MUTILATED_C_FEIT,-5792307000000.0,5792307000000.0
102,REMOVE_REPLACE_FLAT_TIRE,-5785098000000.0,5785098000000.0


# Elastc Net Regression

In [12]:
from sklearn.linear_model import ElasticNet

X = Clean_Data.drop('fine_amount', axis=1)
y = Clean_Data['fine_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

column_names = X_train.columns

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

elastic_net_model = ElasticNet(alpha=1.0, l1_ratio=0.5)

elastic_net_model.fit(X_train, y_train)

y_pred = elastic_net_model.predict(X_test)

mse_enet = mean_squared_error(y_test, y_pred)
r2_enet = r2_score(y_test, y_pred)

In [13]:
enet_coefficients = elastic_net_model.coef_

enet_feature_importance = pd.DataFrame({
    'Feature': column_names,
    'Coefficient': enet_coefficients
    })
enet_feature_importance['Absolute Coefficient'] = enet_feature_importance['Coefficient'].abs()
enet_feature_importance = enet_feature_importance.sort_values(by='Absolute Coefficient', ascending=False)

print(f"Elastic Net Regression Mean Squared Error: {mse_enet}")
print(f"Elastic Net Regression R-squared: {r2_enet}")
enet_feature_importance.head(10)

Elastic Net Regression Mean Squared Error: 177.48212042809857
Elastic Net Regression R-squared: 0.8274804144525145


Unnamed: 0,Feature,Coefficient,Absolute Coefficient
12,FAIL_TO_DSPLY_MUNI_METER_RECPT,-7.807557,7.807557
13,NO_STANDING_DAY_TIME_LIMITS,7.778541,7.778541
14,FIRE_HYDRANT,6.48316,6.48316
10,NO_PARKING_STREET_CLEANING,-6.354715,6.354715
17,DOUBLE_PARKING,5.092335,5.092335
18,EXPIRED_MUNI_METER,-4.529881,4.529881
20,NO_STANDING_BUS_STOP,4.477263,4.477263
5,Manhattan,2.873926,2.873926
21,NO_STANDING_COMM_METER_ZONE,2.677652,2.677652
24,CROSSWALK,2.577928,2.577928


# Random Forest Regression

In [14]:
from sklearn.ensemble import RandomForestRegressor

X = Clean_Data.drop('fine_amount', axis=1)
y = Clean_Data['fine_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

rf_model = RandomForestRegressor(n_estimators=100, max_depth=None)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

In [15]:
feature_importance = pd.DataFrame({'Feature': X_train.columns, 'Importance': rf_model.feature_importances_})
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)

print(f"Random Forest Regression Mean Squared Error: {mse_rf}")
print(f"Random Forest Regression R-squared: {r2_rf}")
feature_importance.head(10)

Random Forest Regression Mean Squared Error: 20.226224977537903
Random Forest Regression R-squared: 0.979842860368237


Unnamed: 0,Feature,Importance
10,NO_PARKING_STREET_CLEANING,0.203193
12,FAIL_TO_DSPLY_MUNI_METER_RECPT,0.17218
18,EXPIRED_MUNI_METER,0.114341
16,NO_PARKING_DAY_TIME_LseIMITS,0.079565
11,INSP_STICKER_EXPIRED_MISSING,0.073269
13,NO_STANDING_DAY_TIME_LIMITS,0.06784
15,REG_STICKER_EXPIRED_MISSING,0.051425
22,FAIL_TO_DISP_MUNI_METER_RECPT,0.030658
5,Manhattan,0.024232
19,FRONT_OR_BACK_PLATE_MISSING,0.02126


# Multilayer Perceptron

In [18]:
from sklearn.neural_network import MLPRegressor

X = Clean_Data.drop('fine_amount', axis=1)
y = Clean_Data['fine_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

column_names = X_train.columns

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

mlp_model = MLPRegressor()

mlp_model.fit(X_train, y_train)

y_pred = mlp_model.predict(X_test)

mse_mlp = mean_squared_error(y_test, y_pred)
r2_mlp = r2_score(y_test, y_pred)



In [19]:
print(f"MLP Regression Mean Squared Error: {mse_mlp}")
print(f"MLP Regression R-squared: {r2_mlp}")

MLP Regression Mean Squared Error: 24.295043096745363
MLP Regression R-squared: 0.9757193040058081
