In [None]:
#Dependencies
from google.colab import drive
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from datetime import date
import math
import holidays

#Drive Mount
#Attach Google Drive to the runtime; `directory` is the folder the CSV files are read from and saved to
drive.mount('/content/drive')
directory = '/content/drive/MyDrive/Data Projects/Parking Citations/Files'

#Dataframe Settings
#A limit of None removes pandas' row/column truncation when a frame is displayed
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Data Preparation
______________________


In [8]:
#Importing Data
#Only the three listed columns are loaded from the citations CSV
df = pd.read_csv(
    f'{directory}/Parking_Citations_3-25.csv',
    usecols=['Fine amount', 'Issue Date', 'Issue time'],
)

In [19]:
#Drop Missing Values, then Rename Columns
df = df.dropna().rename(
    columns={'Fine amount': 'Fine_Amount', 'Issue time': 'Time', 'Issue Date': 'Date'}
)

#Converting Date to datetime object
df['Date'] = pd.to_datetime(df['Date'])

#Separating Year
df['Year'] = df['Date'].dt.year

#Only including years from 2015 on, and dates no later than 2022-03-24 00:00
df = df.loc[
    (df['Year'] >= 2015)
    & (df['Date'] <= datetime.strptime('2022-03-24', '%Y-%m-%d'))
]

#Calendar features derived from Date
df = df.assign(
    Month=df['Date'].dt.month_name(),        #Month of the year (name)
    Month_Index=df['Date'].dt.month,         #Month of the year index
    Day=df['Date'].dt.day,                   #Day of the month
    Weekday=df['Date'].dt.day_name(),        #Day of the week (name)
    Week=df['Date'].dt.isocalendar().week,   #ISO week of the year
    Year_Day=df['Date'].dt.dayofyear,        #Day of the year
)

#Calculating Hours
def hour(x):
  """Return the hour part of ``x``, where ``x`` is treated as a number of the form HHMM.

  Example: 1251 -> 12, 5 -> 0.
  """
  #Dividing by 100 puts the minute digits behind the decimal point; flooring discards them
  hundreds = x / 100.0
  return int(math.floor(hundreds))
#Run hour() on every Time value
df['Hour'] = df['Time'].map(hour)

#Calculating Minutes
def minute(x):
  """Return the minute part of ``x``, where ``x`` is treated as a number of the form HHMM.

  Example: 1251 -> 51, 2300 -> 0.
  """
  #Whole hundreds in x; removing them leaves the last two digits
  whole_hundreds = math.floor(x / 100.0)
  return int(x - whole_hundreds * 100)
#Run minute() on every Time value
df['Minute'] = df['Time'].map(minute)

#Weekends
#Collapse the day names into two labels; any value not listed below is left unchanged
df['Weekend'] = df['Weekday'].replace({
    'Monday': 'Weekday',
    'Tuesday': 'Weekday',
    'Wednesday': 'Weekday',
    'Thursday': 'Weekday',
    'Friday': 'Weekday',
    'Saturday': 'Weekend',
    'Sunday': 'Weekend',
})

#Holidays
#Calendar object queried by get_holiday below
us_holidays = holidays.US()
def get_holiday(x):
    """Return True when ``x`` is contained in ``us_holidays``, otherwise False."""
    is_listed = x in us_holidays
    return is_listed
#Flag every Date with get_holiday, then turn the booleans into text labels
df['Holiday'] = df['Date'].apply(get_holiday).map({False: 'No Holiday', True: 'Holiday'})

#Dropping Fine Amount Outliers and setting Column Order
#Rows with a fine above 5000 are removed; the column list fixes the final layout
df = df.loc[
    df['Fine_Amount'].le(5000),
    ['Date', 'Year', 'Day', 'Month', 'Month_Index', 'Weekday', 'Weekend', 'Holiday', 'Week', 'Year_Day', 'Time', 'Hour', 'Minute', 'Fine_Amount'],
]

In [20]:
#Saving Cleaned File
#index=False leaves the row index out of the written CSV
df.to_csv(
    path_or_buf=f'{directory}/Parking_Citations_Cleaned.csv',
    index=False,
)