## Read CSV File

In [2]:
import pandas as pd
# Path of the input CSV; being relative, it is resolved against the
# notebook's current working directory.
csv_file_path = 'Data/Data_Sets/KCPD_5_Year_Analysis_Cleaned.csv'
df = pd.read_csv(csv_file_path)

# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called on its own: wrapping it in print() appends a stray "None".
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 445289 entries, 0 to 445288
Data columns (total 30 columns):
 #   Column                           Non-Null Count   Dtype  
---  ------                           --------------   -----  
 0   Report_No                        445289 non-null  object 
 1   Reported_Date                    445289 non-null  object 
 2   Reported_Time                    445289 non-null  object 
 3   Year                             445289 non-null  int64  
 4   Quarter                          445289 non-null  int64  
 5   Month                            445289 non-null  object 
 6   Day_of_Week                      445289 non-null  object 
 7   From_Date                        445289 non-null  object 
 8   From_Time                        445289 non-null  object 
 9   Adjusted_To_Date                 445289 non-null  object 
 10  Adjusted_To_Time                 445289 non-null  object 
 11  Offense                          445289 non-null  object 
 12  De

## Create New Feature - Duration
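This feature was created after reviewing the summary statistics. Because the data records both the start and the end date/time of each offense, we can compute `Duration`: the number of minutes from when the offense began (`From_Date`, `From_Time`) to when it ended (`Adjusted_To_Date`, `Adjusted_To_Time`). This may also help estimate how long it takes for a crime to be fully reported relative to the exact time it started.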

In [None]:
# Helper that turns one record's start/end date and time fields into a duration
def calculate_duration(row):
    """Return the number of minutes between a record's start and end timestamps.

    Parameters
    ----------
    row : mapping-like
        Anything that supports ``row[key]`` for the keys 'From_Date',
        'From_Time', 'Adjusted_To_Date' and 'Adjusted_To_Time'
        (for example a DataFrame row passed in by ``df.apply(..., axis=1)``).

    Returns
    -------
    float
        End timestamp minus start timestamp, in minutes. The value is
        negative when the end timestamp precedes the start timestamp.
    """
    # Each timestamp is built by joining the date and time with one space
    # and letting pandas parse the combined text.
    started_at = pd.to_datetime(f"{row['From_Date']} {row['From_Time']}")
    ended_at = pd.to_datetime(f"{row['Adjusted_To_Date']} {row['Adjusted_To_Time']}")

    elapsed = ended_at - started_at
    seconds_per_minute = 60
    return elapsed.total_seconds() / seconds_per_minute

# Create the new 'Duration' column: minutes from each record's start
# timestamp to its end timestamp.
# The timestamps are parsed one whole column at a time; a row-wise
# df.apply(..., axis=1) would call pd.to_datetime twice for every row,
# which is very slow on a frame with hundreds of thousands of rows.
# NOTE(review): whole-column parsing expects one consistent date/time format
# per column -- confirm this holds for the data set.
offense_start = pd.to_datetime(
    df['From_Date'].astype(str) + ' ' + df['From_Time'].astype(str)
)
offense_end = pd.to_datetime(
    df['Adjusted_To_Date'].astype(str) + ' ' + df['Adjusted_To_Time'].astype(str)
)
df['Duration'] = (offense_end - offense_start).dt.total_seconds() / 60

## Creation of New Feature - Time of Day

In [None]:
import pandas as pd
from datetime import datetime, time


# Translate a clock-time string into a coarse part-of-day label
def get_time_of_day(from_time_str):
    """Classify an 'HH:MM:SS' time string as a part of the day.

    The string is parsed with the strict format '%H:%M:%S' and matched
    against half-open ranges (lower bound included, upper bound excluded):

    * 05:00 to 12:00 -> 'Morning'
    * 12:00 to 17:00 -> 'Afternoon'
    * 17:00 to 21:00 -> 'Evening'
    * any other time -> 'Night'
    """
    clock = pd.to_datetime(from_time_str, format='%H:%M:%S').time()

    # (inclusive lower bound, exclusive upper bound, label)
    day_parts = (
        (time(5, 0), time(12, 0), 'Morning'),
        (time(12, 0), time(17, 0), 'Afternoon'),
        (time(17, 0), time(21, 0), 'Evening'),
    )
    for lower, upper, label in day_parts:
        if lower <= clock < upper:
            return label

    # Everything from 21:00 up to (but not including) 05:00
    return 'Night'

# Label every record's 'From_Time' with its part of day, then store the
# labels on the DataFrame as the new 'Time_of_Day' column
time_of_day_labels = df['From_Time'].apply(get_time_of_day)
df['Time_of_Day'] = time_of_day_labels


## Data Preprocessing - One-Hot Encoding

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Initialize the OneHotEncoder so that it returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the current keyword first and fall back to the
# legacy one only on older installations (which reject it with a TypeError).
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

# Fit and transform the Age_Range column
# (double brackets select a one-column DataFrame, i.e. 2-D input)
age_range_encoded = encoder.fit_transform(df[['Age_Range']])

# Create a DataFrame with the encoded features. It reuses df's index so the
# column-wise concat below pairs rows correctly even if df's index is not a
# plain 0..n-1 range; a fresh default index would misalign in that case.
age_range_encoded_df = pd.DataFrame(
    age_range_encoded,
    columns=encoder.get_feature_names_out(['Age_Range']),
    index=df.index,
)

# Concatenate the original DataFrame with the encoded DataFrame
KCPD_Analysis_df = pd.concat([df, age_range_encoded_df], axis=1)
