# Import Libraries

In [37]:
import pandas as pd

# Keep Ages from 1 to 100

In [38]:
def clean_age(df):
    """Return only the rows whose 'Vict Age' lies in the inclusive range 1-100.

    Rows with a missing age fail both comparisons and are therefore excluded.
    """
    victim_age = df["Vict Age"]
    # Both bounds are inclusive
    age_in_range = (victim_age >= 1) & (victim_age <= 100)
    return df.loc[age_in_range]

# Clean the Date Format

In [39]:
def clean_dates(df: pd.DataFrame) -> pd.DataFrame:
    """Reduce 'Date Rptd' and 'DATE OCC' to their first whitespace-separated token.

    For example, a value such as '01/01/2020 12:00:00 AM' becomes '01/01/2020'.

    Args:
        df: DataFrame containing string columns 'Date Rptd' and 'DATE OCC'.

    Returns:
        A new DataFrame with both columns trimmed. The input ``df`` is not
        modified. Missing or blank entries come back as NaN.
    """
    # Work on a copy: the input may be a filtered slice of another frame, and
    # assigning into such a slice triggers SettingWithCopyWarning and can
    # silently alter (or fail to alter) the caller's data.
    cleaned = df.copy()

    for column in ('Date Rptd', 'DATE OCC'):
        # Vectorized split; unlike a per-row ``x.split()[0]`` this does not
        # raise on missing values or empty strings.
        cleaned[column] = cleaned[column].str.split().str[0]

    return cleaned

# Drop Specific Columns

In [40]:
def drop_columns(df, drop_list):
    """Return a copy of ``df`` without the columns named in ``drop_list``."""
    trimmed = df.drop(labels=drop_list, axis="columns")
    return trimmed

# Label each crime as Day or Night using the sunrise and sunset times for its date

In [41]:
def sun_time(df:pd.DataFrame, day_df:pd.DataFrame):
    """Label every row of ``df`` as 'Day' or 'Night' in a new 'Sun' column.

    ``df`` is left-joined to ``day_df`` on 'DATE OCC' == 'Date'. A row is 'Day'
    when 'Time AM' <= 'TIME OCC' < 'Time PM'; every other row, including rows
    with no matching date in ``day_df``, is 'Night'. The helper columns
    'Date', 'Time AM' and 'Time PM' are removed from the result.
    """
    ## Attach the per-date times to each row
    combined = df.merge(day_df, how='left', left_on='DATE OCC', right_on='Date')

    ## Lower bound is inclusive, upper bound is exclusive
    occurred = combined['TIME OCC']
    not_before_am = occurred >= combined['Time AM']
    before_pm = occurred < combined['Time PM']

    ## Default to Night, then overwrite the rows inside the window
    combined['Sun'] = 'Night'
    combined.loc[not_before_am & before_pm, 'Sun'] = 'Day'

    ## The joined helper columns are no longer needed
    return combined.drop(columns=['Date', 'Time AM', 'Time PM'])

# Execute Functions

In [42]:
if __name__ == "__main__":
    ## Load the inputs: Los Angeles crime records and Los Angeles daylight times
    df = pd.read_csv("Data/Crime_Data_from_2020_to_Present.csv")
    day_df = pd.read_csv("Data/daylightLA.csv")

    ## Pipeline: filter by age -> trim dates -> label Day/Night
    df = sun_time(clean_dates(clean_age(df)), day_df)

    ## Persist the refined data set without the index column
    df.to_csv(
        "Data/RefinedFile.csv",
        index=False,
        sep=',',
        encoding='utf-8',
    )