In [92]:
# Import the libraries this notebook needs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3



In [93]:
# Read both source CSV files into DataFrames
food_df = pd.read_csv(
    "Louisville_Metro_KY_Inspection_Results_Food_Service_Establishments_1200720516040038198.csv"
)
gun_df = pd.read_csv(
    "Gun_Violence_Data_3431704539549171373.csv"
)

# Report each dataset's (rows, columns) shape; the gun block is preceded by a blank line
print("Food Inspection Dataset:", f"Shape: {food_df.shape}", sep="\n")
print("\nGun Violence Dataset:", f"Shape: {gun_df.shape}", sep="\n")

Food Inspection Dataset:
Shape: (6805, 21)

Gun Violence Dataset:
Shape: (65569, 17)


In [94]:
##Exploratory Data Analysis (EDA)

# Map a display label to each loaded DataFrame so both can be explored the same way
datasets = dict([
    ("Food Data", food_df),
    ("Gun Violence Data", gun_df),
])

#Head & tail of rows, Column names, null values, data types, basic numeric statistics
def explore_dataset(df, name):
    """Print a plain-text overview of one DataFrame.

    Emits, in order: a header carrying ``name``, the first and last three
    rows, the column names, per-column null counts, the column dtypes and
    the ``describe()`` summary.

    Args:
        df: DataFrame to summarise.
        name: Label printed in the header line.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"\nDataset: {name}")

    # Each section is built lazily so output appears section by section,
    # in the same order the values are computed.
    sections = (
        ("First 3 Rows:\n", lambda: df.head(3)),
        ("\nLast 3 Rows:\n", lambda: df.tail(3)),
        ("\nColumn Names:\n", lambda: list(df.columns)),
        ("\nNull Value Count:\n", lambda: df.isnull().sum()),
        ("\nData Types:\n", lambda: df.dtypes),
        ("\nNumeric Summary:\n", lambda: df.describe()),
    )
    for heading, build in sections:
        print(heading, build())

# Print the exploratory summary for every dataset registered in `datasets`.
# Note: the loop variables `name` and `df` stay bound at notebook scope after
# the loop finishes, holding the last (label, DataFrame) pair.
for name, df in datasets.items():
    explore_dataset(df, name)


Dataset: Food Data
First 3 Rows:
    ScoreRecent GradeRecent DateRecent  Score2 Grade2 Date2  Score3 Grade3  \
0            0         NaN        NaN       0    NaN   NaN       0    NaN   
1            0         NaN        NaN       0    NaN   NaN       0    NaN   
2            0         NaN        NaN       0    NaN   NaN       0    NaN   

  Date3  permit_number  ...   facility_type_description subtype  \
0   NaN         125387  ...   RETAIL/FOOD ESTABLISHMENT      54   
1   NaN         125455  ...  FOOD SERVICE ESTABLISHMENT      52   
2   NaN         125467  ...  FOOD SERVICE ESTABLISHMENT      11   

                 subtype_description        premise_name     premise_address  \
0  RETAIL-FOOD  10,000 SQ FT OR LESS           BOONES BP  3320 FRANKFORT AVE   
1   SELF-CONTAINED MOBILE FOOD UNITS        SLIM GOODY'S                 NaN   
2                       FOOD SERVICE  BILLION CUP COFFEE  1812 BROWNSBORO RD   

  premise_city premise_state premise_zip         opening_date Obje

In [95]:
##Cleaning the Data 

def _normalize_zip(zip_series):
    """Return ZIP codes as zero-padded 5-character strings, keeping missing values as NaN."""
    text = zip_series.astype(str).str.strip()
    # A numeric ZIP column that contains NaNs is loaded as float, so 40206
    # arrives as "40206.0"; drop that artificial decimal part.
    text = text.str.replace(r'\.0+$', '', regex=True)
    # Missingness must be read from the raw values: astype(str) turns NaN into
    # the text "nan", which a later dropna() can no longer detect.
    is_missing = zip_series.isna() | text.eq('')
    return text.str.zfill(5).mask(is_missing)


def _clean_category(values, placeholder='Unknown'):
    """Trim and title-case a text column, replacing missing or blank entries with `placeholder`."""
    text = values.astype(str).str.strip().str.title()
    # Same reasoning as for ZIP codes: decide what is missing before the
    # string conversion hides it behind "Nan"/"None".
    is_missing = values.isna() | text.eq('')
    return text.mask(is_missing, placeholder)


def clean_datasets(food_df, gun_df, min_date='2020-01-01'):
    """Clean the food-inspection and gun-violence DataFrames.

    Both inputs are copied, so the caller's frames are left untouched.
    Column names are lower-cased and spaces are replaced by underscores
    before any column is accessed.

    Food data:
        * adds ``zip_code`` (5-character string) derived from ``premise_zip``
        * parses ``daterecent`` and ``opening_date`` (unparseable -> NaT)
        * normalises ``facility_type``; fills missing city/state/name
        * drops rows missing score, grade, ZIP, inspection date or address
        * keeps inspections dated on or after ``min_date``

    Gun data:
        * normalises ``zip_code`` and parses ``datetime``
        * normalises ``sex``, ``race`` and ``age_group``; fills missing
          ``neighborhood`` and ``cause``
        * drops rows missing ZIP, datetime, coordinates or crime type
        * keeps incidents dated on or after ``min_date``

    Args:
        food_df: Raw food-inspection DataFrame.
        gun_df: Raw gun-violence DataFrame.
        min_date: Earliest date to keep (anything ``pd.Timestamp`` accepts).
            Defaults to ``'2020-01-01'``.

    Returns:
        Tuple ``(food_df, gun_df)`` of cleaned frames with fresh 0..n-1 indexes.

    Raises:
        KeyError: If a column named above is absent after renaming.
    """
    cutoff = pd.Timestamp(min_date)

    # --- Clean food_df ---
    food_df = food_df.copy()
    food_df.columns = food_df.columns.str.lower().str.replace(' ', '_', regex=False)

    # All column transforms happen on the private copy *before* any row
    # filtering, so no assignment ever targets a filtered slice.
    food_df['zip_code'] = _normalize_zip(food_df['premise_zip'])
    food_df['daterecent'] = pd.to_datetime(food_df['daterecent'], errors='coerce')
    food_df['opening_date'] = pd.to_datetime(food_df['opening_date'], errors='coerce')
    food_df['facility_type'] = _clean_category(food_df['facility_type'])

    food_df['premise_city'] = food_df['premise_city'].fillna('Unknown')
    food_df['premise_state'] = food_df['premise_state'].fillna('Unknown')
    food_df['premise_name'] = food_df['premise_name'].fillna('Unnamed Facility')

    food_df = food_df.dropna(
        subset=['scorerecent', 'graderecent', 'zip_code', 'daterecent', 'premise_address']
    )
    food_df = food_df[food_df['daterecent'] >= cutoff].reset_index(drop=True)

    # --- Clean gun_df ---
    gun_df = gun_df.copy()
    gun_df.columns = gun_df.columns.str.lower().str.replace(' ', '_', regex=False)

    gun_df['zip_code'] = _normalize_zip(gun_df['zip_code'])
    gun_df['datetime'] = pd.to_datetime(gun_df['datetime'], errors='coerce')

    for col in ['sex', 'race', 'age_group']:
        gun_df[col] = _clean_category(gun_df[col])

    gun_df['neighborhood'] = gun_df['neighborhood'].fillna('Unknown')
    gun_df['cause'] = gun_df['cause'].fillna('Unknown')

    gun_df = gun_df.dropna(subset=['zip_code', 'datetime', 'latitude', 'longitude', 'crime_type'])
    gun_df = gun_df[gun_df['datetime'] >= cutoff].reset_index(drop=True)

    return food_df, gun_df