In [48]:
import pandas as pd
import requests
import json
import os
from config import geoapify_key
from zipfile import ZipFile
import zlib

### Data Processing (collection and manipulation of data)

#### Collection

In [2]:
# Zip File Extraction
# ? Instead of extracting one zip file at a time, extract every archive in a single call
def unzip_files(zip_path, extract_path):
    """Extract every member of one or more zip archives into ``extract_path``.

    Parameters
    ----------
    zip_path : str, os.PathLike, or iterable of those
        A single archive path or a collection of archive paths.
    extract_path : str or os.PathLike
        Directory that receives the extracted members of all archives.
    """
    # A lone path is wrapped so the loop below always sees a list of archives.
    if isinstance(zip_path, (str, os.PathLike)):
        archive_paths = [zip_path]
    else:
        archive_paths = list(zip_path)

    # Iterate over the ARGUMENT (the previous version looped over the global
    # `zip_paths`, silently ignoring whatever the caller passed in).
    for archive_path in archive_paths:
        with ZipFile(archive_path, 'r') as archive:
            # * Extracting all members of the zip file into the target location
            archive.extractall(path=extract_path)
# * Inputs for the zip extraction: the archives to open and the folder to extract into
zip_paths = [
    "/Users/galbeeir/Desktop/git/crime_analysis/Datasets_1.zip",
    "/Users/galbeeir/Desktop/git/crime_analysis/Datasets_2.zip",
]
extract_path = "/Users/galbeeir/Desktop/git/crime_analysis/"

# * Run the extraction for both archives
unzip_files(zip_path=zip_paths, extract_path=extract_path)


In [3]:
# * Dataset directories
datasets_1 = "/Users/galbeeir/Desktop/git/crime_analysis/Datasets_1/"
datasets_2 = "/Users/galbeeir/Desktop/git/crime_analysis/Datasets_2/"

# * List all the files from each directory.
#   os.listdir returns entries in arbitrary order, so they are sorted to keep
#   the row order of crime_df reproducible from one run to the next.
file_list_1 = sorted(os.listdir(datasets_1))
file_list_2 = sorted(os.listdir(datasets_2))

# * Read every CSV exactly once and collect the frames. Concatenating inside
#   the loop would re-copy all previously loaded rows on every iteration.
csv_frames = [
    pd.read_csv(os.path.join(directory, file), delimiter=',', encoding='utf-8')
    for directory, file_list in ((datasets_1, file_list_1), (datasets_2, file_list_2))
    for file in file_list
    if file.endswith(".csv")
]

# * Merge all files with a single concat. pd.concat raises on an empty list,
#   so fall back to an empty frame when no CSV was found.
crime_df = pd.concat(csv_frames) if csv_frames else pd.DataFrame()

crime_df.head()

Unnamed: 0.1,Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,Year,Updated On,Latitude,Longitude
0,0,11553168,JC100745,2019-01-01 00:00:00,008XX N MICHIGAN AVE,890,THEFT,FROM BUILDING,RESTAURANT,False,False,1833,18,2.0,8.0,2019,01/10/2019 03:16:50 PM,41.89877,-87.624115
1,1,11895528,JC515003,2019-01-01 00:00:00,059XX W LELAND AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,False,False,1622,16,45.0,15.0,2019,11/19/2019 03:57:06 PM,,
2,2,12456410,JE341674,2019-01-01 00:00:00,012XX W JACKSON BLVD,2825,OTHER OFFENSE,HARASSMENT BY TELEPHONE,RESIDENCE,False,False,1231,12,28.0,28.0,2019,08/19/2021 04:51:33 PM,,
3,3,12367811,JE232794,2019-01-01 00:00:00,008XX W AGATITE AVE,1537,OFFENSE INVOLVING CHILDREN,POSSESSION OF PORNOGRAPHIC PRINT,APARTMENT,True,False,1914,19,46.0,3.0,2019,05/18/2021 05:27:50 PM,,
4,4,12368777,JE234183,2019-01-01 00:00:00,035XX S COTTAGE GROVE AVE,281,CRIMINAL SEXUAL ASSAULT,NON-AGGRAVATED,RESIDENCE,False,False,212,2,4.0,36.0,2019,05/19/2021 06:08:48 PM,,


#### Key definitions
1. Ward = City council (There are **50 in Chicago**) 
2. Community Areas = There are **77 community areas** in Chicago 
    * => [Chicago - Community Areas & Wards](https://www.chicago.gov/city/en/depts/dgs/supp_info/citywide_maps.html)
3. District = There are **25 police districts** in Chicago => [Police Districts](https://home.chicagopolice.org/about/police-districts/)

**Data processing actions:**
1. Convert the community areas numbers to names
    1. Method = `pd.read_html` on the following page => [Wikipedia - community areas in Chicago](https://en.wikipedia.org/wiki/Community_areas_in_Chicago)

*Note:*<br>
To find out more about *Wards, Community Areas, and Police Districts*, please click here -> [`resources`](https://github.com/Kokolipa/crime_analysis/tree/main/Resources)
<br>

In [4]:
# Import Chicago community area names and codes from Wikipedia.
# read_html returns every table found on the page; only the first one is used.
wiki_tables = pd.read_html("https://en.wikipedia.org/wiki/Community_areas_in_Chicago")

# Flatten the header by removing the outermost column level
area_table = wiki_tables[0].droplevel(level=0, axis=1)

# Index by the area number so it can serve as the merge key later
area_table = area_table.set_index('No.')

# Area and density columns are not needed
area_table = area_table.drop(columns=['(sq mi.)', '(/sq mi.)', '(km2)', '(/km2)'])

# The scraped population header carries CSS/footnote residue; give it a clean name
chi_community_areas = area_table.rename(
    columns={'.mw-parser-output .nobold{font-weight:normal}(2020)[10]': 'Population'}
)

chi_community_areas.head()

Unnamed: 0_level_0,Name,Population
No.,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Rogers Park,55628
2,West Ridge,77122
3,Uptown,57182
4,Lincoln Square,40494
5,North Center,35114


In [5]:
# Removing the last row of the dataframe (the table's "Total" row, per the original note).
# NOTE: this cell is not idempotent - every re-run drops one more row.
chi_community_areas = chi_community_areas.iloc[:-1]

In [6]:
# Changing the dtype of the index ('No.') to the nullable integer dtype Int8
chi_community_areas.index = chi_community_areas.index.astype('Int8')
chi_community_areas.head()

Unnamed: 0_level_0,Name,Population
No.,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Rogers Park,55628
2,West Ridge,77122
3,Uptown,57182
4,Lincoln Square,40494
5,North Center,35114


In [7]:
# Transforming the dtype of 'Community Area' in crime_df to nullable Int8,
# the same dtype given to the index of chi_community_areas
crime_df['Community Area'] = crime_df['Community Area'].astype('Int8')

In [8]:
# Validating the transformation: display the resulting dtype
crime_df['Community Area'].dtype

Int8Dtype()

In [9]:
# Left-join the community area table onto the crime records:
# 'Community Area' in crime_df is matched against the 'No.' index of chi_community_areas
crime_df = crime_df.merge(
    chi_community_areas,
    how='left',
    left_on=['Community Area'],
    right_on=['No.'],
)

In [10]:
# List the column labels after the merge
crime_df.columns

Index(['Unnamed: 0', 'ID', 'Case Number', 'Date', 'Block', 'IUCR',
       'Primary Type', 'Description', 'Location Description', 'Arrest',
       'Domestic', 'Beat', 'District', 'Ward', 'Community Area', 'Year',
       'Updated On', 'Latitude', 'Longitude', 'Name', 'Population'],
      dtype='object')

In [11]:
# Dropping columns that are not used further
unused_columns = ['Unnamed: 0', 'Community Area', 'Population', 'Description', 'IUCR', 'Beat']
crime_df = crime_df.drop(columns=unused_columns)

In [12]:
# Reorganising the columns of the dataset
column_order = [
    'ID', 'Case Number', 'Date', 'Year', 'Updated On', 'Block',
    'Primary Type', 'Location Description', 'Arrest', 'Domestic',
    'District', 'Ward', 'Name', 'Latitude', 'Longitude',
]
crime_df = crime_df.reindex(columns=column_order)

In [13]:
# Renaming columns: the merged 'Name' becomes 'Community Area', 'District' becomes 'Police District'
crime_df = crime_df.rename(columns={'Name': 'Community Area', 'District': 'Police District'})

#### Manipulation

In [15]:
# Search key method (ABC): count the 'Primary Type' values that start with a given letter
starts_with_c = crime_df['Primary Type'].str.startswith('C')
crime_df.loc[starts_with_c, 'Primary Type'].value_counts()

CRIMINAL DAMAGE                      118306
CRIMINAL TRESPASS                     20967
CRIMINAL SEXUAL ASSAULT                5716
CRIM SEXUAL ASSAULT                     983
CONCEALED CARRY LICENSE VIOLATION       808
Name: Primary Type, dtype: int64

In [16]:
# Unify the two spellings of the same offence.
# Assign the result back instead of calling an inplace method on crime_df[col]:
# that is chained assignment and no longer updates the frame under pandas Copy-on-Write.
crime_df['Primary Type'] = crime_df['Primary Type'].replace({'CRIM SEXUAL ASSAULT': 'CRIMINAL SEXUAL ASSAULT'})

In [17]:
# Replacing NaN values in the Location Description column with the 'Blank' value.
# The result is assigned back (an inplace fillna on crime_df[col] is chained
# assignment and does not update the frame under pandas Copy-on-Write).
crime_df['Location Description'] = crime_df['Location Description'].fillna('Blank')

In [18]:
# Search key method (ABC): count the 'Location Description' values that start with a given letter
starts_with_z = crime_df['Location Description'].str.startswith('Z')
crime_df.loc[starts_with_z, 'Location Description'].value_counts()

Series([], Name: Location Description, dtype: int64)

In [19]:
# Replacing all duplicated / look-alike values with one canonical label per location.
# Fixes compared with the previous mapping:
#   * 'CTA ...' locations are labelled 'CHICAGO TRANSIT AUTHORITY' (they were
#     mislabelled 'CRIMINAL TRIBES ACT'); update any downstream filter on the old label.
#   * the mezzanine key is also listed without its trailing space, which
#     prevented it from matching a value that has no trailing space.
#   * the duplicated 'VEHICLE - COMMERCIAL' key is listed once.
location_aliases = {
    'AIRPORT TERMINAL UPPER LEVEL - SECURE AREA': 'AIRPORT',
    'AIRPORT TERMINAL LOWER LEVEL - NON-SECURE AREA': 'AIRPORT',
    'AIRPORT PARKING LOT': 'AIRPORT',
    'AIRPORT BUILDING NON-TERMINAL - NON-SECURE AREA': 'AIRPORT',
    'AIRPORT TERMINAL UPPER LEVEL - NON-SECURE AREA': 'AIRPORT',
    'AIRPORT EXTERIOR - NON-SECURE AREA': 'AIRPORT',
    'AIRPORT BUILDING NON-TERMINAL - SECURE AREA': 'AIRPORT',
    'AIRPORT EXTERIOR - SECURE AREA': 'AIRPORT',
    'AIRPORT/AIRCRAFT': 'AIRPORT',
    'AIRPORT VENDING ESTABLISHMENT': 'AIRPORT',
    'AIRPORT TRANSPORTATION SYSTEM (ATS)': 'AIRPORT',
    'AIRPORT TERMINAL MEZZANINE - NON-SECURE AREA ': 'AIRPORT',  # original key (trailing space) kept
    'AIRPORT TERMINAL MEZZANINE - NON-SECURE AREA': 'AIRPORT',
    'BOAT / WATERCRAFT': 'BOAT/WATERCRAFT',
    'CTA TRAIN': 'CHICAGO TRANSIT AUTHORITY',
    'CTA BUS': 'CHICAGO TRANSIT AUTHORITY',
    'CTA PLATFORM': 'CHICAGO TRANSIT AUTHORITY',
    'CTA STATION': 'CHICAGO TRANSIT AUTHORITY',
    'CTA BUS STOP': 'CHICAGO TRANSIT AUTHORITY',
    'CTA PARKING LOT / GARAGE / OTHER PROPERTY': 'CHICAGO TRANSIT AUTHORITY',
    'CTA GARAGE / OTHER PROPERTY': 'CHICAGO TRANSIT AUTHORITY',
    'CTA TRACKS - RIGHT OF WAY': 'CHICAGO TRANSIT AUTHORITY',
    'CTA "L" TRAIN': 'CHICAGO TRANSIT AUTHORITY',
    'CTA "L" PLATFORM': 'CHICAGO TRANSIT AUTHORITY',
    'CTA PROPERTY': 'CHICAGO TRANSIT AUTHORITY',
    'CTA SUBWAY STATION': 'CHICAGO TRANSIT AUTHORITY',
    'CHA APARTMENT': 'CHICAGO HOUSING AUTHORITY',
    'CHA PARKING LOT / GROUNDS': 'CHICAGO HOUSING AUTHORITY',
    'CHA HALLWAY / STAIRWELL / ELEVATOR': 'CHICAGO HOUSING AUTHORITY',
    'CHA HALLWAY/STAIRWELL/ELEVATOR': 'CHICAGO HOUSING AUTHORITY',
    'CHA PARKING LOT': 'CHICAGO HOUSING AUTHORITY',
    'CHA GROUNDS': 'CHICAGO HOUSING AUTHORITY',
    'CHA LOBBY': 'CHICAGO HOUSING AUTHORITY',
    'CHA PLAY LOT': 'CHICAGO HOUSING AUTHORITY',
    'CHA HALLWAY': 'CHICAGO HOUSING AUTHORITY',
    'CHA ELEVATOR': 'CHICAGO HOUSING AUTHORITY',
    'CHURCH/SYNAGOGUE/PLACE OF WORSHIP': 'CHURCH',
    'CHURCH / SYNAGOGUE / PLACE OF WORSHIP': 'CHURCH',
    'COLLEGE / UNIVERSITY - GROUNDS': 'COLLEGE/UNIVERSITY',
    'COLLEGE/UNIVERSITY GROUNDS': 'COLLEGE/UNIVERSITY',
    'FACTORY / MANUFACTURING BUILDING': 'FACTORY/MANUFACTURING BUILDING',
    'GAS STATION DRIVE/PROP.': 'GAS STATION',
    'GOVERNMENT BUILDING / PROPERTY': 'GOVERNMENT BUILDING/PROPERTY',
    'HOTEL / MOTEL': 'HOTEL/MOTEL',
    'HOSPITAL BUILDING / GROUNDS': 'HOSPITAL',
    'HOSPITAL BUILDING/GROUNDS': 'HOSPITAL',
    'LAKEFRONT / WATERFRONT / RIVERBANK': 'LAKEFRONT/WATERFRONT/RIVERBANK',
    'MOVIE HOUSE / THEATER': 'MOVIE HOUSE/THEATER',
    'MEDICAL / DENTAL OFFICE': 'MEDICAL/DENTAL OFFICE',
    'NURSING / RETIREMENT HOME': 'NURSING HOME/RETIREMENT HOME',
    'NURSING HOME': 'NURSING HOME/RETIREMENT HOME',
    'OTHER (SPECIFY)': 'OTHER',
    'OTHER RAILROAD PROPERTY / TRAIN DEPOT': 'OTHER RAILROAD PROP/TRAIN DEPOT',
    'OTHER RAILROAD PROP / TRAIN DEPOT': 'OTHER RAILROAD PROP/TRAIN DEPOT',
    'PARKING LOT / GARAGE (NON RESIDENTIAL)': 'PARKING LOT/GARAGE',
    'PARKING LOT/GARAGE(NON.RESID.)': 'PARKING LOT/GARAGE',
    'POLICE FACILITY / VEHICLE PARKING LOT': 'POLICE FACILITY',
    'POLICE FACILITY/VEH PARKING LOT': 'POLICE FACILITY',
    'RESIDENCE - PORCH / HALLWAY': 'RESIDENCE',
    'RESIDENCE - YARD (FRONT / BACK)': 'RESIDENCE',
    'RESIDENCE - GARAGE': 'RESIDENCE',
    'RESIDENCE PORCH/HALLWAY': 'RESIDENCE',
    'RESIDENTIAL YARD (FRONT/BACK)': 'RESIDENCE',
    'RESIDENCE-GARAGE': 'RESIDENCE',
    'SCHOOL - PUBLIC BUILDING': 'SCHOOL',
    'SCHOOL, PUBLIC, BUILDING': 'SCHOOL',
    'SCHOOL - PUBLIC GROUNDS': 'SCHOOL',
    'SCHOOL, PUBLIC, GROUNDS': 'SCHOOL',
    'SCHOOL - PRIVATE GROUNDS': 'SCHOOL',
    'SCHOOL, PRIVATE, BUILDING': 'SCHOOL',
    'SCHOOL - PRIVATE BUILDING': 'SCHOOL',
    'SCHOOL, PRIVATE, GROUNDS': 'SCHOOL',
    'SCHOOL YARD': 'SCHOOL',
    'SPORTS ARENA / STADIUM': 'SPORTS ARENA/STADIUM',
    'TAVERN / LIQUOR STORE': 'TAVERN/LIQUOR STORE',
    'VEHICLE - COMMERCIAL': 'VEHICLE-COMMERCIAL',
    'VEHICLE - COMMERCIAL: ENTERTAINMENT / PARTY BUS': 'VEHICLE-COMMERCIAL',
    'VEHICLE - COMMERCIAL: TROLLEY BUS': 'VEHICLE-COMMERCIAL',
    'VEHICLE-COMMERCIAL - TROLLEY BUS': 'VEHICLE-COMMERCIAL',
    'VEHICLE-COMMERCIAL - ENTERTAINMENT/PARTY BUS': 'VEHICLE-COMMERCIAL',
    'VEHICLE - OTHER RIDE SHARE SERVICE (LYFT, UBER, ETC.)': 'VEHICLE-OTHER RIDE SHARE SERVICE',
    'VEHICLE - OTHER RIDE SHARE SERVICE (E.G., UBER, LYFT)': 'VEHICLE-OTHER RIDE SHARE SERVICE',
}

# Assign the result back: an inplace replace on crime_df[col] is chained
# assignment and does not update the frame under pandas Copy-on-Write.
crime_df['Location Description'] = crime_df['Location Description'].replace(location_aliases)

In [20]:
# Preview the first rows after the value clean-up
crime_df.head()

Unnamed: 0,ID,Case Number,Date,Year,Updated On,Block,Primary Type,Location Description,Arrest,Domestic,Police District,Ward,Community Area,Latitude,Longitude
0,11553168,JC100745,2019-01-01 00:00:00,2019,01/10/2019 03:16:50 PM,008XX N MICHIGAN AVE,THEFT,RESTAURANT,False,False,18,2.0,Near North Side,41.89877,-87.624115
1,11895528,JC515003,2019-01-01 00:00:00,2019,11/19/2019 03:57:06 PM,059XX W LELAND AVE,DECEPTIVE PRACTICE,Blank,False,False,16,45.0,Portage Park,,
2,12456410,JE341674,2019-01-01 00:00:00,2019,08/19/2021 04:51:33 PM,012XX W JACKSON BLVD,OTHER OFFENSE,RESIDENCE,False,False,12,28.0,Near West Side,,
3,12367811,JE232794,2019-01-01 00:00:00,2019,05/18/2021 05:27:50 PM,008XX W AGATITE AVE,OFFENSE INVOLVING CHILDREN,APARTMENT,True,False,19,46.0,Uptown,,
4,12368777,JE234183,2019-01-01 00:00:00,2019,05/19/2021 06:08:48 PM,035XX S COTTAGE GROVE AVE,CRIMINAL SEXUAL ASSAULT,RESIDENCE,False,False,2,4.0,Oakland,,


### Memory Optimization

In [21]:
# Column dtypes and memory footprint before the dtype reduction;
# memory_usage='deep' requests the deep (object-content aware) memory measurement
crime_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1045814 entries, 0 to 1045813
Data columns (total 15 columns):
 #   Column                Non-Null Count    Dtype  
---  ------                --------------    -----  
 0   ID                    1045814 non-null  int64  
 1   Case Number           1045814 non-null  object 
 2   Date                  1045814 non-null  object 
 3   Year                  1045814 non-null  int64  
 4   Updated On            1045814 non-null  object 
 5   Block                 1045814 non-null  object 
 6   Primary Type          1045814 non-null  object 
 7   Location Description  1045814 non-null  object 
 8   Arrest                1045814 non-null  bool   
 9   Domestic              1045814 non-null  bool   
 10  Police District       1045814 non-null  int64  
 11  Ward                  1045767 non-null  float64
 12  Community Area        1045813 non-null  object 
 13  Latitude              1028014 non-null  float64
 14  Longitude             1028014 non-

In [22]:
# Preview the first rows before changing dtypes
crime_df.head()

Unnamed: 0,ID,Case Number,Date,Year,Updated On,Block,Primary Type,Location Description,Arrest,Domestic,Police District,Ward,Community Area,Latitude,Longitude
0,11553168,JC100745,2019-01-01 00:00:00,2019,01/10/2019 03:16:50 PM,008XX N MICHIGAN AVE,THEFT,RESTAURANT,False,False,18,2.0,Near North Side,41.89877,-87.624115
1,11895528,JC515003,2019-01-01 00:00:00,2019,11/19/2019 03:57:06 PM,059XX W LELAND AVE,DECEPTIVE PRACTICE,Blank,False,False,16,45.0,Portage Park,,
2,12456410,JE341674,2019-01-01 00:00:00,2019,08/19/2021 04:51:33 PM,012XX W JACKSON BLVD,OTHER OFFENSE,RESIDENCE,False,False,12,28.0,Near West Side,,
3,12367811,JE232794,2019-01-01 00:00:00,2019,05/18/2021 05:27:50 PM,008XX W AGATITE AVE,OFFENSE INVOLVING CHILDREN,APARTMENT,True,False,19,46.0,Uptown,,
4,12368777,JE234183,2019-01-01 00:00:00,2019,05/19/2021 06:08:48 PM,035XX S COTTAGE GROVE AVE,CRIMINAL SEXUAL ASSAULT,RESIDENCE,False,False,2,4.0,Oakland,,


In [32]:
# Bits - reduction: narrower integer/float widths and categoricals for repeated values
compact_dtypes = {
    'ID': 'Int32',
    'Year': 'Int16',
    'Police District': 'Int8',
    'Ward': 'category',
    'Primary Type': 'category',
    'Location Description': 'category',
    'Latitude': 'float32',
    'Longitude': 'float32',
}
crime_df = crime_df.astype(compact_dtypes)


In [24]:
# Parse 'Date' into datetime64; values that cannot be parsed become NaT (errors='coerce').
# `infer_datetime_format` is no longer passed: it is deprecated since pandas 2.0
# (where it only triggers a warning) and is not needed for these ISO-style strings.
crime_df['Date'] = pd.to_datetime(
    crime_df['Date'],
    errors = 'coerce',
)

In [25]:
# Parse 'Updated On' into datetime64; values that do not match become NaT (errors='coerce').
# The strings look like '01/10/2019 03:16:50 PM', so the format is spelled out
# explicitly. The previous format='%T' is not a valid strptime directive and only
# worked because pandas 1.x fell back to per-value parsing when inference was on.
crime_df['Updated On'] = pd.to_datetime(
    crime_df['Updated On'],
    errors = 'coerce',
    format = '%m/%d/%Y %I:%M:%S %p',
)

In [26]:
# Memory footprint after the dtype changes
# (original note: memory consumption reduction = 200+ MB)
crime_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1045814 entries, 0 to 1045813
Data columns (total 15 columns):
 #   Column                Non-Null Count    Dtype         
---  ------                --------------    -----         
 0   ID                    1045814 non-null  Int32         
 1   Case Number           1045814 non-null  object        
 2   Date                  1045814 non-null  datetime64[ns]
 3   Year                  1045814 non-null  Int16         
 4   Updated On            1045814 non-null  datetime64[ns]
 5   Block                 1045814 non-null  object        
 6   Primary Type          1045814 non-null  category      
 7   Location Description  1045814 non-null  category      
 8   Arrest                1045814 non-null  bool          
 9   Domestic              1045814 non-null  bool          
 10  Police District       1045814 non-null  Int8          
 11  Ward                  1045767 non-null  category      
 12  Community Area        1045813 non-null  ob

### Data Cleaning

In [27]:
# Number of missing values per column
crime_df.isna().sum()

ID                          0
Case Number                 0
Date                        0
Year                        0
Updated On                  0
Block                       0
Primary Type                0
Location Description        0
Arrest                      0
Domestic                    0
Police District             0
Ward                       47
Community Area              1
Latitude                17800
Longitude               17800
dtype: int64

In [30]:
# Fill missing wards with 'Blank'.
# In notebook order 'Ward' has already been cast to category, and a categorical
# column rejects a fill value that is not one of its categories, so 'Blank' is
# registered as a category first. The result is assigned back rather than using
# an inplace method on crime_df[col] (chained assignment).
ward_values = crime_df['Ward']
if isinstance(ward_values.dtype, pd.CategoricalDtype) and 'Blank' not in ward_values.cat.categories:
    ward_values = ward_values.cat.add_categories('Blank')
crime_df['Ward'] = ward_values.fillna('Blank')

In [31]:
# Fill missing community area names with 'Blank'. The result is assigned back:
# an inplace fillna on crime_df[col] is chained assignment and does not update
# the frame under pandas Copy-on-Write.
crime_df['Community Area'] = crime_df['Community Area'].fillna('Blank')

In [33]:
# Re-count missing values per column after the fills
crime_df.isna().sum()

ID                          0
Case Number                 0
Date                        0
Year                        0
Updated On                  0
Block                       0
Primary Type                0
Location Description        0
Arrest                      0
Domestic                    0
Police District             0
Ward                        0
Community Area              0
Latitude                17800
Longitude               17800
dtype: int64

In [34]:
# Preview the first rows of the cleaned frame
crime_df.head()

Unnamed: 0,ID,Case Number,Date,Year,Updated On,Block,Primary Type,Location Description,Arrest,Domestic,Police District,Ward,Community Area,Latitude,Longitude
0,11553168,JC100745,2019-01-01,2019,2019-01-10 15:16:50,008XX N MICHIGAN AVE,THEFT,RESTAURANT,False,False,18,2.0,Near North Side,41.898769,-87.624115
1,11895528,JC515003,2019-01-01,2019,2019-11-19 15:57:06,059XX W LELAND AVE,DECEPTIVE PRACTICE,Blank,False,False,16,45.0,Portage Park,,
2,12456410,JE341674,2019-01-01,2019,2021-08-19 16:51:33,012XX W JACKSON BLVD,OTHER OFFENSE,RESIDENCE,False,False,12,28.0,Near West Side,,
3,12367811,JE232794,2019-01-01,2019,2021-05-18 17:27:50,008XX W AGATITE AVE,OFFENSE INVOLVING CHILDREN,APARTMENT,True,False,19,46.0,Uptown,,
4,12368777,JE234183,2019-01-01,2019,2021-05-19 18:08:48,035XX S COTTAGE GROVE AVE,CRIMINAL SEXUAL ASSAULT,RESIDENCE,False,False,2,4.0,Oakland,,


In [35]:
# Final overview of column dtypes and non-null counts
crime_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1045814 entries, 0 to 1045813
Data columns (total 15 columns):
 #   Column                Non-Null Count    Dtype         
---  ------                --------------    -----         
 0   ID                    1045814 non-null  Int32         
 1   Case Number           1045814 non-null  object        
 2   Date                  1045814 non-null  datetime64[ns]
 3   Year                  1045814 non-null  Int16         
 4   Updated On            1045814 non-null  datetime64[ns]
 5   Block                 1045814 non-null  object        
 6   Primary Type          1045814 non-null  category      
 7   Location Description  1045814 non-null  category      
 8   Arrest                1045814 non-null  bool          
 9   Domestic              1045814 non-null  bool          
 10  Police District       1045814 non-null  Int8          
 11  Ward                  1045814 non-null  category      
 12  Community Area        1045814 non-null  ob

#### Exporting the data to ZipFile for Analysis

In [43]:
# Persist the cleaned frame as CSV; the same path is read again by export_to_zip.
# NOTE(review): `index=False` is not passed, so the row index is written as an
# unnamed first column, which shows up as 'Unnamed: 0' when the file is read back.
# Confirm whether downstream readers rely on that column before changing it.
crime_df.to_csv("/Users/galbeeir/Desktop/git/crime_analysis/crime_cleaned.csv")

In [55]:
def compress_data(data):
    """Encode ``data`` with ``str.encode()`` defaults and return the zlib-compressed bytes."""
    raw_bytes = data.encode()
    return zlib.compress(raw_bytes)

def export_to_zip(csv_path, zip_path_1, zip_path_2):
    """Split the cleaned CSV by year and store the parts in two zip archives.

    The first archive receives the 2019 and 2020 rows, the second one the
    2021, 2022 and 2023 rows. Members are named ``filtered_data_<n>.csv``
    (n = 1..5, in year order) and contain plain CSV text.

    Compression is done by the archive itself (ZIP_DEFLATED). The previous
    version stored raw zlib streams under ``.csv`` names inside an
    uncompressed archive, so the extracted files could not be read as CSV.

    Parameters
    ----------
    csv_path : str or os.PathLike
        CSV file to read; it must contain a 'Year' column.
    zip_path_1, zip_path_2 : str or os.PathLike
        Destination archives (overwritten if they exist).
    """
    from zipfile import ZIP_DEFLATED

    # low_memory=False parses each column in one pass, which avoids the
    # mixed-dtype warning raised for columns holding both numbers and text.
    crime_df = pd.read_csv(csv_path, low_memory=False)

    # (destination archive, years stored in it)
    archive_plan = [
        (zip_path_1, (2019, 2020)),
        (zip_path_2, (2021, 2022, 2023)),
    ]

    member_no = 1  # running suffix of the member file names
    for archive_path, years in archive_plan:
        with ZipFile(archive_path, 'w', compression=ZIP_DEFLATED) as archive:
            for year in years:
                rows_for_year = crime_df.loc[crime_df['Year'] == year]
                archive.writestr(
                    f'filtered_data_{member_no}.csv',
                    rows_for_year.to_csv(index=False),
                )
                member_no += 1

# Source CSV and the two destination archives for the per-year export
csv_cleaned_path = "/Users/galbeeir/Desktop/git/crime_analysis/crime_cleaned.csv"
zip_cleaned_path_1 = "/Users/galbeeir/Desktop/git/crime_analysis/filtered_1.zip"
zip_cleaned_path_2 = "/Users/galbeeir/Desktop/git/crime_analysis/filtered_2.zip"

# Run the export
export_to_zip(
    csv_path=csv_cleaned_path,
    zip_path_1=zip_cleaned_path_1,
    zip_path_2=zip_cleaned_path_2,
)


  crime_df = pd.read_csv(csv_path)
