In [None]:
!pip install pandas
!pip install numpy
!pip install -U scikit-learn
!pip install seaborn

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression # Modeling for Machine Learning Task
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.feature_selection import RFE
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import RFECV


In [3]:
# CSV files holding the Chicago crime data, one per period, in chronological order.
file_path_lst = [
  "Chicago_Crimes_2001_to_2005.csv",
  "Chicago_Crimes_2006_to_2010.csv",
  "Chicago_Crimes_2011_to_2015.csv",
  "Chicago_Crimes_2016.csv",
  "Chicago_Crimes_2017.csv",
  "Chicago_Crimes_2018_to_2019.csv",
  "Chicago_Crimes_2020_to_2022.csv",
]

# Individual names for each path above. Note that file_16_17 and file_17_18
# each refer to a single-year file (2016 and 2017 respectively).
(file_01_05, file_06_10, file_11_15, file_16_17,
 file_17_18, file_18_19, file_20_22) = file_path_lst

# Columns kept from each CSV for training
critical_cols = [
  'Date',
  'Primary Type',
  'Location Description',
  'Arrest',
  'District',
  'Year',
]

In [24]:
def _select_columns(frame, expect_col):
  """Return `frame` narrowed to `expect_col`, or `frame` unchanged when no columns were requested."""
  if expect_col is not None and len(expect_col) > 0:
    return frame.loc[:, expect_col]
  return frame


def import_data(file_path=file_20_22, expect_col=None, file_list=None, read_many=False, write_back=True):
  """Load one CSV, or a list of CSVs, into pandas DataFrames.

  The mode is chosen by `read_many`:

  * read_many truthy: every path in `file_list` is read with `pd.read_csv`
    and a list of DataFrames is returned in the same order. Rows with missing
    values are NOT dropped in this mode, and `file_path` is ignored.
  * read_many falsy (default): `file_path` is read, every row that has a
    missing value in ANY column is dropped (this happens before `expect_col`
    is applied), and a single DataFrame is returned. `file_list` is ignored.

  Args:
    file_path: CSV to read in single-file mode.
    expect_col: Column labels to keep, in the order given. None or an empty
      sequence keeps every column.
    file_list: CSV paths to read in multi-file mode. None is treated as empty.
    read_many: Any truthy value selects multi-file mode.
    write_back: Single-file mode only. When True (the default, matching the
      historical behaviour) the frame with missing-value rows removed is
      written back over `file_path` with `index=False`.
      WARNING: this permanently replaces the file on disk; pass False to
      load the data without modifying the source file.

  Returns:
    A list of DataFrames in multi-file mode (empty if `file_list` is empty),
    otherwise a single DataFrame.

  Raises:
    Errors from `pd.read_csv`, `.loc` and `to_csv` are propagated unchanged
    (for example an unreadable path, or a label in `expect_col` that is not
    present in the loaded CSV).
  """
  # Truthiness (not `is True`) so values such as 1 or numpy.bool_ cannot fall
  # through to the single-file branch, which may rewrite a file on disk.
  if read_many:
    if file_list is None:
      file_list = []
    # Narrow each frame right after it is read so the full-width frame can be
    # released before the next file is loaded.
    return [_select_columns(pd.read_csv(f_name), expect_col) for f_name in file_list]

  # Single-file mode: drop incomplete rows across all columns first.
  data_csv = pd.read_csv(file_path).dropna()
  if write_back:
    data_csv.to_csv(file_path, index=False)  # overwrites the source CSV

  return _select_columns(data_csv, expect_col)

In [5]:
def group_by_col(dataFrame, col_group_name="", col_list=False):
  """Count occurrences of each distinct value in one or several columns.

  Args:
    dataFrame: Object indexed by column name whose columns expose
      `value_counts()`.
    col_group_name: Column to count when `col_list` is falsy.
    col_list: Optional iterable of column names. When truthy it takes
      precedence over `col_group_name`.

  Returns:
    The `value_counts()` result for `col_group_name` when `col_list` is
    falsy, otherwise a list with one `value_counts()` result per entry of
    `col_list`. If anything raises, a message is printed and None is returned.
  """
  try:
    if col_list:
      # One value-count result per requested column, used for visualization
      return [dataFrame[name].value_counts() for name in col_list]
    return dataFrame[col_group_name].value_counts()
  except Exception as err:
    print("Internal errors when performing group_by, please check again", str(err))
    return None

In [27]:
# Load every CSV listed in file_path_lst (2001-2022) and keep only critical_cols.
# The result is a list with one DataFrame per file, in file_path_lst order.
# The read_many=True path of import_data does not drop rows with missing values.
chicago_GDP_Frames = import_data(file_list=file_path_lst, expect_col=critical_cols, read_many=True)

In [29]:
# First 10 rows of the Chicago crime data from the 2001-2005 file (file_path_lst[0])
chicago_GDP_Frames[0].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,1/1/2001 11:00,DECEPTIVE PRACTICE,RESIDENCE,False,4,2001
1,1/1/2003 0:01,SEX OFFENSE,RESIDENCE,False,8,2003
2,9/15/2001 2:00,BATTERY,STREET,False,12,2001
3,11/2/2001 18:30,THEFT,OTHER,False,1,2001
4,12/15/2001 2:00,BATTERY,STREET,False,18,2001
5,9/26/2002 13:30,BATTERY,STREET,False,8,2002
6,10/1/2002 19:00,MOTOR VEHICLE THEFT,STREET,False,14,2002
7,11/5/2002 16:40,ROBBERY,STREET,True,8,2002
8,8/7/2005 7:15,THEFT,STREET,False,25,2005
9,10/1/2005 9:00,THEFT,OTHER,True,10,2005


In [30]:
# First 10 rows of the Chicago crime data from the 2006-2010 file (file_path_lst[1])
chicago_GDP_Frames[1].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,1/1/2007 0:01,CRIM SEXUAL ASSAULT,RESIDENCE,False,11,2007
1,1/1/2010 0:01,CRIM SEXUAL ASSAULT,RESIDENCE,False,6,2010
2,1/1/2007 0:01,CRIM SEXUAL ASSAULT,RESIDENCE,False,10,2007
3,1/1/2008 0:01,CRIM SEXUAL ASSAULT,RESIDENCE,False,11,2008
4,1/1/2007 0:01,CRIM SEXUAL ASSAULT,DAY CARE CENTER,False,25,2007
5,10/24/2008 14:30,DECEPTIVE PRACTICE,APARTMENT,False,16,2008
6,1/24/2009 18:00,OFFENSE INVOLVING CHILDREN,RESIDENCE,True,9,2009
7,7/24/2008 0:01,SEX OFFENSE,APARTMENT,False,11,2008
8,8/4/2009 12:00,DECEPTIVE PRACTICE,RESIDENCE,False,3,2009
9,1/1/2009 0:00,CRIM SEXUAL ASSAULT,RESIDENCE,True,6,2009


In [31]:
# First 10 rows of the Chicago crime data from the 2011-2015 file (file_path_lst[2])
chicago_GDP_Frames[2].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,9/5/2015 13:30,BATTERY,RESIDENCE,False,9,2015
1,9/4/2015 11:30,THEFT,CTA BUS,False,15,2015
2,9/5/2015 12:45,NARCOTICS,SIDEWALK,True,14,2015
3,9/5/2015 13:00,ASSAULT,APARTMENT,False,15,2015
4,9/5/2015 10:55,BURGLARY,RESIDENCE,False,6,2015
5,9/4/2015 18:00,BURGLARY,RESIDENCE-GARAGE,False,14,2015
6,9/5/2015 13:00,THEFT,GROCERY FOOD STORE,True,10,2015
7,9/5/2015 11:30,ROBBERY,STREET,False,12,2015
8,9/5/2015 14:00,THEFT,PARKING LOT/GARAGE(NON.RESID.),False,8,2015
9,9/5/2015 11:00,BATTERY,SMALL RETAIL STORE,False,16,2015


In [32]:
# First 10 rows of the Chicago crime data from the 2016 file
# (file_path_lst[3] is "Chicago_Crimes_2016.csv", a single-year file)
chicago_GDP_Frames[3].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,5/1/2016 0:25,DECEPTIVE PRACTICE,,False,8,2016
1,10/19/2016 19:00,BURGLARY,RESTAURANT,False,4,2016
2,3/29/2016 7:00,DECEPTIVE PRACTICE,OTHER,False,3,2016
3,3/11/2016 23:00,CRIM SEXUAL ASSAULT,RESIDENCE PORCH/HALLWAY,False,17,2016
4,1/1/2016 11:00,DECEPTIVE PRACTICE,RESIDENCE,False,5,2016
5,3/1/2016 0:00,OTHER OFFENSE,AUTO / BOAT / RV DEALERSHIP,False,22,2016
6,12/1/2016 0:01,DECEPTIVE PRACTICE,RESIDENCE,False,3,2016
7,4/1/2016 0:00,DECEPTIVE PRACTICE,RESIDENCE,False,18,2016
8,9/13/2016 9:55,DECEPTIVE PRACTICE,,False,15,2016
9,5/8/2016 9:45,DECEPTIVE PRACTICE,,False,7,2016


In [33]:
# First 10 rows of the Chicago crime data from the 2017 file
# (file_path_lst[4] is "Chicago_Crimes_2017.csv", a single-year file)
chicago_GDP_Frames[4].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,10/8/2017 3:00,CRIM SEXUAL ASSAULT,RESIDENCE,False,22.0,2017
1,3/28/2017 14:00,BURGLARY,OTHER,False,8.0,2017
2,9/9/2017 20:17,THEFT,RESIDENCE,False,3.0,2017
3,8/26/2017 10:00,CRIM SEXUAL ASSAULT,HOTEL/MOTEL,False,1.0,2017
4,1/1/2017 0:01,OFFENSE INVOLVING CHILDREN,RESIDENCE,False,10.0,2017
5,7/17/2017 10:10,THEFT,RESIDENCE,False,14.0,2017
6,12/28/2017 15:55,DECEPTIVE PRACTICE,,False,1.0,2017
7,2/10/2017 12:00,CRIMINAL DAMAGE,APARTMENT,False,6.0,2017
8,11/22/2017 2:42,OTHER OFFENSE,APARTMENT,False,17.0,2017
9,7/29/2017 15:40,THEFT,SIDEWALK,False,24.0,2017


In [34]:
# First 10 rows of the Chicago crime data from the 2018-2019 file (file_path_lst[5])
chicago_GDP_Frames[5].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,9/1/2018 0:01,THEFT,RESIDENCE,False,6,2018
1,1/1/2018 8:00,DECEPTIVE PRACTICE,RESIDENCE,False,25,2018
2,9/24/2019 8:00,DECEPTIVE PRACTICE,COMMERCIAL / BUSINESS OFFICE,False,1,2019
3,10/13/2019 20:30,THEFT,GROCERY FOOD STORE,False,12,2019
4,12/20/2018 16:00,OTHER OFFENSE,RESIDENCE,False,17,2018
5,4/1/2018 0:01,DECEPTIVE PRACTICE,RESIDENCE,False,6,2018
6,12/20/2018 15:00,DECEPTIVE PRACTICE,APARTMENT,False,25,2018
7,10/5/2019 18:30,THEFT,RESIDENCE,False,12,2019
8,12/18/2018 11:00,DECEPTIVE PRACTICE,RESIDENCE,False,19,2018
9,10/13/2019 19:00,CRIMINAL DAMAGE,STREET,False,19,2019


In [35]:
# First 10 rows of the Chicago crime data from the 2020-2022 file (file_path_lst[6])
chicago_GDP_Frames[6].head(10)

Unnamed: 0,Date,Primary Type,Location Description,Arrest,District,Year
0,3/17/2020 21:30,THEFT,STREET,False,16,2020
1,3/18/2020 2:03,MOTOR VEHICLE THEFT,APARTMENT,False,11,2020
2,3/18/2020 8:50,ASSAULT,RESIDENCE,False,25,2020
3,3/18/2020 13:00,DECEPTIVE PRACTICE,OTHER (SPECIFY),False,11,2020
4,3/18/2020 17:35,NARCOTICS,SIDEWALK,True,15,2020
5,3/16/2020 0:05,THEFT,STREET,False,2,2020
6,3/18/2020 18:00,ROBBERY,STREET,False,10,2020
7,3/18/2020 14:04,BATTERY,RESIDENCE,False,4,2020
8,3/18/2020 21:27,OTHER OFFENSE,SIDEWALK,False,9,2020
9,3/18/2020 7:30,THEFT,STREET,False,25,2020
