#                                          Chicago Crime Analysis



# Introduction

1.  Understand three Chicago datasets
2.  Load the three datasets into three tables in a SQLite database
3.  Execute SQL queries to answer problems


In [12]:
# Avoid the prettytable "DEFAULT" error when %sql renders a result table.
# NOTE(review): presumably triggered by newer prettytable releases -- confirm
# against the installed ipython-sql / prettytable versions.
# Uncomment to install the packages if they are missing:
#!pip install ipython-sql prettytable

import prettytable

# Define the attribute explicitly so that a lookup of prettytable.DEFAULT succeeds.
prettytable.DEFAULT = 'DEFAULT'

### Store the datasets in database tables

To analyze the data using SQL, it first needs to be loaded into SQLite DB.
We will create three tables as follows:

1.  **CENSUS_DATA**
2.  **CHICAGO_PUBLIC_SCHOOLS**
3.  **CHICAGO_CRIME_DATA**


Load the `pandas` and `sqlite3` libraries and establish a connection to `FinalDB.db`


In [1]:
import pandas as pd
import sqlite3


  from pandas.core import (


Load the SQL magic module


In [7]:
# Register the SQL magic extension so that %sql can be used in later cells.
%load_ext sql


The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [3]:
# Establish a connection between the SQL magic module and the database `FinalDB.db`.
# Two handles onto the same file are created: a sqlite3 connection for Python
# code and a SQL-magic connection for the %sql queries.

# sqlite3 connection; this is the `con` passed to DataFrame.to_sql when the CSVs are loaded.
con = sqlite3.connect("FinalDB.db")
# Cursor on that connection (not used by the cells visible in this notebook).
cur = con.cursor()
# Point the SQL magic at the same database file.
%sql sqlite:///FinalDB.db

Use `Pandas` to load the data available in the links above to dataframes. Use these dataframes to load data on to the database `FinalDB.db` as required tables.


78

In [10]:
# Load each CSV file into a pandas DataFrame, then write it to a SQLite table of
# the same name through the `con` connection.
#   if_exists='replace' -> re-running the cell rebuilds the tables instead of failing
#   index=False         -> the DataFrame index is not stored as a column
#   method="multi"      -> rows are sent in multi-row INSERT statements
# df1 / df2 / df3 are kept as notebook-level names because later cells preview them.
try:
    df1 = pd.read_csv('ChicagoCensusData.csv')
    df2 = pd.read_csv('ChicagoPublicSchools.csv')
    df3 = pd.read_csv('ChicagoCrimeData.csv')

    for table_name, frame in (
        ("ChicagoCensusData", df1),
        ("ChicagoPublicSchools", df2),
        ("ChicagoCrimeData", df3),
    ):
        frame.to_sql(table_name, con, if_exists='replace', index=False, method="multi")
except Exception as e:
    print(f"Error loading data: {e}")
    # Re-raise: every later cell depends on these tables and DataFrames, so
    # continuing after a failed load would only produce confusing follow-on errors.
    raise
else:
    # Reached only when all three files were read and written without an exception.
    print("Data loaded successfully into FinalDB.db.")

Data loaded successfully into FinalDB.db.


In [15]:
# Query the database system catalog (sqlite_master) for the names of all tables,
# to confirm that the three tables exist after the load.
%sql SELECT name FROM sqlite_master WHERE type='table' ;

 * sqlite:///FinalDB.db
Done.


name
ChicagoCensusData
ChicagoPublicSchools
ChicagoCrimeData


In [17]:
# Show the column names and declared types of the ChicagoCensusData table.
%sql PRAGMA table_info([ChicagoCensusData])

 * sqlite:///FinalDB.db
Done.


cid,name,type,notnull,dflt_value,pk
0,COMMUNITY_AREA_NUMBER,REAL,0,,0
1,COMMUNITY_AREA_NAME,TEXT,0,,0
2,PERCENT_OF_HOUSING_CROWDED,REAL,0,,0
3,PERCENT_HOUSEHOLDS_BELOW_POVERTY,REAL,0,,0
4,PERCENT_AGED_16__UNEMPLOYED,REAL,0,,0
5,PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA,REAL,0,,0
6,PERCENT_AGED_UNDER_18_OR_OVER_64,REAL,0,,0
7,PER_CAPITA_INCOME,INTEGER,0,,0
8,HARDSHIP_INDEX,REAL,0,,0


In [18]:
# Show the column names and declared types of the ChicagoPublicSchools table.
%sql PRAGMA table_info([ChicagoPublicSchools])

 * sqlite:///FinalDB.db
Done.


cid,name,type,notnull,dflt_value,pk
0,School_ID,INTEGER,0,,0
1,NAME_OF_SCHOOL,TEXT,0,,0
2,"Elementary, Middle, or High School",TEXT,0,,0
3,Street_Address,TEXT,0,,0
4,City,TEXT,0,,0
5,State,TEXT,0,,0
6,ZIP_Code,INTEGER,0,,0
7,Phone_Number,TEXT,0,,0
8,Link,TEXT,0,,0
9,Network_Manager,TEXT,0,,0


In [19]:

# Show the column names and declared types of the ChicagoCrimeData table.
%sql PRAGMA table_info([ChicagoCrimeData])

 * sqlite:///FinalDB.db
Done.


cid,name,type,notnull,dflt_value,pk
0,ID,INTEGER,0,,0
1,CASE_NUMBER,TEXT,0,,0
2,DATE,TEXT,0,,0
3,BLOCK,TEXT,0,,0
4,IUCR,TEXT,0,,0
5,PRIMARY_TYPE,TEXT,0,,0
6,DESCRIPTION,TEXT,0,,0
7,LOCATION_DESCRIPTION,TEXT,0,,0
8,ARREST,INTEGER,0,,0
9,DOMESTIC,INTEGER,0,,0


## Problems


### Problem 1

##### Find the total number of crimes recorded in the CRIME table.


In [21]:
# Count all rows in the crime table.
%sql select count(*) from ChicagoCrimeData

 * sqlite:///FinalDB.db
Done.


count(*)
533


### Problem 2

##### List community area names and numbers with per capita income less than 11000.


In [25]:
%sql SELECT COMMUNITY_AREA_NUMBER, COMMUNITY_AREA_NAME \
FROM ChicagoCensusData WHERE PER_CAPITA_INCOME < 11000

 * sqlite:///FinalDB.db
Done.


COMMUNITY_AREA_NUMBER,COMMUNITY_AREA_NAME
26.0,West Garfield Park
30.0,South Lawndale
37.0,Fuller Park
54.0,Riverdale


### Problem 3

##### List all case numbers for crimes involving minors. (Children are not considered minors for the purposes of crime analysis.)


In [29]:
# Preview the first three rows of df1, the DataFrame read from ChicagoCensusData.csv.
df1.head(3)

Unnamed: 0,COMMUNITY_AREA_NUMBER,COMMUNITY_AREA_NAME,PERCENT_OF_HOUSING_CROWDED,PERCENT_HOUSEHOLDS_BELOW_POVERTY,PERCENT_AGED_16__UNEMPLOYED,PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA,PERCENT_AGED_UNDER_18_OR_OVER_64,PER_CAPITA_INCOME,HARDSHIP_INDEX
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0
2,3.0,Uptown,3.8,24.0,8.9,11.8,22.2,35787,20.0


In [27]:
# Preview the first five rows of df3, the DataFrame read from ChicagoCrimeData.csv,
# to see how DESCRIPTION and PRIMARY_TYPE values are written.
df3.head(5)

Unnamed: 0,ID,CASE_NUMBER,DATE,BLOCK,IUCR,PRIMARY_TYPE,DESCRIPTION,LOCATION_DESCRIPTION,ARREST,DOMESTIC,...,DISTRICT,WARD,COMMUNITY_AREA_NUMBER,FBICODE,X_COORDINATE,Y_COORDINATE,YEAR,LATITUDE,LONGITUDE,LOCATION
0,3512276,HK587712,2004-08-28,047XX S KEDZIE AVE,890,THEFT,FROM BUILDING,SMALL RETAIL STORE,False,False,...,9,14.0,58.0,6,1155838.0,1873050.0,2004,41.80744,-87.703956,"(41.8074405, -87.703955849)"
1,3406613,HK456306,2004-06-26,009XX N CENTRAL PARK AVE,820,THEFT,$500 AND UNDER,OTHER,False,False,...,11,27.0,23.0,6,1152206.0,1906127.0,2004,41.89828,-87.716406,"(41.898279962, -87.716405505)"
2,8002131,HT233595,2011-04-04,043XX S WABASH AVE,820,THEFT,$500 AND UNDER,NURSING HOME/RETIREMENT HOME,False,False,...,2,3.0,38.0,6,1177436.0,1876313.0,2011,41.815933,-87.624642,"(41.815933131, -87.624642127)"
3,7903289,HT133522,2010-12-30,083XX S KINGSTON AVE,840,THEFT,FINANCIAL ID THEFT: OVER $300,RESIDENCE,False,False,...,4,7.0,46.0,6,1194622.0,1850125.0,2010,41.743665,-87.562463,"(41.743665322, -87.562462756)"
4,10402076,HZ138551,2016-02-02,033XX W 66TH ST,820,THEFT,$500 AND UNDER,ALLEY,False,False,...,8,15.0,66.0,6,1155240.0,1860661.0,2016,41.773455,-87.70648,"(41.773455295, -87.706480471)"


In [34]:
# Match "minor" anywhere in DESCRIPTION. SQLite's LIKE is case-insensitive for
# ASCII letters by default, so upper-case descriptions are matched as well.
%sql SELECT CASE_NUMBER FROM ChicagoCrimeData WHERE DESCRIPTION LIKE '%minor%'


 * sqlite:///FinalDB.db
Done.


CASE_NUMBER
HL266884
HK238408


### Problem 4

##### List all kidnapping crimes involving a child.


In [36]:
# First attempt -- returns no rows. The `=` comparison is case-sensitive, and
# PRIMARY_TYPE values appear in upper case in the data preview (e.g. THEFT),
# so the lower-case literal 'kidnapping' never matches. The next cell
# lower-cases the column before comparing.
%sql SELECT CASE_NUMBER FROM ChicagoCrimeData \
WHERE DESCRIPTION LIKE '%child%' AND PRIMARY_TYPE = 'kidnapping'

 * sqlite:///FinalDB.db
Done.


CASE_NUMBER


In [39]:
%sql SELECT CASE_NUMBER FROM ChicagoCrimeData \
WHERE LOWER(DESCRIPTION) LIKE '%child%' AND LOWER(PRIMARY_TYPE) = 'kidnapping';


 * sqlite:///FinalDB.db
Done.


CASE_NUMBER
HN144152


### Problem 5

##### List the kind of crimes that were recorded at schools. (No repetitions)


In [41]:
%sql SELECT DISTINCT PRIMARY_TYPE,LOCATION_DESCRIPTION FROM ChicagoCrimeData \
WHERE LOWER(LOCATION_DESCRIPTION) LIKE '%school%';

 * sqlite:///FinalDB.db
Done.


PRIMARY_TYPE,LOCATION_DESCRIPTION
BATTERY,"SCHOOL, PUBLIC, GROUNDS"
BATTERY,"SCHOOL, PUBLIC, BUILDING"
CRIMINAL DAMAGE,"SCHOOL, PUBLIC, GROUNDS"
NARCOTICS,"SCHOOL, PUBLIC, GROUNDS"
NARCOTICS,"SCHOOL, PUBLIC, BUILDING"
ASSAULT,"SCHOOL, PUBLIC, GROUNDS"
CRIMINAL TRESPASS,"SCHOOL, PUBLIC, GROUNDS"
PUBLIC PEACE VIOLATION,"SCHOOL, PRIVATE, BUILDING"
PUBLIC PEACE VIOLATION,"SCHOOL, PUBLIC, BUILDING"


### Problem 6

##### List the type of schools along with the average safety score for each type.


In [45]:
%sql SELECT [Elementary, Middle, or High School], AVG(SAFETY_SCORE) \
FROM ChicagoPublicSchools\
GROUP BY [Elementary, Middle, or High School]

 * sqlite:///FinalDB.db
Done.


"Elementary, Middle, or High School",AVG(SAFETY_SCORE)
ES,49.52038369304557
HS,49.62352941176471
MS,48.0


### Problem 7

##### List 5 community areas with highest % of households below poverty line


In [48]:
%sql SELECT COMMUNITY_AREA_NAME \
FROM ChicagoCensusData \
ORDER BY PERCENT_HOUSEHOLDS_BELOW_POVERTY DESC \
LIMIT 5 ;

 * sqlite:///FinalDB.db
Done.


COMMUNITY_AREA_NAME
Riverdale
Fuller Park
Englewood
North Lawndale
East Garfield Park


### Problem 8

##### Which community area is most crime prone? Display the community area number only.


In [57]:
%sql SELECT a.COMMUNITY_AREA_NUMBER FROM \
( SELECT COMMUNITY_AREA_NUMBER, COUNT(CASE_NUMBER) \
FROM ChicagoCrimeData \
GROUP BY COMMUNITY_AREA_NUMBER\
ORDER BY COUNT(CASE_NUMBER) DESC LIMIT 1 ) a

 * sqlite:///FinalDB.db
Done.


COMMUNITY_AREA_NUMBER
25.0



Double-click **here** for a hint

<!--
Query for the 'community area number' that has most number of incidents
-->


### Problem 9

##### Use a sub-query to find the name of the community area with highest hardship index


In [64]:
%sql SELECT COMMUNITY_AREA_NUMBER, COUNT()
FROM ChicagoCrimeData \
WHERE HARDSHIP_INDEX = (SELECT MAX(HARDSHIP_INDEX) FROM ChicagoCensusData)


 * sqlite:///FinalDB.db
Done.


COMMUNITY_AREA_NAME
Riverdale


### Problem 10

##### Use a sub-query to determine the Community Area Name with the highest number of crimes.


In [71]:
%sql SELECT c.COMMUNITY_AREA_NAME, cc.CrimeCount \
FROM ChicagoCensusData c \
LEFT JOIN ( \
    SELECT COMMUNITY_AREA_NUMBER, COUNT(CASE_NUMBER) AS CrimeCount \
    FROM ChicagoCrimeData \
    GROUP BY COMMUNITY_AREA_NUMBER \
) cc ON c.COMMUNITY_AREA_NUMBER = cc.COMMUNITY_AREA_NUMBER \
ORDER BY cc.CrimeCount DESC \
LIMIT 1;


 * sqlite:///FinalDB.db
Done.


COMMUNITY_AREA_NAME,CrimeCount
Austin,43


## Author(s)

<h4> Hima Vasudevan </h4>
<h4> Rav Ahuja </h4>
<h4> Ramesh Sannareddy </h4>

## Contributor(s)

<h4> Malika Singla </h4>
<h4>Abhishek Gagneja</h4>
<!--
## Change log

| Date       | Version | Changed by        | Change Description                             |
| ---------- | ------- | ----------------- | ---------------------------------------------- |
|2023-10-18  | 2.6     | Abhishek Gagneja  | Modified instruction set |
| 2022-03-04 | 2.5     | Lakshmi Holla     | Changed markdown.                   |
| 2021-05-19 | 2.4     | Lakshmi Holla     | Updated the question                           |
| 2021-04-30 | 2.3     | Malika Singla     | Updated the libraries                          |
| 2021-01-15 | 2.2     | Rav Ahuja         | Removed problem 11 and fixed changelog         |
| 2020-11-25 | 2.1     | Ramesh Sannareddy | Updated the problem statements, and datasets   |
| 2020-09-05 | 2.0     | Malika Singla     | Moved lab to course repo in GitLab             |
| 2018-07-18 | 1.0     | Rav Ahuja         | Several updates including loading instructions |
| 2018-05-04 | 0.1     | Hima Vasudevan    | Created initial version                        |
-->
## <h3 align="center"> © IBM Corporation 2023. All rights reserved. </h3>
