In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import json

# Crime data

In [2]:

# Load the raw 2019 incident export; comma separator and first-row header are the read_csv defaults.
crime_data = pd.read_csv('Crimes_-_2019.csv')

Next we remove rows with no location information and change the type of the date column.

In [3]:
# Keep only incidents that carry a latitude. The explicit .copy() gives us an
# independent frame, so the column assignments that follow (here and in later
# cells) never write into a slice of the raw data.
crime_data = crime_data[crime_data['Latitude'].notnull()].copy()
# Parse the textual timestamps into datetime64 values.
crime_data['Date'] = pd.to_datetime(crime_data['Date'])

In [4]:
crime_data.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
11,12098557,JD288415,2019-02-01 00:01:00,025XX W 50TH ST,0460,BATTERY,SIMPLE,RESIDENCE,False,True,...,14.0,63,08B,1160373.0,1871438.0,2019,07/09/2020 03:44:39 PM,41.802925,-87.687367,"(41.802924631, -87.687367104)"
22,12082526,JD269406,2019-09-24 12:00:00,050XX N KENMORE AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,False,False,...,48.0,3,11,1168334.0,1933969.0,2019,06/24/2020 03:40:28 PM,41.974346,-87.656361,"(41.974346203, -87.656361214)"
25,11859264,JC470909,2019-10-13 06:40:00,012XX S KARLOV AVE,1310,CRIMINAL DAMAGE,TO PROPERTY,APARTMENT,False,True,...,24.0,29,14,1149246.0,1894056.0,2019,10/20/2019 07:09:58 PM,41.865214,-87.72759,"(41.865213688, -87.727590376)"
35,11662417,JC232642,2019-04-21 12:30:00,009XX E 80TH ST,031A,ROBBERY,ARMED - HANDGUN,RESIDENCE,False,False,...,8.0,44,03,1184044.0,1852159.0,2019,09/14/2023 03:41:59 PM,41.7495,-87.601157,"(41.749500329, -87.6011574)"
39,12990873,JG161829,2019-08-17 13:14:00,008XX N KARLOV AVE,1751,OFFENSE INVOLVING CHILDREN,CRIMINAL SEXUAL ABUSE BY FAMILY MEMBER,RESIDENCE,True,True,...,37.0,23,17,1148899.0,1905351.0,2019,09/14/2023 03:41:59 PM,41.896215,-87.728572,"(41.89621515, -87.728572048)"


In [5]:
crime_data.shape

(259169, 22)

Note that the crime data contains all incidents that occurred between 1 Jan 2019 and 31 Dec 2019.

In [6]:
# Use the vectorised Series reductions: they skip missing timestamps (NaT) and
# avoid iterating over every row in Python like the builtin min()/max() do.
print(crime_data['Date'].min(), crime_data['Date'].max())

2019-01-01 00:00:00 2019-12-31 23:55:00


Cleaning the data

In [7]:
# Number of incidents (non-null IDs) recorded for each primary crime type.
crime_data.groupby('Primary Type')['ID'].count()

Primary Type
ARSON                                  374
ASSAULT                              20601
BATTERY                              49474
BURGLARY                              9633
CONCEALED CARRY LICENSE VIOLATION      217
CRIM SEXUAL ASSAULT                    908
CRIMINAL DAMAGE                      26611
CRIMINAL SEXUAL ASSAULT                676
CRIMINAL TRESPASS                     6805
DECEPTIVE PRACTICE                   18236
GAMBLING                               142
HOMICIDE                               508
HUMAN TRAFFICKING                       13
INTERFERENCE WITH PUBLIC OFFICER      1544
INTIMIDATION                           163
KIDNAPPING                             172
LIQUOR LAW VIOLATION                   232
MOTOR VEHICLE THEFT                   8962
NARCOTICS                            14996
NON-CRIMINAL                             4
OBSCENITY                               59
OFFENSE INVOLVING CHILDREN            2349
OTHER NARCOTIC VIOLATION                 

In [8]:
# The same offence appears under two spellings; fold the abbreviated label into the full one.
crime_data['Primary Type'] = crime_data['Primary Type'].replace(
    'CRIM SEXUAL ASSAULT', 'CRIMINAL SEXUAL ASSAULT'
)

### Mapping for Chicago Neighborhoods and their Latitude and Longitude Boundaries


In [9]:
# Tab-separated region file without a header row; columns are named in a later cell.
chicago_neighborhoods = pd.read_csv(
    'IL-Regions.csv',
    delimiter='\t',
    header=None,
)

In [10]:
chicago_neighborhoods.head()

Unnamed: 0,0,1,2,3,4,5,6
0,IL,Cook,Chicago,Chatham,273222,218,"-87.597208915594;41.751072022231,-87.597240915..."
1,IL,Cook,Chicago,North Center,269600,163,"-87.673960915665;41.961532022427,-87.673918915..."
2,IL,Cook,Chicago,O'hare,269603,303,"-87.836540915817;41.974888022439,-87.836540915..."
3,IL,Cook,Chicago,Washington Park,275927,103,"-87.615601915611;41.783765022261,-87.615566915..."
4,IL,Cook,Chicago,Garfield Ridge,269584,271,"-87.738570915725;41.822185022297,-87.738576915..."


In [11]:
# Name the seven positional columns of the region file.
# NOTE(review): 'total_potins' looks like a typo for 'total_points'; the label is
# kept unchanged here because it is runtime data.
chicago_neighborhoods.columns = [
    'state',
    'county',
    'city',
    'neighbourhood',
    'regionid',
    'total_potins',
    'coordinates',
]

In [12]:
def _parse_polygon(raw):
    """Turn 'lon;lat,lon;lat,...' text into a list of [lon, lat] float pairs."""
    vertices = []
    for pair in raw.split(','):
        fields = pair.split(';')
        vertices.append([float(fields[0]), float(fields[1])])
    return vertices


chicago_neighborhoods['coordinates2'] = chicago_neighborhoods['coordinates'].apply(_parse_polygon)

In [13]:
# regionid -> polygon vertices, and regionid -> neighbourhood name.
chicago_hoods = dict(zip(chicago_neighborhoods['regionid'], chicago_neighborhoods['coordinates2']))
chicago_region_ids = dict(zip(chicago_neighborhoods['regionid'], chicago_neighborhoods['neighbourhood']))

### Adding Neighborhood information to the Crime Dataset

We then use the methodology given in [Craig M. Booth's github code](https://github.com/craigmbooth/chicago_neighborhood_finder/tree/739deff8f9f349720299b193b4259aa690876e52) to map the latitude-longitude data to the neighborhoods in the Zillow dataset.

In [14]:
def point_inside_polygon(x,y,poly):
    """Ray-casting test: is the point (x, y) inside the polygon ``poly``?

    ``poly`` is a sequence of (x, y) vertex pairs. Each edge (including the
    closing edge back to the first vertex) is visited once, and the result is
    flipped every time the edge counts as a crossing of the ray from the point.

    Returns True when the number of counted crossings is odd, False otherwise.

    Code from http://www.ariel.com.au/a/python-point-int-poly.html which
    in turn was adapted from C code found at
    http://local.wasp.uwa.edu.au/~pbourke/geometry/insidepoly/
    """
    vertex_count = len(poly)
    odd_crossings = False

    prev_x, prev_y = poly[0]
    for idx in range(vertex_count + 1):
        # The modulo wraps the final step back to vertex 0, closing the polygon.
        cur_x, cur_y = poly[idx % vertex_count]
        within_y_span = min(prev_y, cur_y) < y <= max(prev_y, cur_y)
        if within_y_span and x <= max(prev_x, cur_x):
            if prev_y != cur_y:
                # x coordinate where the edge meets the horizontal line through y.
                xinters = (y - prev_y) * (cur_x - prev_x) / (cur_y - prev_y) + prev_x
            if prev_x == cur_x or x <= xinters:
                odd_crossings = not odd_crossings
        prev_x, prev_y = cur_x, cur_y

    return odd_crossings

As an example we find the neighborhood for a random coordinate point (-87.706665329, 41.800781216) and see that we obtain 'Gage Park' as the corresponding neighborhood.

In [15]:
# Look up the neighbourhood of one sample (longitude, latitude) point.
sample_lon, sample_lat = -87.706665329, 41.800781216
sample_matches = [
    region_id
    for region_id in chicago_hoods
    if point_inside_polygon(sample_lon, sample_lat, chicago_hoods[region_id])
]
chicago_region_ids[sample_matches[0]]

'Gage Park'

For each crime incident in the crime dataset we obtain the corresponding Zillow regionid in a column.

In [16]:
def addNeighToDataset(data):
    """Attach a 'regionid' column to ``data`` and return the same frame.

    For every row, the ('Longitude', 'Latitude') point is tested against each
    polygon in the module-level ``chicago_hoods`` mapping. The row gets the
    region id when exactly one polygon contains the point, and None when no
    polygon or more than one polygon matches.

    The column is written onto ``data`` itself, so the caller's frame is modified.
    """
    def _region_for(lon, lat):
        # Ids of every polygon that contains this point.
        hits = [
            region_id
            for region_id in chicago_hoods
            if point_inside_polygon(float(lon), float(lat), chicago_hoods[region_id])
        ]
        return hits[0] if len(hits) == 1 else None

    lon_values = data['Longitude'].values
    lat_values = data['Latitude'].values
    data['regionid'] = [_region_for(lon, lat) for lon, lat in zip(lon_values, lat_values)]
    return data


In [17]:
# addNeighToDataset writes the 'regionid' column onto the frame it receives and
# returns that same frame, so after this call crime_data_with_region and
# crime_data refer to one object.
crime_data_with_region = addNeighToDataset(crime_data)

In [18]:
# Drop incidents that could not be placed in exactly one region, then translate
# each remaining region id into its neighbourhood name.
has_region = crime_data_with_region['regionid'].notnull()
crime_data_with_region = crime_data_with_region.loc[has_region].reset_index(drop=True)
crime_data_with_region['neighborhood'] = crime_data_with_region['regionid'].apply(
    lambda region_id: chicago_region_ids[region_id]
)

In [19]:
# Make sure 'Date' is datetime-typed on the region-tagged frame.
crime_data_with_region = crime_data_with_region.assign(
    Date=pd.to_datetime(crime_data_with_region['Date'])
)

In [20]:
crime_data_with_region.head(5)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,regionid,neighborhood
0,12098557,JD288415,2019-02-01 00:01:00,025XX W 50TH ST,0460,BATTERY,SIMPLE,RESIDENCE,False,True,...,08B,1160373.0,1871438.0,2019,07/09/2020 03:44:39 PM,41.802925,-87.687367,"(41.802924631, -87.687367104)",269582.0,Gage Park
1,12082526,JD269406,2019-09-24 12:00:00,050XX N KENMORE AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,False,False,...,11,1168334.0,1933969.0,2019,06/24/2020 03:40:28 PM,41.974346,-87.656361,"(41.974346203, -87.656361214)",269609.0,Uptown
2,11859264,JC470909,2019-10-13 06:40:00,012XX S KARLOV AVE,1310,CRIMINAL DAMAGE,TO PROPERTY,APARTMENT,False,True,...,14,1149246.0,1894056.0,2019,10/20/2019 07:09:58 PM,41.865214,-87.72759,"(41.865213688, -87.727590376)",269601.0,North Lawndale
3,11662417,JC232642,2019-04-21 12:30:00,009XX E 80TH ST,031A,ROBBERY,ARMED - HANDGUN,RESIDENCE,False,False,...,03,1184044.0,1852159.0,2019,09/14/2023 03:41:59 PM,41.7495,-87.601157,"(41.749500329, -87.6011574)",273222.0,Chatham
4,12990873,JG161829,2019-08-17 13:14:00,008XX N KARLOV AVE,1751,OFFENSE INVOLVING CHILDREN,CRIMINAL SEXUAL ABUSE BY FAMILY MEMBER,RESIDENCE,True,True,...,17,1148899.0,1905351.0,2019,09/14/2023 03:41:59 PM,41.896215,-87.728572,"(41.89621515, -87.728572048)",269585.0,Humboldt Park


In [21]:
# Keep only the columns needed for the downstream analysis.
crime_data_with_region = crime_data_with_region.loc[:, [
    'ID',
    'Case Number',
    'Date',
    'Primary Type',
    'Description',
    'Location Description',
    'Arrest',
    'Domestic',
    'Year',
    'regionid',
    'neighborhood',
]]

We save the final crime dataset that contains the neighborhood information as a csv `crime_data_with_neighbourhood.csv`.

In [22]:
crime_data_with_region.to_csv('crime_data_with_neighbourhood.csv', index = False)

### Obtaining Crime Rates for Neighborhoods (based on population)  

In [23]:
# Comma separator and first-row header are the read_csv defaults.
population_data = pd.read_csv('chicago_population_data.csv')

In [24]:
# Keep only the area name and its 2010 population. The explicit .copy() makes
# this an independent frame, so the later in-place renaming of 'GEOG' values
# does not operate on a slice of the full population table.
population_data = population_data[['GEOG', '2010_POP']].copy()
population_data.head()

Unnamed: 0,GEOG,2010_POP
0,Albany Park,51542
1,Archer Heights,13393
2,Armour Square,13391
3,Ashburn,41081
4,Auburn Gresham,48743


Next we join the population and crime datasets on the neighborhood names. We find that there are a few discrepancies between the two datasets, which we can fix manually.

In [25]:
# Distinct neighbourhood names on each side of the upcoming join.
pop_hoods = population_data['GEOG'].unique().tolist()
crime_hoods = crime_data_with_region['neighborhood'].unique().tolist()

In [26]:
# Names present in the population data but absent from the crime data.
for hood in filter(lambda name: name not in crime_hoods, pop_hoods):
    print(hood)

Greater Grand Crossing
O'Hare
Portage Park
The Loop


In [27]:
# Names present in the crime data but absent from the population data.
for hood in filter(lambda name: name not in pop_hoods, crime_hoods):
    print(hood)

Grand Crossing
Pottage Park
Loop
O'hare


In [28]:
# Rewrite the population-side names so they match the spellings used by the
# region file (including its 'Pottage Park' spelling), making the join line up.
population_data['GEOG'] = population_data['GEOG'].replace({
    'The Loop': 'Loop',
    'Greater Grand Crossing': 'Grand Crossing',
    "O'Hare": "O'hare",
    'Portage Park': 'Pottage Park',
})

In [29]:
# Attach the population of each incident's neighbourhood; the inner join keeps
# only incidents whose neighbourhood name appears in the population table.
crime_with_pop = (
    crime_data_with_region
    .merge(population_data, how='inner', left_on='neighborhood', right_on='GEOG')
    .drop(columns=['GEOG'])
    .rename(columns={'2010_POP': 'population'})
)
crime_with_pop.head(5)

Unnamed: 0,ID,Case Number,Date,Primary Type,Description,Location Description,Arrest,Domestic,Year,regionid,neighborhood,population
0,12098557,JD288415,2019-02-01 00:01:00,BATTERY,SIMPLE,RESIDENCE,False,True,2019,269582.0,Gage Park,39894
1,12073110,JD258449,2019-11-01 08:00:00,BURGLARY,UNLAWFUL ENTRY,CHURCH / SYNAGOGUE / PLACE OF WORSHIP,False,False,2019,269582.0,Gage Park,39894
2,12079127,JD265413,2019-04-01 22:20:00,SEX OFFENSE,AGGRAVATED CRIMINAL SEXUAL ABUSE,APARTMENT,False,False,2019,269582.0,Gage Park,39894
3,11715936,JC298110,2019-06-08 19:45:00,CRIMINAL TRESPASS,TO VEHICLE,STREET,True,False,2019,269582.0,Gage Park,39894
4,11724588,JC308202,2019-06-15 21:06:00,BATTERY,SIMPLE,RESTAURANT,False,False,2019,269582.0,Gage Park,39894


The final crime data is saved as `crime_final.csv`.

In [30]:
crime_with_pop.to_csv('crime_final.csv', index = False)

## Housing data analysis


In [127]:
# Comma separator and first-row header are the read_csv defaults.
zhvi_data = pd.read_csv('zillow_zhvi_neighborhood_dataset.csv')

In [128]:
# Restrict the dataset to rows whose City is Chicago.
is_chicago = zhvi_data['City'].eq('Chicago')
zhvi_data = zhvi_data.loc[is_chicago].reset_index(drop=True)

In [129]:
# Keep only the regions that also exist in the neighbourhood boundary file.
known_region = zhvi_data['RegionID'].isin(list(chicago_region_ids))
zhvi_filtered = zhvi_data.loc[known_region].reset_index(drop=True)

In [130]:
zhvi_filtered.head(5)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,1996-01-31,...,2019-12-31,2020-01-31,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30
0,269592,53,Logan Square,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,151456.0,...,427048.0,427606.0,430001.0,433209.0,436357.0,438596.0,441558.0,445053.0,449245.0,454323.0
1,269566,154,Albany Park,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,134612.0,...,340398.0,340724.0,340930.0,341095.0,342050.0,343416.0,344771.0,347298.0,350828.0,356003.0
2,269609,159,Uptown,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,119281.0,...,302682.0,302986.0,303869.0,304993.0,305424.0,305700.0,306121.0,307510.0,309389.0,312127.0
3,269589,163,Lake View,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,221230.0,...,536521.0,536425.0,537520.0,539307.0,540502.0,541240.0,542024.0,543851.0,546650.0,550731.0
4,269605,171,Rogers Park,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,103821.0,...,215315.0,215820.0,216498.0,217510.0,217913.0,218352.0,219027.0,220373.0,222044.0,224347.0


Next we find the average ZHVI for Chicago Neighborhoods from 1 Jan 2019 to 31 Dec 2019.

In [131]:
# Wide -> long: one row per (region, month-end date) with its ZHVI value.
zhvi_reshaped = zhvi_filtered.melt(
    id_vars=[
        'RegionID', 'SizeRank', 'RegionName', 'RegionType', 'StateName',
        'State', 'City', 'Metro', 'CountyName',
    ],
    var_name='Date',
    value_name='ZHVI',
)
zhvi_reshaped['Date'] = pd.to_datetime(zhvi_reshaped['Date'])

# Keep the 2019 observations that actually have a value.
in_2019 = (zhvi_reshaped['Date'] >= '2019-01-01') & (zhvi_reshaped['Date'] < '2020-01-01')
has_value = zhvi_reshaped['ZHVI'].notnull()
zhvi_reshaped = zhvi_reshaped[in_2019 & has_value]

In [132]:
zhvi_reshaped.head(5)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,City,Metro,CountyName,Date,ZHVI
18216,269592,53,Logan Square,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,2019-01-31,430709.0
18217,269566,154,Albany Park,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,2019-01-31,346712.0
18218,269609,159,Uptown,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,2019-01-31,301934.0
18219,269589,163,Lake View,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,2019-01-31,546449.0
18220,269605,171,Rogers Park,Neighborhood,IL,IL,Chicago,Chicago-Naperville-Elgin,Cook County,2019-01-31,216135.0


In [133]:
# Average the 2019 ZHVI per region. 'ZHVI' is selected explicitly so that the
# datetime 'Date' column is never part of the aggregation: relying on mean() to
# drop it implicitly is version-dependent and triggers a pandas warning.
zhvi_final = (
    zhvi_reshaped
    .groupby(['RegionID', 'SizeRank', 'RegionName', 'RegionType', 'StateName',
              'State', 'City', 'Metro', 'CountyName'])['ZHVI']
    .mean()
    .reset_index()
    # The remaining descriptive columns are not needed downstream.
    .drop(columns=['SizeRank', 'RegionType', 'StateName', 'State', 'Metro', 'CountyName'])
)
zhvi_final.head(5)

  'State', 'City', 'Metro', 'CountyName']).mean().reset_index()


Unnamed: 0,RegionID,RegionName,City,ZHVI
0,137634,Burnside,Chicago,139033.833333
1,137841,Clearing,Chicago,246129.833333
2,138129,Dunning,Chicago,294552.416667
3,138166,East Side,Chicago,132832.333333
4,138261,Englewood,Chicago,53393.166667


The final housing data is saved as `housing_zhvi_final.csv`.

In [134]:
zhvi_final.to_csv('housing_zhvi_final.csv', index = False)

Merging the final crime and housing datasets

In [135]:
# Join each incident to the 2019 average ZHVI of its region; the region id and
# name are carried by the housing-side columns afterwards.
final_data = (
    crime_with_pop
    .merge(zhvi_final, how='inner', left_on='regionid', right_on='RegionID')
    .drop(columns=['regionid', 'neighborhood'])
)
final_data.head(5)

Unnamed: 0,ID,Case Number,Date,Primary Type,Description,Location Description,Arrest,Domestic,Year,population,RegionID,RegionName,City,ZHVI
0,12098557,JD288415,2019-02-01 00:01:00,BATTERY,SIMPLE,RESIDENCE,False,True,2019,39894,269582,Gage Park,Chicago,185517.416667
1,12073110,JD258449,2019-11-01 08:00:00,BURGLARY,UNLAWFUL ENTRY,CHURCH / SYNAGOGUE / PLACE OF WORSHIP,False,False,2019,39894,269582,Gage Park,Chicago,185517.416667
2,12079127,JD265413,2019-04-01 22:20:00,SEX OFFENSE,AGGRAVATED CRIMINAL SEXUAL ABUSE,APARTMENT,False,False,2019,39894,269582,Gage Park,Chicago,185517.416667
3,11715936,JC298110,2019-06-08 19:45:00,CRIMINAL TRESPASS,TO VEHICLE,STREET,True,False,2019,39894,269582,Gage Park,Chicago,185517.416667
4,11724588,JC308202,2019-06-15 21:06:00,BATTERY,SIMPLE,RESTAURANT,False,False,2019,39894,269582,Gage Park,Chicago,185517.416667


In [136]:
final_data.to_csv('final_data.csv', index = False)

## Climate Data analysis

We use the global major-city temperature dataset and later extract the Chicago data from it.

In [104]:
climate_city = pd.read_csv("GlobalLandTemperaturesByMajorCity.csv")

In [105]:
climate_city.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


In [106]:
# The dataset was loaded into `climate_city`; list the distinct cities it covers.
unique_city = climate_city['City'].unique()

# Print the unique cities
print(unique_city)


['Abidjan' 'Addis Abeba' 'Ahmadabad' 'Aleppo' 'Alexandria' 'Ankara'
 'Baghdad' 'Bangalore' 'Bangkok' 'Belo Horizonte' 'Berlin' 'Bogotá'
 'Bombay' 'Brasília' 'Cairo' 'Calcutta' 'Cali' 'Cape Town' 'Casablanca'
 'Changchun' 'Chengdu' 'Chicago' 'Chongqing' 'Dakar' 'Dalian'
 'Dar Es Salaam' 'Delhi' 'Dhaka' 'Durban' 'Faisalabad' 'Fortaleza' 'Gizeh'
 'Guangzhou' 'Harare' 'Harbin' 'Ho Chi Minh City' 'Hyderabad' 'Ibadan'
 'Istanbul' 'Izmir' 'Jaipur' 'Jakarta' 'Jiddah' 'Jinan' 'Kabul' 'Kano'
 'Kanpur' 'Karachi' 'Kiev' 'Kinshasa' 'Lagos' 'Lahore' 'Lakhnau' 'Lima'
 'London' 'Los Angeles' 'Luanda' 'Madras' 'Madrid' 'Manila' 'Mashhad'
 'Melbourne' 'Mexico' 'Mogadishu' 'Montreal' 'Moscow' 'Nagoya' 'Nagpur'
 'Nairobi' 'Nanjing' 'New Delhi' 'New York' 'Paris' 'Peking' 'Pune'
 'Rangoon' 'Rio De Janeiro' 'Riyadh' 'Rome' 'São Paulo' 'Saint Petersburg'
 'Salvador' 'Santiago' 'Santo Domingo' 'Seoul' 'Shanghai' 'Shenyang'
 'Singapore' 'Surabaya' 'Surat' 'Sydney' 'Taipei' 'Taiyuan' 'Tangshan'
 'Tianjin' 'Toky

We extract the Chicago climate data from this dataset.

In [107]:
# Select the Chicago rows from the frame loaded above (`climate_city`). The
# .copy() gives an independent frame, since later cells add and modify columns.
chicago_climate_data = climate_city[climate_city['City'] == 'Chicago'].copy()

In [108]:
chicago_climate_data.head(10)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
51674,1743-11-01,5.436,2.205,Chicago,United States,42.59N,87.27W
51675,1743-12-01,,,Chicago,United States,42.59N,87.27W
51676,1744-01-01,,,Chicago,United States,42.59N,87.27W
51677,1744-02-01,,,Chicago,United States,42.59N,87.27W
51678,1744-03-01,,,Chicago,United States,42.59N,87.27W
51679,1744-04-01,8.766,2.357,Chicago,United States,42.59N,87.27W
51680,1744-05-01,11.605,2.102,Chicago,United States,42.59N,87.27W
51681,1744-06-01,17.965,1.989,Chicago,United States,42.59N,87.27W
51682,1744-07-01,21.68,1.786,Chicago,United States,42.59N,87.27W
51683,1744-08-01,,,Chicago,United States,42.59N,87.27W


In [109]:
chicago_climate_data.shape

(3239, 7)

In [110]:
chicago_climate_data.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'City',
       'Country', 'Latitude', 'Longitude'],
      dtype='object')

In [111]:
chicago_climate_data.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
City                              object
Country                           object
Latitude                          object
Longitude                         object
dtype: object

In [112]:
chicago_climate_data.tail(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
54908,2013-05-01,13.734,0.863,Chicago,United States,42.59N,87.27W
54909,2013-06-01,17.913,0.669,Chicago,United States,42.59N,87.27W
54910,2013-07-01,21.914,0.322,Chicago,United States,42.59N,87.27W
54911,2013-08-01,22.23,0.373,Chicago,United States,42.59N,87.27W
54912,2013-09-01,19.977,1.033,Chicago,United States,42.59N,87.27W


In [113]:
# Every remaining row is for Chicago, so the country column is dropped.
chicago_climate_data = chicago_climate_data.drop(columns=['Country'])

In [114]:
# Lay out the target column order. 'index', 'day', 'month' and 'year' are not
# columns of the frame yet, so reindex creates them as empty (NaN) placeholders.
chicago_climate_data = chicago_climate_data.reindex(
    columns=[
        'index',
        'dt',
        'day',
        'month',
        'year',
        'City',
        'AverageTemperature',
        'AverageTemperatureUncertainty',
        'Latitude',
        'Longitude',
    ]
)

In [115]:
# Parse the date, move the old row labels into a column, then fill the
# day / month / year columns from the parsed date.
chicago_climate_data = (
    chicago_climate_data
    .assign(dt=lambda frame: pd.to_datetime(frame["dt"]))
    # An 'index' column already exists, so the old row labels land in 'level_0'.
    .reset_index()
    .assign(
        day=lambda frame: frame["dt"].dt.day,
        month=lambda frame: frame["dt"].dt.month,
        year=lambda frame: frame["dt"].dt.year,
    )
)
chicago_climate_data.head()

Unnamed: 0,level_0,index,dt,day,month,year,City,AverageTemperature,AverageTemperatureUncertainty,Latitude,Longitude
0,51674,,1743-11-01,1,11,1743,Chicago,5.436,2.205,42.59N,87.27W
1,51675,,1743-12-01,1,12,1743,Chicago,,,42.59N,87.27W
2,51676,,1744-01-01,1,1,1744,Chicago,,,42.59N,87.27W
3,51677,,1744-02-01,1,2,1744,Chicago,,,42.59N,87.27W
4,51678,,1744-03-01,1,3,1744,Chicago,,,42.59N,87.27W


In [117]:
# Remove the 'index' column
chicago_climate_data = chicago_climate_data.drop(columns=['index'])

In [118]:
# Give the 'level_0' column the name 'index'
chicago_climate_data = chicago_climate_data.rename(columns={'level_0': 'index'})

In [119]:
chicago_climate_data.head(5)

Unnamed: 0,index,dt,day,month,year,City,AverageTemperature,AverageTemperatureUncertainty,Latitude,Longitude
0,51674,1743-11-01,1,11,1743,Chicago,5.436,2.205,42.59N,87.27W
1,51675,1743-12-01,1,12,1743,Chicago,,,42.59N,87.27W
2,51676,1744-01-01,1,1,1744,Chicago,,,42.59N,87.27W
3,51677,1744-02-01,1,2,1744,Chicago,,,42.59N,87.27W
4,51678,1744-03-01,1,3,1744,Chicago,,,42.59N,87.27W


In [122]:
chicago_climate_data.to_csv("chicago_climate_data.csv",index=False)