# This notebook walks through the Capstone 2: Data Wrangling portion of the Springboard Data Science Career Track

In [1]:
#import libraries 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Denver Crime Data Wrangling

## 1 Data Loading

In [2]:
# Provenance: the CSV was downloaded from Kaggle; the City and County of Denver
# open-data portal is recorded as the direct source.
direct_from_source = 'https://www.denvergov.org/opendata/dataset/city-and-county-of-denver-crime'
data_url = 'https://www.kaggle.com/datasets/paultimothymooney/denver-crime-data'

# Read the crime records into a DataFrame, using the first CSV column as the row index.
denver_crime = pd.read_csv('data/crime.csv', index_col=0)

# Preview the first five rows.
denver_crime.head(n=5)

Unnamed: 0,incident_id,offense_id,OFFENSE_CODE,OFFENSE_CODE_EXTENSION,OFFENSE_TYPE_ID,OFFENSE_CATEGORY_ID,FIRST_OCCURRENCE_DATE,LAST_OCCURRENCE_DATE,REPORTED_DATE,INCIDENT_ADDRESS,GEO_X,GEO_Y,GEO_LON,GEO_LAT,DISTRICT_ID,PRECINCT_ID,NEIGHBORHOOD_ID,IS_CRIME,IS_TRAFFIC
0,20226000193,20226000193299900,2999,0,criminal-mischief-other,public-disorder,2022-01-04 11:30:00,2022-01-04 12:00:00,2022-01-04 20:36:00,128 S CANOSA CT,3135366.0,1685410.0,-105.018825,39.714268,4.0,411.0,valverde,1,0
1,20223319,20223319299900,2999,0,criminal-mischief-other,public-disorder,2022-01-03 06:45:00,,2022-01-03 11:01:00,650 15TH ST,3142454.0,1696151.0,-104.993418,39.743649,6.0,611.0,cbd,1,0
2,20223093,20223093299900,2999,0,criminal-mischief-other,public-disorder,2022-01-03 01:00:00,,2022-01-03 06:11:00,919 E COLFAX AVE,3147484.0,1694898.0,-104.975557,39.74013,6.0,621.0,north-capitol-hill,1,0
3,20224000,20224000299900,2999,0,criminal-mischief-other,public-disorder,2022-01-03 19:47:00,,2022-01-03 21:12:00,2345 W ALAMEDA AVE,3136478.0,1684414.0,-105.014892,39.711518,4.0,411.0,valverde,1,0
4,20223956,20223956299900,2999,0,criminal-mischief-other,public-disorder,2022-01-03 17:06:00,,2022-01-03 20:31:00,7800 E SMITH RD,3169237.0,1705800.0,-104.89795,39.769688,5.0,512.0,central-park,1,0


## 2 Data Organization

This notebook can be found on GitHub at the following address:

[Denver Crime Page](https://github.com/Seismic-Striker/Springboard_and_Datacamp/blob/5558bc93e52b4056bea07eb60169a5e4d8180fae/Denver%20Crime%20Data/Denver%20Crime%20Data.ipynb)


## 3 Data Definition

In [3]:
# Explore the dataset: report its (rows, columns) dimensions.
print((len(denver_crime.index), len(denver_crime.columns)))

(470278, 19)


In [4]:
# The frame holds 470,000+ crime rows across 19 columns.
# Summarize each column's non-null count and dtype.

denver_crime.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 470278 entries, 0 to 470277
Data columns (total 19 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   incident_id             470278 non-null  int64  
 1   offense_id              470278 non-null  int64  
 2   OFFENSE_CODE            470278 non-null  int64  
 3   OFFENSE_CODE_EXTENSION  470278 non-null  int64  
 4   OFFENSE_TYPE_ID         470278 non-null  object 
 5   OFFENSE_CATEGORY_ID     470278 non-null  object 
 6   FIRST_OCCURRENCE_DATE   470278 non-null  object 
 7   LAST_OCCURRENCE_DATE    174528 non-null  object 
 8   REPORTED_DATE           470278 non-null  object 
 9   INCIDENT_ADDRESS        432413 non-null  object 
 10  GEO_X                   466247 non-null  float64
 11  GEO_Y                   466247 non-null  float64
 12  GEO_LON                 466246 non-null  float64
 13  GEO_LAT                 466246 non-null  float64
 14  DISTRICT_ID         

In [5]:
# Looks like we have some geographic information as well as date/time data.
# According to the dataset, crimes involving sexual assault do not include location data,
# and some crimes do not have an address that can be connected to them.
# We also have a lot of missing values in the 'LAST_OCCURRENCE_DATE' column, but we'll address that later.


# Let's see how many unique values we have in each of the columns.
print(denver_crime.nunique())

incident_id               440995
offense_id                470278
OFFENSE_CODE                 154
OFFENSE_CODE_EXTENSION         6
OFFENSE_TYPE_ID              198
OFFENSE_CATEGORY_ID           15
FIRST_OCCURRENCE_DATE     312618
LAST_OCCURRENCE_DATE      123899
REPORTED_DATE             399247
INCIDENT_ADDRESS           96792
GEO_X                      47135
GEO_Y                      44611
GEO_LON                    95500
GEO_LAT                    94858
DISTRICT_ID                    7
PRECINCT_ID                   36
NEIGHBORHOOD_ID               78
IS_CRIME                       2
IS_TRAFFIC                     2
dtype: int64


### Data columns
#### So here we have the count of unique values in each column and the names and datatypes


- 'incident_id', 'offense_id'
    - These columns are identifiers and act as indexers, so they have no additional analytical power.


- 'OFFENSE_CODE' & 'OFFENSE_CODE_EXTENSION'
    - These are codes that match the 'OFFENSE_TYPE_ID' column, so they are a little redundant.
    - Categorical


- 'OFFENSE_TYPE_ID',  'OFFENSE_CATEGORY_ID'
    - These are very good data points. The offense category ID is the umbrella under which the offense type ID lives. Both could be useful for classification/prediction.
    - Categorical

- 'FIRST_OCCURRENCE_DATE', 'LAST_OCCURRENCE_DATE', 'REPORTED_DATE',
    - Looking at the data, these are when the incident happened, when it stopped happening, and when it was called in/reported. The first occurrence date should be sufficient.
    - Datetime data

- 'INCIDENT_ADDRESS',
    - This is the physical address of the site of the crime. May or may not be present. There are around 38,000 missing values for this.
    
    
- 'GEO_X', 'GEO_Y', 'GEO_LON', 'GEO_LAT'
    - These are geo locations. X and Y are useless to us for mapping so we'll only keep the ones with longitude and latitude notation. 


- 'DISTRICT_ID', 'PRECINCT_ID'
    - There are 7 districts and 36 precinct IDs. This isn't terribly informative but could be tied to geolocation data later.
    - Categorical

- 'NEIGHBORHOOD_ID'
    - I like having this data. 
    - Categorical

- 'IS_CRIME', 'IS_TRAFFIC'
    - Binary variables indicating whether the record is a crime or a traffic violation. Redundant because of the offense type and category values.

## 4 Preliminary Data Cleaning

In [6]:
# Count the missing values in every column of the raw table.
denver_crime.isnull().sum()

incident_id                    0
offense_id                     0
OFFENSE_CODE                   0
OFFENSE_CODE_EXTENSION         0
OFFENSE_TYPE_ID                0
OFFENSE_CATEGORY_ID            0
FIRST_OCCURRENCE_DATE          0
LAST_OCCURRENCE_DATE      295750
REPORTED_DATE                  0
INCIDENT_ADDRESS           37865
GEO_X                       4031
GEO_Y                       4031
GEO_LON                     4032
GEO_LAT                     4032
DISTRICT_ID                    1
PRECINCT_ID                    1
NEIGHBORHOOD_ID                1
IS_CRIME                       0
IS_TRAFFIC                     0
dtype: int64

In [7]:
# Columns worth carrying forward into the analysis.
columns_to_keep = [
    'OFFENSE_TYPE_ID',
    'OFFENSE_CATEGORY_ID',
    'FIRST_OCCURRENCE_DATE',
    'REPORTED_DATE',
    'GEO_LON',
    'GEO_LAT',
    'NEIGHBORHOOD_ID',
]

# Shorter, lowercase names for the retained columns.
# Note: longitude is stored as 'y_lon' and latitude as 'x_lat'.
newcols = dict(
    OFFENSE_TYPE_ID='crime_type',
    OFFENSE_CATEGORY_ID='crime_category',
    FIRST_OCCURRENCE_DATE='date',
    REPORTED_DATE='reported',
    GEO_LON='y_lon',
    GEO_LAT='x_lat',
    NEIGHBORHOOD_ID='neighborhood',
)

# Take an independent copy of the selected columns (so later edits neither warn
# nor touch the original frame) and rename them in the same step.
crime_df = denver_crime.loc[:, columns_to_keep].copy().rename(columns=newcols)
crime_df.head(n=5)

Unnamed: 0,crime_type,crime_category,date,reported,y_lon,x_lat,neighborhood
0,criminal-mischief-other,public-disorder,2022-01-04 11:30:00,2022-01-04 20:36:00,-105.018825,39.714268,valverde
1,criminal-mischief-other,public-disorder,2022-01-03 06:45:00,2022-01-03 11:01:00,-104.993418,39.743649,cbd
2,criminal-mischief-other,public-disorder,2022-01-03 01:00:00,2022-01-03 06:11:00,-104.975557,39.74013,north-capitol-hill
3,criminal-mischief-other,public-disorder,2022-01-03 19:47:00,2022-01-03 21:12:00,-105.014892,39.711518,valverde
4,criminal-mischief-other,public-disorder,2022-01-03 17:06:00,2022-01-03 20:31:00,-104.89795,39.769688,central-park


In [8]:
# Missing values per column after trimming to the columns of interest.
crime_df.isnull().sum()

crime_type           0
crime_category       0
date                 0
reported             0
y_lon             4032
x_lat             4032
neighborhood         1
dtype: int64

In [9]:
# Pull out the rows that have no longitude so we can inspect them.
missing = crime_df.loc[crime_df['y_lon'].isna()]
missing.head(n=5)

Unnamed: 0,crime_type,crime_category,date,reported,y_lon,x_lat,neighborhood
93,sex-aslt-fondle-adult-victim,sexual-assault,2022-01-02 13:00:00,2022-01-02 15:00:00,,,speer
348,sex-aslt-rape,sexual-assault,2022-01-03 12:00:00,2022-01-05 03:53:00,,,northeast-park-hill
349,sex-aslt-non-rape,sexual-assault,2022-01-06 20:30:00,2022-01-06 20:40:00,,,hampden
51866,sex-aslt-fondle-adult-victim,sexual-assault,2020-02-28 16:00:00,2020-02-28 20:13:00,,,hampden
51867,sex-aslt-fondle-adult-victim,sexual-assault,2018-08-14 11:35:00,2018-08-14 12:08:00,,,central-park


In [10]:
# According to the dataset, crimes involving sexual assault do not include
# location data, so remove every row in that category.
crime_df = crime_df.loc[crime_df['crime_category'].ne('sexual-assault')]
crime_df.isnull().sum()

crime_type        0
crime_category    0
date              0
reported          0
y_lon             1
x_lat             1
neighborhood      1
dtype: int64

In [11]:
# Remove the stragglers: drop any row that still has a missing value, then
# renumber the index from zero and discard the old one.
crime_df = crime_df.dropna().reset_index(drop=True)
crime_df.isnull().sum()

crime_type        0
crime_category    0
date              0
reported          0
y_lon             0
x_lat             0
neighborhood      0
dtype: int64

## 5 Converting datatypes

In [12]:
# Columns holding timestamps as text that need converting to datetime.
date_columns = ['date','reported']

# Parse each listed column with pd.to_datetime and assign the result back so
# the conversion is kept.
crime_df = crime_df.assign(
    **{name: pd.to_datetime(crime_df[name]) for name in date_columns}
)

In [13]:
# Confirm the dtypes of the converted columns.
crime_df.dtypes.loc[date_columns]

date        datetime64[ns]
reported    datetime64[ns]
dtype: object

In [14]:
# Split the occurrence timestamp into separate calendar-date and time-of-day
# columns for easier processing in later visualizations.
crime_df = crime_df.assign(
    dates=crime_df['date'].dt.date,
    times=crime_df['date'].dt.time,
)
crime_df

Unnamed: 0,crime_type,crime_category,date,reported,y_lon,x_lat,neighborhood,dates,times
0,criminal-mischief-other,public-disorder,2022-01-04 11:30:00,2022-01-04 20:36:00,-105.018825,39.714268,valverde,2022-01-04,11:30:00
1,criminal-mischief-other,public-disorder,2022-01-03 06:45:00,2022-01-03 11:01:00,-104.993418,39.743649,cbd,2022-01-03,06:45:00
2,criminal-mischief-other,public-disorder,2022-01-03 01:00:00,2022-01-03 06:11:00,-104.975557,39.740130,north-capitol-hill,2022-01-03,01:00:00
3,criminal-mischief-other,public-disorder,2022-01-03 19:47:00,2022-01-03 21:12:00,-105.014892,39.711518,valverde,2022-01-03,19:47:00
4,criminal-mischief-other,public-disorder,2022-01-03 17:06:00,2022-01-03 20:31:00,-104.897950,39.769688,central-park,2022-01-03,17:06:00
...,...,...,...,...,...,...,...,...,...
466241,fraud-by-telephone,white-collar-crime,2022-02-25 08:00:00,2022-02-26 09:22:00,-105.084098,39.629311,marston,2022-02-25,08:00:00
466242,fraud-by-telephone,white-collar-crime,2022-01-27 09:00:00,2022-02-26 10:21:00,-104.823123,39.797813,montbello,2022-01-27,09:00:00
466243,fraud-by-use-of-computer,white-collar-crime,2022-02-24 12:00:00,2022-02-25 23:30:00,-104.920013,39.721561,hilltop,2022-02-24,12:00:00
466244,theft-of-services,larceny,2022-02-27 12:00:00,2022-02-27 18:22:00,-105.025716,39.742973,west-colfax,2022-02-27,12:00:00


## 6 Adding supplemental datasets

We are going to add data based on the neighborhood data in this dataset. This will be merged on the neighborhood column. 

- the primary goal will be to make all the 78 unique values uniform to allow that merge to work

neighborhood_url = 'https://www.denvergov.org/opendata/dataset/city-and-county-of-denver-equity-index-2020-neighborhood'
census_neighborhood_demographics = 'https://www.denvergov.org/opendata/dataset/city-and-county-of-denver-census-neighborhood-demographics-2010'


### 6.1 Adding equity data

In [15]:
# Load the 2020 neighborhood equity index file.
equity = pd.read_csv('data/equity_index_2020___neighborhood.csv')
equity.head(n=5)

Unnamed: 0,NBRHD_NAME,SOCIOECON_SCORE,ACCESSTOCARE_SCORE,MORBIDITY_SCORE,MORTALITY_SCORE,BUILTENV_SCORE,OVERALLEQUITY_SCORE
0,University Hills,4.0,4.5,2.5,3.0,3.5,3.5
1,Harvey Park,2.0,2.5,2.0,3.0,1.5,2.2
2,Mar Lee,2.0,1.5,2.5,2.0,3.5,2.3
3,East Colfax,1.5,1.5,3.5,1.0,2.0,1.9
4,Capitol Hill,4.0,2.5,4.0,1.0,4.0,3.1


In [16]:
# Keep only the neighborhood name and the overall equity score.
cols_to_keep = ['NBRHD_NAME','OVERALLEQUITY_SCORE']

# Rename to match the column names used in crime_df so the frames can be merged
# on 'neighborhood'.
colnames ={
    'NBRHD_NAME':'neighborhood',
    'OVERALLEQUITY_SCORE':'equity_score'
}

# Select and rename in one expression. rename() without inplace returns a new
# DataFrame, so `equity` is no longer a slice of the loaded frame; this avoids
# the SettingWithCopyWarning that an in-place rename on the slice (and later
# column assignments on it) would raise.
equity = equity[cols_to_keep].rename(columns=colnames)
equity.head()

Unnamed: 0,neighborhood,equity_score
0,University Hills,3.5
1,Harvey Park,2.2
2,Mar Lee,2.3
3,East Colfax,1.9
4,Capitol Hill,3.1


###  6.2 Adding population data

In [17]:
# Load the 2010 census neighborhood demographics file.
demos = pd.read_csv('data/census_neighborhood_demographics_2010.csv')
demos.head(n=5)

Unnamed: 0,NBHD_ID,NBRHD_NAME,POPULATION_2010,HISPANIC_2010,WHITE_2010,BLACK_2010,NATIVEAM_2010,ASIAN_2010,HAWPACIS_2010,OTHER_2010,...,RENTED_AGE_25_TO_34,RENTED_AGE_35_TO_44,RENTED_AGE_45_TO_54,RENTED_AGE_55_TO_59,RENTED_AGE_60_TO_64,RENTED_AGE_65_TO_74,RENTED_AGE_75_TO_84,RENTED_AGE_85_PLUS,SHAPE_Length,SHAPE_Area
0,32,Hampden,17547.0,2505.0,11750.0,1963.0,64.0,726.0,20.0,38.0,...,1363.0,791.0,700.0,304.0,253.0,285.0,200.0,229.0,0.200466,0.000768
1,3,Baker,4879.0,1664.0,2854.0,144.0,43.0,52.0,10.0,7.0,...,486.0,235.0,231.0,113.0,64.0,82.0,23.0,7.0,0.101767,0.000406
2,70,Washington Park,6905.0,295.0,6356.0,28.0,9.0,119.0,3.0,13.0,...,329.0,132.0,70.0,31.0,36.0,27.0,6.0,21.0,0.090445,0.000411
3,13,Cherry Creek,5589.0,332.0,4896.0,79.0,17.0,168.0,4.0,3.0,...,521.0,261.0,211.0,79.0,92.0,161.0,224.0,207.0,0.063923,0.000228
4,22,Country Club,3001.0,94.0,2740.0,20.0,8.0,77.0,1.0,5.0,...,90.0,55.0,34.0,14.0,17.0,9.0,4.0,2.0,0.052865,0.000177


In [18]:
# Keep only the neighborhood name and its 2010 population.
cols_to_keep = ['NBRHD_NAME','POPULATION_2010']

# Rename to match the column names used in the other frames so they can be
# merged on 'neighborhood'.
colnames ={
    'NBRHD_NAME':'neighborhood',
    'POPULATION_2010':'pop_2010'
}

# Select and rename in one expression. rename() without inplace returns a new
# DataFrame, which avoids the SettingWithCopyWarning that an in-place rename on
# a column slice would raise.
demos = demos[cols_to_keep].rename(columns=colnames)

# `population` is another name for the same trimmed frame.
population=demos
population.head()

Unnamed: 0,neighborhood,pop_2010
0,Hampden,17547.0
1,Baker,4879.0
2,Washington Park,6905.0
3,Cherry Creek,5589.0
4,Country Club,3001.0


In [19]:
# Do the two tables list the same neighborhoods? This compares the unique names
# as ordered lists, so it is sensitive to row order as well as spelling.
population['neighborhood'].unique().tolist() == equity['neighborhood'].unique().tolist()

False

In [20]:
# Unique neighborhood names from each supplemental table.
pop_list = population['neighborhood'].unique().tolist()
equity_list = equity['neighborhood'].unique().tolist()

# Names present in one table but not the other, in each direction.
print(list(set(pop_list).difference(set(equity_list))))
print(list(set(equity_list).difference(set(pop_list))))

['College View / South Platte', 'Gateway / Green Valley Ranch']
['College View - South Platte', 'Gateway - Green Valley Ranch']


In [21]:
# The equity table joins two compound names with ' - ' where the population
# table uses ' / '; rewrite them to the population table's spelling.
equity['neighborhood'] = equity['neighborhood'].str.replace(
    'College View - South Platte', 'College View / South Platte', regex=False
)
equity['neighborhood'] = equity['neighborhood'].str.replace(
    'Gateway - Green Valley Ranch', 'Gateway / Green Valley Ranch', regex=False
)

# Refresh the name list and show any population names still absent from it.
equity_list = equity['neighborhood'].unique().tolist()
list(set(pop_list).difference(set(equity_list)))

[]

In [22]:
# Sort both name lists so the equality check no longer depends on row order.
pop_list = sorted(pop_list)
equity_list = sorted(equity_list)
pop_list == equity_list

True

In [23]:
# Combine equity scores and population into one per-neighborhood lookup table.
sidecar = pd.merge(equity, population, on='neighborhood')

In [24]:
# Display the merged neighborhood lookup table (equity score and 2010 population).
sidecar

Unnamed: 0,neighborhood,equity_score,pop_2010
0,University Hills,3.5,5327.0
1,Harvey Park,2.2,11525.0
2,Mar Lee,2.3,12452.0
3,East Colfax,1.9,10191.0
4,Capitol Hill,3.1,14708.0
...,...,...,...
73,Whittier,2.9,4831.0
74,Cole,2.5,4651.0
75,Westwood,2.0,15486.0
76,Sunnyside,2.4,9726.0


## 7 Creating final DF by merging

#### The neighborhood column needs to be cleaned so all 78 values are identical

In [25]:
# Title-case the crime table's neighborhood names, then list the distinct values.
crime_df['neighborhood'] = crime_df['neighborhood'].str.title()
df_list = crime_df['neighborhood'].unique().tolist()
print(df_list)

['Valverde', 'Cbd', 'North-Capitol-Hill', 'Central-Park', 'Cory-Merrill', 'Capitol-Hill', 'Belcaro', 'Lincoln-Park', 'Villa-Park', 'Cheesman-Park', 'Virginia-Village', 'Highland', 'Windsor', 'Marston', 'Five-Points', 'West-Colfax', 'Sun-Valley', 'Hampden-South', 'Hampden', 'Dia', 'University', 'Cherry-Creek', 'Gateway-Green-Valley-Ranch', 'East-Colfax', 'West-Highland', 'Mar-Lee', 'Elyria-Swansea', 'Union-Station', 'University-Park', 'Jefferson-Park', 'Washington-Park', 'Lowry-Field', 'University-Hills', 'Goldsmith', 'Baker', 'City-Park-West', 'Civic-Center', 'Ruby-Hill', 'Northeast-Park-Hill', 'Barnum-West', 'Montbello', 'Wellshire', 'Fort-Logan', 'Bear-Valley', 'Overland', 'Kennedy', 'Harvey-Park-South', 'Sunnyside', 'Athmar-Park', 'Hilltop', 'Harvey-Park', 'Clayton', 'Westwood', 'Globeville', 'Washington-Virginia-Vale', 'North-Park-Hill', 'South-Park-Hill', 'Speer', 'Platt-Park', 'College-View-South-Platte', 'Congress-Park', 'Auraria', 'Barnum', 'Southmoor-Park', 'Sloan-Lake', 'Mont

In [26]:
# There are WAY too many hyphens: swap every hyphen for a space.
crime_df['neighborhood'] = crime_df['neighborhood'].str.replace('-', ' ', regex=False)

# Names in the crime table that have no counterpart in the lookup table.
df_list = crime_df['neighborhood'].unique().tolist()
sidecar_list = sidecar['neighborhood'].unique().tolist()
print(list(set(df_list).difference(set(sidecar_list))))

['College View South Platte', 'Dia', 'Central Park', 'Cory Merrill', 'Cbd', 'Gateway Green Valley Ranch']


In [27]:
# The reverse check: lookup-table names that are missing from the crime table.
print(list(set(sidecar_list).difference(set(df_list))))

['CBD', 'Cory - Merrill', 'College View / South Platte', 'Stapleton', 'Gateway / Green Valley Ranch', 'DIA']


In [28]:
# Replace mismatched neighborhood names so both tables use identical spellings.
#
# Series.replace with a dict swaps a value only when the whole value equals a
# key. The previous chained .str.replace(..., regex=True) calls did substring
# matching, which would also rewrite any other name that merely contained
# 'Dia', 'Cbd', etc., and each step ran on the output of the step before it.
neighborhood_fixes = {
    'Dia': 'DIA',
    'Cbd': 'CBD',
    'Gateway Green Valley Ranch': 'Gateway / Green Valley Ranch',
    'Stapleton': 'Central Park',
    'Cory Merrill': 'Cory - Merrill',
    'College View South Platte': 'College View / South Platte',
}
crime_df['neighborhood'] = crime_df['neighborhood'].replace(neighborhood_fixes)

# The lookup table uses 'Stapleton' where the crime table uses 'Central Park'.
sidecar['neighborhood'] = sidecar['neighborhood'].replace({'Stapleton': 'Central Park'})

In [30]:
# Re-run both set-difference checks; both should now print empty lists.
df_list = crime_df['neighborhood'].unique().tolist()
sidecar_list = sidecar['neighborhood'].unique().tolist()
print(list(set(sidecar_list).difference(set(df_list))))
print(list(set(df_list).difference(set(sidecar_list))))

[]
[]


In [31]:
# Preview the crime table with the harmonized neighborhood names.
crime_df.head(n=5)

Unnamed: 0,crime_type,crime_category,date,reported,y_lon,x_lat,neighborhood,dates,times
0,criminal-mischief-other,public-disorder,2022-01-04 11:30:00,2022-01-04 20:36:00,-105.018825,39.714268,Valverde,2022-01-04,11:30:00
1,criminal-mischief-other,public-disorder,2022-01-03 06:45:00,2022-01-03 11:01:00,-104.993418,39.743649,CBD,2022-01-03,06:45:00
2,criminal-mischief-other,public-disorder,2022-01-03 01:00:00,2022-01-03 06:11:00,-104.975557,39.74013,North Capitol Hill,2022-01-03,01:00:00
3,criminal-mischief-other,public-disorder,2022-01-03 19:47:00,2022-01-03 21:12:00,-105.014892,39.711518,Valverde,2022-01-03,19:47:00
4,criminal-mischief-other,public-disorder,2022-01-03 17:06:00,2022-01-03 20:31:00,-104.89795,39.769688,Central Park,2022-01-03,17:06:00


In [32]:
# Record the (rows, columns) of the crime table before merging, for comparison afterwards.
crime_df.shape

(466246, 9)

In [33]:
# Attach each neighborhood's equity score and 2010 population to every crime row.
# validate='many_to_one' makes pandas raise a MergeError if `sidecar` contains a
# duplicated neighborhood, which would otherwise silently duplicate crime rows.
df = crime_df.merge(sidecar, on ='neighborhood', validate='many_to_one')

# The default inner join drops crime rows whose neighborhood has no match in
# `sidecar`; fail loudly here rather than lose records unnoticed.
assert len(df) == len(crime_df), 'merge dropped crime rows with unmatched neighborhoods'
df.head()

Unnamed: 0,crime_type,crime_category,date,reported,y_lon,x_lat,neighborhood,dates,times,equity_score,pop_2010
0,criminal-mischief-other,public-disorder,2022-01-04 11:30:00,2022-01-04 20:36:00,-105.018825,39.714268,Valverde,2022-01-04,11:30:00,1.6,3941.0
1,criminal-mischief-other,public-disorder,2022-01-03 19:47:00,2022-01-03 21:12:00,-105.014892,39.711518,Valverde,2022-01-03,19:47:00,1.6,3941.0
2,drug-opium-or-deriv-sell,drug-alcohol,2022-01-05 12:10:00,2022-01-05 15:01:00,-105.024456,39.718448,Valverde,2022-01-05,12:10:00,1.6,3941.0
3,drug-opium-or-deriv-possess,drug-alcohol,2022-01-05 12:10:00,2022-01-05 15:01:00,-105.024456,39.718448,Valverde,2022-01-05,12:10:00,1.6,3941.0
4,theft-of-motor-vehicle,auto-theft,2022-01-05 04:00:00,2022-01-05 11:08:00,-105.012233,39.711452,Valverde,2022-01-05,04:00:00,1.6,3941.0


In [34]:
# The row count should match crime_df, with two added columns (equity_score, pop_2010).
df.shape

(466246, 11)

In [36]:
# Confirm the merged table has no missing values in any column.
df.isnull().sum()

crime_type        0
crime_category    0
date              0
reported          0
y_lon             0
x_lat             0
neighborhood      0
dates             0
times             0
equity_score      0
pop_2010          0
dtype: int64

In [35]:
# Write the cleaned, merged table to disk. The row index is written as the
# first (unnamed) column, so read it back with index_col=0.
df.to_csv('denver_crime_clean.csv', index=True)