In [1]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

In [2]:
# Get the data
elPasoData = pd.read_csv('data/cleanedElPasoData.csv', index_col = 0)
crimeData = pd.read_csv('data/edited El Paso Crimes By Day for 2020 and 2021.csv', thousands=",")

# Rename column to accurately reflect what it measures
crimeData.rename(columns={"Number of Incidents": "numberOfOffenses"}, inplace = True)

print("Sample of the Crime Data:")
print(crimeData.head())

print("Sample of the Covid Data:")
print(elPasoData[["date", "deaths", "Deaths_A_Day", "Face_Masks_Required_in_Public"]].tail())

Sample of the Crime Data:
           date  numberOfOffenses
0  Jan 1,  2020               264
1  Jan 2,  2020               106
2  Jan 3,  2020               113
3  Jan 4,  2020               117
4  Jan 5,  2020                96
Sample of the Covid Data:
           date  deaths  Deaths_A_Day Face_Masks_Required_in_Public
567  2021-08-11     938             1                            No
568  2021-08-12     939             1                            No
569  2021-08-13     943             4                            No
570  2021-08-14     943             0                            No
571  2021-08-15     945             2                            No


In [3]:
# Convert the date to date time
adjustedCrimeData = crimeData
adjustedCrimeData["date"] = pd.to_datetime(crimeData["date"])

# Restrict crime data to the dates we have covid data for.
startDate = pd.to_datetime(elPasoData.loc[0, 'date'])
endDate = pd.to_datetime(elPasoData.loc[len(elPasoData)-1, 'date'])
adjustedCrimeData = adjustedCrimeData[(adjustedCrimeData.date >= startDate)]
adjustedCrimeData = adjustedCrimeData[(adjustedCrimeData.date <= endDate)]
adjustedCrimeData.reset_index(drop = True, inplace = True)

# Make sure that there is no unexpected data.
print("Any NA values in the date column? " + str(adjustedCrimeData['date'].isnull().values.any()))
print("Any NA values in the Number of Offenses column? " + str(adjustedCrimeData['numberOfOffenses'].isnull().values.any()))

# There is one extra column that summarizes the entire month, remove it.
adjustedCrimeData.drop(adjustedCrimeData.tail(1).index,inplace=True)
print(adjustedCrimeData.tail())

Any NA values in the date column? False
Any NA values in the Number of Offenses column? False
          date  numberOfOffenses
567 2021-08-11                85
568 2021-08-12                78
569 2021-08-13                88
570 2021-08-14                89
571 2021-08-15                97


In [4]:
# Convert to datetime.
elPasoData["date"] = pd.to_datetime(elPasoData["date"])

# Create new columns
adjustedCrimeData["covidDeaths"] = 0
adjustedCrimeData["totalCovidDeaths"] = 0
adjustedCrimeData["maskMandate"] = None

# Since both data frames are sorted on date, pull information in elPaso data into adjusted crimeData on date.
for i in range(len(adjustedCrimeData)):
    crimeDate = adjustedCrimeData["date"][i]
    pasoDate = elPasoData["date"][i]
    
    if crimeDate == pasoDate:
        adjustedCrimeData["maskMandate"][i] = elPasoData["Face_Masks_Required_in_Public"][i]
        adjustedCrimeData["covidDeaths"][i] = elPasoData["Deaths_A_Day"][i]
        adjustedCrimeData["totalCovidDeaths"][i] = elPasoData["deaths"][i]

# Check that the data was succesfully moved over
print("Any NA values in the maskMandate column? " + str(adjustedCrimeData['maskMandate'].isnull().values.any()))
print(adjustedCrimeData.head())

# Save file to intermediary file to be used for model and figure generation
adjustedCrimeData.to_csv('data/ElPasoCovidAndCrimeData.csv')

Any NA values in the maskMandate column? False
        date  numberOfOffenses  covidDeaths  totalCovidDeaths maskMandate
0 2020-01-22               106            0                 0          No
1 2020-01-23                80            0                 0          No
2 2020-01-24               105            0                 0          No
3 2020-01-25                89            0                 0          No
4 2020-01-26                90            0                 0          No


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adjustedCrimeData["maskMandate"][i] = elPasoData["Face_Masks_Required_in_Public"][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adjustedCrimeData["covidDeaths"][i] = elPasoData["Deaths_A_Day"][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adjustedCrimeData["totalCovidDeaths"][i] = elPasoData["deaths"][i]
