# Import necessary packages and Functions from custom function file

In [2]:
import pandas as pd
import numpy as np
import psycopg2 
from sql_functions import get_engine
from sql_functions import get_dataframe 
import sqlalchemy
import matplotlib.pyplot as plt
import seaborn as sns

schema= 'capstone_crime_nerds'

Importing our engine information for connecting to the SQL database and assigning it the 'engine' variable.

In [3]:
engine = get_engine()

# Pulling Data From SQL Database

## Pull table median rent data from sql database

In [3]:
median_rent = get_dataframe(f'select * FROM {schema}.median_rent_monthly')

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


The raw rent data includes AreaName which includes currently boroughs and smaller areas within these boroughs. We only want the boroughs from this list but we will move on with cleanup later. We also have the median rent value for the area and a column with the month and year. This data starts at Jan, 2012 and goes until December 2024.

In [4]:
median_rent

Unnamed: 0,areaName,Borough,areaType,month,median_rent
0,All Downtown,Manhattan,submarket,2010-01,3200.0
1,All Midtown,Manhattan,submarket,2010-01,2875.0
2,All Upper East Side,Manhattan,submarket,2010-01,2450.0
3,All Upper Manhattan,Manhattan,submarket,2010-01,1825.0
4,All Upper West Side,Manhattan,submarket,2010-01,2895.0
...,...,...,...,...,...
33853,Windsor Terrace,Brooklyn,neighborhood,2024-03,3150.0
33854,Woodhaven,Queens,neighborhood,2024-03,2500.0
33855,Woodlawn,Bronx,neighborhood,2024-03,
33856,Woodside,Queens,neighborhood,2024-03,2700.0


----

## Pull income data from sql database

In [5]:
median_income = get_dataframe(f'select * FROM {schema}.median_income_yearly')

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


Our Median income data has the 5 boroughs of new york and includes median annual salary for these boroughs. This salary data is calculated yearly and our data set includes data from the year 2010 to 2022.

In [6]:
median_income

Unnamed: 0,borough,median_income_usd,year
0,"Bronx borough, Bronx County, New York",34264,2010
1,"Brooklyn borough, Kings County, New York",43567,2010
2,"Manhattan borough, New York County, New York",64971,2010
3,"Queens borough, Queens County, New York",55291,2010
4,"Staten Island borough, Richmond County, New York",71084,2010
...,...,...,...
60,"Bronx borough, Bronx County, New York",47036,2022
61,"Brooklyn borough, Kings County, New York",74692,2022
62,"Manhattan borough, New York County, New York",99880,2022
63,"Queens borough, Queens County, New York",82431,2022


----

## Pull Crime data from sql database (p/crime type)

From our full crime data we will use a SQL query to only take specific columns. We will take the report date and rename it to just date, the crime type and the borough columns.

In [7]:
crime_data = get_dataframe(f'select report_date as date, crime_type, borough  from {schema}.criminal_data_new')

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


In [8]:
crime_data.head()

Unnamed: 0,date,crime_type,borough
0,2013-08-30,Felony,Brooklyn
1,2013-08-30,Misdemeanor,Bronx
2,2013-08-30,Misdemeanor,Queens
3,2013-08-30,Felony,Manhattan
4,2013-08-31,Felony,Bronx


------

# Read Data From CSV

## Reading in Total Population Data

For our population data we have data inside many csv files. Each year has it's own population data including many different views. To read our csv files in we can create a for loop which will read all of them. First, we will create a list of the different years included in our data. As all files have identical names apart from the year we can read through each file using a list.

First, we create a list which has the different years.

In [9]:
years_list = list(range(2010,2023))

years_list

[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]

Next, we create a new dictionary called pop_data. This dictionary will include a data frame for each year of population data. The for loop will loop through each year in our list and replace that year in the 'read_csv' file name and read in the csv for that specific file. Following this, we will extract only two columns as the full csv file contains a lot of unnecessary data. Following this, we will create a new column called year which will have the current year included (at that point of time in the for loop) as otherwise there is no way to identify them. We will also rename the columns which include the population data and the borough names into something that matches our dataset. ('borough' and 'population')

After this, the data frame is added to the dictionary, which can beall joined together later on.

In [10]:
pop_data = {}

for x in years_list:
    pop = pd.read_csv(f'../GOODDATA/age_race_data/ACSST5Y{x}.S0501-Data.csv')
    pop = pop[['NAME','S0501_C01_001E']]
    pop['year'] = x
    pop.rename(columns={'NAME':'county','S0501_C01_001E':'population'}, inplace=True)
    pop.drop(pop.index[0], inplace=True)

    pop_data[f'population_{x}'] = pop

We can have a look at one of the specific data frames within the dictionary by calling it. Here we can view the structure of our data frames.

In [11]:
pop_data['population_2021']

Unnamed: 0,county,population,year
1,"Bronx County, New York",1468262,2021
2,"Kings County, New York",2712360,2021
3,"New York County, New York",1669127,2021
4,"Queens County, New York",2393104,2021
5,"Richmond County, New York",493194,2021


Having all of our population data inside separate data frames inside a dictionary is not ideal for us, however. We can join each of the items inside the dictionary (our dataframes) together using the .concat function included with pandas. This will concatonate (join) all of our data frames within the dictionary into one data frame.

We assign this data to the variable 'population_data'.

In [12]:
population_data = pd.concat(pop_data.values(), ignore_index=True)

population_data

Unnamed: 0,county,population,year
0,"Bronx County, New York",1365725,2010
1,"Kings County, New York",2466782,2010
2,"New York County, New York",1583345,2010
3,"Queens County, New York",2199169,2010
4,"Richmond County, New York",463450,2010
...,...,...,...
60,"Bronx County, New York",1443229,2022
61,"Kings County, New York",2679620,2022
62,"New York County, New York",1645867,2022
63,"Queens County, New York",2360826,2022


## Ethnicity Population Data

Next, we will read in a new set of data related to the different total numbers of different ethnicity's that live in the different New York City boroughs.

First, we create a list called 'years'. This will allow us to loop through the different CSV files as our data is currently seperated into different CSV files according to the year of the data.

In [13]:
# As we have data from 2016 to 2022, we can use 'range' to give us a quick list of all the years between 2016 and 2023.
years = list(range(2016,2023))
years

[2016, 2017, 2018, 2019, 2020, 2021, 2022]

As the following loop is rather large, you can read the comments in between the steps to get an understanding of what exactly is happening. 

In [14]:

race_data = {} # First, we create an empty dictionary. This will serve as storage for all of the data frames we will be creating.

for x in years: 
    # We loop through each year in the list and replace x with the year.
    race = pd.read_csv(f'../GOODDATA/race data 2016-2022/ACSDT5Y{x}.B02001-Data.csv')
    # We read in the CSV files and assign them the name 'race' for now.
    race = race[['NAME','B02001_001E','B02001_002E',
                 'B02001_003E','B02001_004E','B02001_005E','B02001_006E',
                 'B02001_007E','B02001_008E']]
    # Now, we select only the columns from the csv file which are necessary for our analysis.
    race['year'] = x
    # This line adds a new column to the dataset which includes the year on each row. This is how we will differentiate the different yearly data when they are all put together later.
    race.rename(columns={'NAME':'county','B02001_001E':'total','B02001_002E':'white',
                         'B02001_003E':'black','B02001_004E':'native','B02001_005E':'asian',
                         'B02001_006E':'islander','B02001_007E':'hispanic','B02001_008E':'two'}, inplace=True)
    # The columns are renamed into something that we understand.
    race.drop(race.index[0], inplace=True)
    # We can drop the first trow in our dataset as it contains information not important to us.
    cols = ['total','white','black','native','asian','islander','hispanic','two']
    for col in cols:
        race[col] = pd.to_numeric(race[col], errors='coerce')
    # The for loop above will convert the columns in the 'colc' list to numeric so they can be used in mathematical functions later on.
    race['other'] = race.islander + race.two
    race.drop(['two', 'islander'],axis=1,inplace=True)
    race['borough'] = race['county'].str.split(' borough', expand=True)[0]
    race = race.reindex(columns=['county','borough','year','total','white','black','native','asian','hispanic','other'])



    race_data[f'race_{x}'] = race

In [15]:
race_data['race_2020']

Unnamed: 0,county,borough,year,total,white,black,native,asian,hispanic,other
1,"Bronx borough, Bronx County, New York",Bronx,2020,1427056,306569,497301,9539,55362,459756,98529
2,"Brooklyn borough, Kings County, New York",Brooklyn,2020,2576771,1103205,806746,8688,306741,228625,122766
3,"Manhattan borough, New York County, New York",Manhattan,2020,1629153,898723,233476,6867,198678,190559,100850
4,"Queens borough, Queens County, New York",Queens,2020,2270976,814353,409975,10641,588875,311820,135312
5,"Staten Island borough, Richmond County, New York",Staten Island,2020,475596,340436,48623,1149,47605,18419,19364


In [16]:
race_data_full = pd.concat(race_data.values(), ignore_index=True)


In [17]:
race_data_full.tail()

Unnamed: 0,county,borough,year,total,white,black,native,asian,hispanic,other
30,"Bronx borough, Bronx County, New York",Bronx,2022,1443229,258727,491689,14119,56630,462156,159908
31,"Brooklyn borough, Kings County, New York",Brooklyn,2022,2679620,1082004,803621,14503,321110,261596,196786
32,"Manhattan borough, New York County, New York",Manhattan,2022,1645867,842952,230583,8518,199086,209622,155106
33,"Queens borough, Queens County, New York",Queens,2022,2360826,720630,417637,13901,614652,368856,225150
34,"Staten Island borough, Richmond County, New York",Staten Island,2022,492925,326334,48670,1787,56516,23636,35982


In [18]:
id_vars = ['county', 'borough', 'year']

value_vars = ['total', 'white', 'black', 'native', 'asian', 'hispanic', 'other']

race_data_melt = race_data_full.melt(id_vars=id_vars, value_vars=value_vars, var_name='ethnicity', value_name='count')


In [19]:
race_data_melt.head(5)

Unnamed: 0,county,borough,year,ethnicity,count
0,"Bronx borough, Bronx County, New York",Bronx,2016,total,1436785
1,"Brooklyn borough, Kings County, New York",Brooklyn,2016,total,2606852
2,"Manhattan borough, New York County, New York",Manhattan,2016,total,1634989
3,"Queens borough, Queens County, New York",Queens,2016,total,2310011
4,"Staten Island borough, Richmond County, New York",Staten Island,2016,total,473324


In [20]:
id_vars = ['county', 'borough', 'year','total']

value_vars = ['white', 'black', 'native', 'asian', 'hispanic', 'other']

race_data_melt2 = race_data_full.melt(id_vars=id_vars, value_vars=value_vars, var_name='ethnicity', value_name='count')

In [21]:
race_data_melt2

Unnamed: 0,county,borough,year,total,ethnicity,count
0,"Bronx borough, Bronx County, New York",Bronx,2016,1436785,white,295351
1,"Brooklyn borough, Kings County, New York",Brooklyn,2016,2606852,white,1128927
2,"Manhattan borough, New York County, New York",Manhattan,2016,1634989,white,928883
3,"Queens borough, Queens County, New York",Queens,2016,2310011,white,936414
4,"Staten Island borough, Richmond County, New York",Staten Island,2016,473324,white,355407
...,...,...,...,...,...,...
205,"Bronx borough, Bronx County, New York",Bronx,2022,1443229,other,159908
206,"Brooklyn borough, Kings County, New York",Brooklyn,2022,2679620,other,196786
207,"Manhattan borough, New York County, New York",Manhattan,2022,1645867,other,155106
208,"Queens borough, Queens County, New York",Queens,2022,2360826,other,225150


In [22]:
race_data_melt2['share'] = race_data_melt2['count'] / race_data_melt2['total']

Uploading this ethnicity data to the SQL database:

In [23]:
table_name = 'population_by_ethnicity'
engine = get_engine()


if engine!=None:
    try:
        race_data_melt2.to_sql(name=table_name, 
                        con=engine, 
                        if_exists='replace',
                       schema=schema,
                       index=False, 
                        chunksize=5000, 
                       method='multi') 
        print(f"The {table_name} table was imported successfully.")
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


The population_by_ethnicity table was imported successfully.


Checking that the table is correctly uploaded and can be called using a function.

In [24]:
get_dataframe(f'select * from {schema}.population_by_ethnicity')

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


Unnamed: 0,county,borough,year,total,ethnicity,count,share
0,"Bronx borough, Bronx County, New York",Bronx,2016,1436785,white,295351,0.205564
1,"Brooklyn borough, Kings County, New York",Brooklyn,2016,2606852,white,1128927,0.433061
2,"Manhattan borough, New York County, New York",Manhattan,2016,1634989,white,928883,0.568128
3,"Queens borough, Queens County, New York",Queens,2016,2310011,white,936414,0.405372
4,"Staten Island borough, Richmond County, New York",Staten Island,2016,473324,white,355407,0.750875
...,...,...,...,...,...,...,...
205,"Bronx borough, Bronx County, New York",Bronx,2022,1443229,other,159908,0.110799
206,"Brooklyn borough, Kings County, New York",Brooklyn,2022,2679620,other,196786,0.073438
207,"Manhattan borough, New York County, New York",Manhattan,2022,1645867,other,155106,0.094240
208,"Queens borough, Queens County, New York",Queens,2022,2360826,other,225150,0.095369


-----

# Initial Data Cleanup


## Cleaning the Population Data

There a ra few clean-up tasks we have for the population data. Firstly, we have our borough names which do not match the borough naming convention of the other data sets. We can use a dictionary and the .map method to create a new column with the correct naming conventions. As the existing names are based on the county names we manually createa dictionary translating this into our borough names.


In [25]:
borough_dict = {'Bronx County, New York': 'Bronx', 
                'Kings County, New York': 'Brooklyn', 
                'New York County, New York': 'Manhattan', 
                'Queens County, New York': 'Queens', 
                'Richmond County, New York': 'Staten Island'}

In [26]:
population_data['borough'] = population_data.county.map(borough_dict)

In [27]:
population_data

Unnamed: 0,county,population,year,borough
0,"Bronx County, New York",1365725,2010,Bronx
1,"Kings County, New York",2466782,2010,Brooklyn
2,"New York County, New York",1583345,2010,Manhattan
3,"Queens County, New York",2199169,2010,Queens
4,"Richmond County, New York",463450,2010,Staten Island
...,...,...,...,...
60,"Bronx County, New York",1443229,2022,Bronx
61,"Kings County, New York",2679620,2022,Brooklyn
62,"New York County, New York",1645867,2022,Manhattan
63,"Queens County, New York",2360826,2022,Queens


We will also keep the county names in for this data specifically, as we will be using Tableau later on for our visualizations. Tableau can geographically map different locations and the county names will help us create nice location-based visualizations later down the line, as the borough names can be ambiguous on the grand scale of the US.

For now, we will change the order of the columns only.


In [28]:
population_data = population_data.reindex(columns=['county','borough','year','population'])

In [29]:
print(type(population_data))

<class 'pandas.core.frame.DataFrame'>


We will also change the 'population' column to the integer data type as it is currently a string (object) value.

In [30]:
population_data.population = population_data.population.astype(int)

Population data was also found for the year 2023. This can be manually added in in the code below. To do this, we create a second data frame with the necessary information and then we can use the .append to add this to the existing population data.

In [31]:
pop_2023 = pd.DataFrame({'county': ['Bronx County, New York', 'Kings County, New York',
       'New York County, New York', 'Queens County, New York',
       'Richmond County, New York'],
       'borough': ['Bronx','Brooklyn', 'Manhattan', 'Queens', 'Staten Island'],
    'year': [2023, 2023, 2023, 2023, 2023],
    'population': [1404200, 2560600, 1569400, 2230200, 495600]})

In [32]:
population_data = pd.concat([population_data,pop_2023])

In [33]:
population_data

Unnamed: 0,county,borough,year,population
0,"Bronx County, New York",Bronx,2010,1365725
1,"Kings County, New York",Brooklyn,2010,2466782
2,"New York County, New York",Manhattan,2010,1583345
3,"Queens County, New York",Queens,2010,2199169
4,"Richmond County, New York",Staten Island,2010,463450
...,...,...,...,...
0,"Bronx County, New York",Bronx,2023,1404200
1,"Kings County, New York",Brooklyn,2023,2560600
2,"New York County, New York",Manhattan,2023,1569400
3,"Queens County, New York",Queens,2023,2230200


## Cleaning the Rent Data

Filtering the AreaTypes column to only include the borough data.

In [34]:
median_rent_borough = median_rent[median_rent['areaType'] == 'borough']


Here we can see that we have extracted data for only the 5 boroughs of New York, matching our crime data.

In [35]:
median_rent_borough

Unnamed: 0,areaName,Borough,areaType,month,median_rent
22,Bronx,Bronx,borough,2010-01,1600.0
24,Brooklyn,Brooklyn,borough,2010-01,2200.0
108,Manhattan,Manhattan,borough,2010-01,2800.0
150,Queens,Queens,borough,2010-01,1655.0
172,Staten Island,Staten Island,borough,2010-01,
...,...,...,...,...,...
33682,Bronx,Bronx,borough,2024-03,2600.0
33684,Brooklyn,Brooklyn,borough,2024-03,3375.0
33768,Manhattan,Manhattan,borough,2024-03,4296.0
33810,Queens,Queens,borough,2024-03,2995.0


A quick .info() shows us that we have 855 total rows and 5 different columns. The median rent is a float value and the others are currently object/string values.

In [36]:
median_rent_borough.info()

<class 'pandas.core.frame.DataFrame'>
Index: 855 entries, 22 to 33832
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   areaName     855 non-null    object 
 1   Borough      855 non-null    object 
 2   areaType     855 non-null    object 
 3   month        855 non-null    object 
 4   median_rent  826 non-null    float64
dtypes: float64(1), object(4)
memory usage: 40.1+ KB


Since the two columns, areaName and areaType are redundant we can drop them with the .drop function. The inplace=true ensures that the data frame is overridden. 

In [37]:
median_rent_borough.drop(['areaName','areaType'],axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  median_rent_borough.drop(['areaName','areaType'],axis=1, inplace=True)


Since we removed many rows and columns we will need to reset the index so it matches our new date frame structure.

In [38]:
median_rent_borough.reset_index(drop=True, inplace=True)

When we have a quick look at our new data frame we can see it includes all the information we need and the borough names look good.

In [39]:
median_rent_borough

Unnamed: 0,Borough,month,median_rent
0,Bronx,2010-01,1600.0
1,Brooklyn,2010-01,2200.0
2,Manhattan,2010-01,2800.0
3,Queens,2010-01,1655.0
4,Staten Island,2010-01,
...,...,...,...
850,Bronx,2024-03,2600.0
851,Brooklyn,2024-03,3375.0
852,Manhattan,2024-03,4296.0
853,Queens,2024-03,2995.0


We convert the month column to a different data type. This is the datetime format in the pandas library. This allows us to call date-specific functions on the column.

In [40]:
median_rent_borough['month'] = pd.to_datetime(median_rent_borough['month'], format='%Y-%m')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  median_rent_borough['month'] = pd.to_datetime(median_rent_borough['month'], format='%Y-%m')


We can confirm the new column data type with a .info()

In [41]:
median_rent_borough.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 855 entries, 0 to 854
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Borough      855 non-null    object        
 1   month        855 non-null    datetime64[ns]
 2   median_rent  826 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 20.2+ KB


In [42]:
median_rent_borough['year'] = median_rent_borough['month'].dt.year
median_rent_borough['month'] = median_rent_borough['month'].dt.month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  median_rent_borough['year'] = median_rent_borough['month'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  median_rent_borough['month'] = median_rent_borough['month'].dt.month


We will also change the columnn name of "Borough" to match the crime data. This will change to "borough".

In [43]:
median_rent_borough.rename({'Borough':'borough'},axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  median_rent_borough.rename({'Borough':'borough'},axis=1,inplace=True)


In [44]:
median_rent_borough

Unnamed: 0,borough,month,median_rent,year
0,Bronx,1,1600.0,2010
1,Brooklyn,1,2200.0,2010
2,Manhattan,1,2800.0,2010
3,Queens,1,1655.0,2010
4,Staten Island,1,,2010
...,...,...,...,...
850,Bronx,3,2600.0,2024
851,Brooklyn,3,3375.0,2024
852,Manhattan,3,4296.0,2024
853,Queens,3,2995.0,2024


At this point we are happy with the changes made to the median rent data and can continue to ready the other data sets to merge them together.

---


## Crime Data Cleanup

An initial look at the data frame shows is there are three columns. One for the date that the crime was reported, one for the borough in which the crime was reported

In [45]:
print(f'The earliest date in the data set is {crime_data.date.min()}.\n'
      f'The latest date in the data set is {crime_data.date.max()}.\n'
      f'\nThere are {crime_data.shape[0]} rows and {crime_data.shape[1]} columns.'
      f'The columns in this dataset are: {", ".join(crime_data.columns)}')




The earliest date in the data set is 2006-01-01.
The latest date in the data set is 2023-12-31.

There are 8912005 rows and 3 columns.The columns in this dataset are: date, crime_type, borough


In [46]:
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8912005 entries, 0 to 8912004
Data columns (total 3 columns):
 #   Column      Dtype 
---  ------      ----- 
 0   date        object
 1   crime_type  object
 2   borough     object
dtypes: object(3)
memory usage: 204.0+ MB


We change the date column from an object type to the datetime type.

In [47]:
crime_data['date'] = pd.to_datetime(crime_data['date'], format='%Y-%m-%d')

We can confirm that this was done correctly with the .info method.

In [48]:
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8912005 entries, 0 to 8912004
Data columns (total 3 columns):
 #   Column      Dtype         
---  ------      -----         
 0   date        datetime64[ns]
 1   crime_type  object        
 2   borough     object        
dtypes: datetime64[ns](1), object(2)
memory usage: 204.0+ MB


In [49]:
crime_data.head()

Unnamed: 0,date,crime_type,borough
0,2013-08-30,Felony,Brooklyn
1,2013-08-30,Misdemeanor,Bronx
2,2013-08-30,Misdemeanor,Queens
3,2013-08-30,Felony,Manhattan
4,2013-08-31,Felony,Bronx


The 'value_counts()' method shows us how many datapoints (crimes) are included in each borough.

In [50]:
crime_data.borough.value_counts()

borough
Brooklyn         2619205
Manhattan        2150573
Bronx            1928866
Queens           1803561
Staten Island     409800
Name: count, dtype: int64

Next we want to create new columns which are for the month and the year. We can extract this from the date column now that it is in the datetime format.

In [51]:
crime_data['year'] = crime_data['date'].dt.year
crime_data['month'] = crime_data['date'].dt.month

In [52]:
crime_data

Unnamed: 0,date,crime_type,borough,year,month
0,2013-08-30,Felony,Brooklyn,2013,8
1,2013-08-30,Misdemeanor,Bronx,2013,8
2,2013-08-30,Misdemeanor,Queens,2013,8
3,2013-08-30,Felony,Manhattan,2013,8
4,2013-08-31,Felony,Bronx,2013,8
...,...,...,...,...,...
8912000,2013-08-28,Misdemeanor,Bronx,2013,8
8912001,2013-08-29,Misdemeanor,Bronx,2013,8
8912002,2013-08-29,Felony,Bronx,2013,8
8912003,2013-08-29,Felony,Brooklyn,2013,8


In [53]:
crime_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8912005 entries, 0 to 8912004
Data columns (total 5 columns):
 #   Column      Dtype         
---  ------      -----         
 0   date        datetime64[ns]
 1   crime_type  object        
 2   borough     object        
 3   year        int32         
 4   month       int32         
dtypes: datetime64[ns](1), int32(2), object(2)
memory usage: 272.0+ MB


The last step in our crime data preparation is to aggregate crimes per month, year and crime type in each borough.

In [54]:
crime_agg = crime_data.groupby(['borough', 'year', 'month','crime_type']).size().reset_index(name='total_crimes')


In [55]:
crime_agg

Unnamed: 0,borough,year,month,crime_type,total_crimes
0,Bronx,2006,1,Felony,2532
1,Bronx,2006,1,Misdemeanor,5461
2,Bronx,2006,1,Violation,1328
3,Bronx,2006,2,Felony,2007
4,Bronx,2006,2,Misdemeanor,4629
...,...,...,...,...,...
3235,Staten Island,2023,11,Misdemeanor,1178
3236,Staten Island,2023,11,Violation,387
3237,Staten Island,2023,12,Felony,521
3238,Staten Island,2023,12,Misdemeanor,1131


-----

## Income Data Cleanup

In [56]:
median_income

Unnamed: 0,borough,median_income_usd,year
0,"Bronx borough, Bronx County, New York",34264,2010
1,"Brooklyn borough, Kings County, New York",43567,2010
2,"Manhattan borough, New York County, New York",64971,2010
3,"Queens borough, Queens County, New York",55291,2010
4,"Staten Island borough, Richmond County, New York",71084,2010
...,...,...,...
60,"Bronx borough, Bronx County, New York",47036,2022
61,"Brooklyn borough, Kings County, New York",74692,2022
62,"Manhattan borough, New York County, New York",99880,2022
63,"Queens borough, Queens County, New York",82431,2022


All we need to do at this stage to prepare the income data for the merge is to change the borough names. As seen below, the borough column contains extended names for the boroughs which includes the county and city. For our merge we just need the borough names themselves. For this, we can use the split string method. 

In the code below, we split the values at the word ' borough' (including the space before the word). After the split, the "[0]" allows us to take the first part of the split, which would be the name we need.

In [57]:
median_income['borough'] = median_income['borough'].str.split(' borough', expand=True)[0]


Next, we would like to rename the column 'median_income_usd' to 'median_yearly_income' as we will add a monthly breakdown as well.

In [58]:
median_income.rename({'median_income_usd':'median_yearly_income'}, axis=1,inplace=True)

In [59]:
median_income

Unnamed: 0,borough,median_yearly_income,year
0,Bronx,34264,2010
1,Brooklyn,43567,2010
2,Manhattan,64971,2010
3,Queens,55291,2010
4,Staten Island,71084,2010
...,...,...,...
60,Bronx,47036,2022
61,Brooklyn,74692,2022
62,Manhattan,99880,2022
63,Queens,82431,2022


We will now create the extra column which includes a monthly income breakdown. This is achieved by dividing the yearly income by 12 and separating it into the 12 month columns for each year. First, as the median yearly income column is still a string type, we will need to change it to an integer which can be divided. 

In [60]:
median_income.median_yearly_income = median_income.median_yearly_income.astype(int)


We have also discovered income data from the year 2023 for each of our counties. This can be added in using the .concat method. 

In [61]:
income_2023 = pd.DataFrame({'borough':['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'], 
                            'median_yearly_income':[43011,69567,104558,84757,101528], 
                            'year':[2023,2023,2023,2023,2023]})

In [62]:
median_income = pd.concat([median_income,income_2023])

In [63]:

median_income['median_monthly_income'] = (median_income['median_yearly_income']/12)

In [64]:
median_income

Unnamed: 0,borough,median_yearly_income,year,median_monthly_income
0,Bronx,34264,2010,2855.333333
1,Brooklyn,43567,2010,3630.583333
2,Manhattan,64971,2010,5414.250000
3,Queens,55291,2010,4607.583333
4,Staten Island,71084,2010,5923.666667
...,...,...,...,...
0,Bronx,43011,2023,3584.250000
1,Brooklyn,69567,2023,5797.250000
2,Manhattan,104558,2023,8713.166667
3,Queens,84757,2023,7063.083333


-----

# Data Merging

## Merging Data Together

First, we can merge the aggregated Crime data with the median rent data. We temporarily call this "crime_rent_merge"

In [65]:
crime_rent_merge = pd.merge(crime_agg, median_rent_borough, on=['borough', 'year', 'month'])


In [66]:
crime_rent_merge

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent
0,Bronx,2010,1,Felony,2252,1600.0
1,Bronx,2010,1,Misdemeanor,6074,1600.0
2,Bronx,2010,1,Violation,1062,1600.0
3,Bronx,2010,2,Felony,1823,1600.0
4,Bronx,2010,2,Misdemeanor,5000,1600.0
...,...,...,...,...,...,...
2515,Staten Island,2023,11,Misdemeanor,1178,2249.0
2516,Staten Island,2023,11,Violation,387,2249.0
2517,Staten Island,2023,12,Felony,521,2253.0
2518,Staten Island,2023,12,Misdemeanor,1131,2253.0


After this, we can also merge the median income data onto the crime_rent_merge data we have. For now we will call this crime_economy_merge.

In [67]:
crime_economy_merge = pd.merge(crime_rent_merge, median_income, on=['borough', 'year'])


In [68]:
crime_economy_merge

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent,median_yearly_income,median_monthly_income
0,Bronx,2010,1,Felony,2252,1600.0,34264,2855.333333
1,Bronx,2010,1,Misdemeanor,6074,1600.0,34264,2855.333333
2,Bronx,2010,1,Violation,1062,1600.0,34264,2855.333333
3,Bronx,2010,2,Felony,1823,1600.0,34264,2855.333333
4,Bronx,2010,2,Misdemeanor,5000,1600.0,34264,2855.333333
...,...,...,...,...,...,...,...,...
2515,Staten Island,2023,11,Misdemeanor,1178,2249.0,101528,8460.666667
2516,Staten Island,2023,11,Violation,387,2249.0,101528,8460.666667
2517,Staten Island,2023,12,Felony,521,2253.0,101528,8460.666667
2518,Staten Island,2023,12,Misdemeanor,1131,2253.0,101528,8460.666667


## Merged Data Cleanup and Format

In [69]:
crime_economy_merge

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent,median_yearly_income,median_monthly_income
0,Bronx,2010,1,Felony,2252,1600.0,34264,2855.333333
1,Bronx,2010,1,Misdemeanor,6074,1600.0,34264,2855.333333
2,Bronx,2010,1,Violation,1062,1600.0,34264,2855.333333
3,Bronx,2010,2,Felony,1823,1600.0,34264,2855.333333
4,Bronx,2010,2,Misdemeanor,5000,1600.0,34264,2855.333333
...,...,...,...,...,...,...,...,...
2515,Staten Island,2023,11,Misdemeanor,1178,2249.0,101528,8460.666667
2516,Staten Island,2023,11,Violation,387,2249.0,101528,8460.666667
2517,Staten Island,2023,12,Felony,521,2253.0,101528,8460.666667
2518,Staten Island,2023,12,Misdemeanor,1131,2253.0,101528,8460.666667


Now that we have a column for the monthly median rent and a column for the monthly median income we can create a new column which is a calculation of the ratio of rent/income. This will give us an idea of how much of a person's wage is spent on rent per borough.

In [70]:
crime_economy_merge['rent_income_ratio'] = crime_economy_merge['median_rent'] / crime_economy_merge['median_monthly_income']


In [71]:
crime_economy_merge

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent,median_yearly_income,median_monthly_income,rent_income_ratio
0,Bronx,2010,1,Felony,2252,1600.0,34264,2855.333333,0.560355
1,Bronx,2010,1,Misdemeanor,6074,1600.0,34264,2855.333333,0.560355
2,Bronx,2010,1,Violation,1062,1600.0,34264,2855.333333,0.560355
3,Bronx,2010,2,Felony,1823,1600.0,34264,2855.333333,0.560355
4,Bronx,2010,2,Misdemeanor,5000,1600.0,34264,2855.333333,0.560355
...,...,...,...,...,...,...,...,...,...
2515,Staten Island,2023,11,Misdemeanor,1178,2249.0,101528,8460.666667,0.265818
2516,Staten Island,2023,11,Violation,387,2249.0,101528,8460.666667,0.265818
2517,Staten Island,2023,12,Felony,521,2253.0,101528,8460.666667,0.266291
2518,Staten Island,2023,12,Misdemeanor,1131,2253.0,101528,8460.666667,0.266291


In [72]:
crime_type_economy_df = pd.merge(crime_economy_merge,population_data, on=['borough', 'year'])

In [73]:
crime_type_economy_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2520 entries, 0 to 2519
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   borough                2520 non-null   object 
 1   year                   2520 non-null   int32  
 2   month                  2520 non-null   int32  
 3   crime_type             2520 non-null   object 
 4   total_crimes           2520 non-null   int64  
 5   median_rent            2433 non-null   float64
 6   median_yearly_income   2520 non-null   int64  
 7   median_monthly_income  2520 non-null   float64
 8   rent_income_ratio      2433 non-null   float64
 9   county                 2520 non-null   object 
 10  population             2520 non-null   int64  
dtypes: float64(3), int32(2), int64(3), object(3)
memory usage: 197.0+ KB


Create new column for Crime Rate defined as total crimes per 100,000 people

In [74]:
crime_type_economy_df['crime_ratio'] = (crime_type_economy_df.total_crimes / crime_type_economy_df.population)*100000

In [75]:
crime_type_economy_df['date'] = pd.to_datetime(crime_type_economy_df[['year', 'month']].assign(day=1))

In [76]:
crime_type_economy_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2520 entries, 0 to 2519
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   borough                2520 non-null   object        
 1   year                   2520 non-null   int32         
 2   month                  2520 non-null   int32         
 3   crime_type             2520 non-null   object        
 4   total_crimes           2520 non-null   int64         
 5   median_rent            2433 non-null   float64       
 6   median_yearly_income   2520 non-null   int64         
 7   median_monthly_income  2520 non-null   float64       
 8   rent_income_ratio      2433 non-null   float64       
 9   county                 2520 non-null   object        
 10  population             2520 non-null   int64         
 11  crime_ratio            2520 non-null   float64       
 12  date                   2520 non-null   datetime64[ns]
dtypes: 

In [77]:
crime_type_economy_df.head()

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent,median_yearly_income,median_monthly_income,rent_income_ratio,county,population,crime_ratio,date
0,Bronx,2010,1,Felony,2252,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,164.894104,2010-01-01
1,Bronx,2010,1,Misdemeanor,6074,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,444.745465,2010-01-01
2,Bronx,2010,1,Violation,1062,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,77.760896,2010-01-01
3,Bronx,2010,2,Felony,1823,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,133.482216,2010-02-01
4,Bronx,2010,2,Misdemeanor,5000,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,366.105914,2010-02-01


In [78]:
engine = get_engine()
table_name = 'economy_data'

if engine!=None:
    try:
        crime_type_economy_df.to_sql(name=table_name, 
                        con=engine, 
                        if_exists='replace',
                       schema=schema,
                       index=False, 
                        chunksize=5000, 
                       method='multi') 
        print(f"The {table_name} table was imported successfully.")
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

Python-dotenv could not parse statement starting at line 14
Python-dotenv could not parse statement starting at line 18


The economy_data table was imported successfully.


In [None]:
get_dataframe(f'select * from {schema}.economy_data')

-----

In [None]:
sns.pairplot(crime_economy_merge)

In [None]:
plt.figure(figsize=(10, 8))
sns.scatterplot(data=crime_economy_merge, x='total_crimes', y='rent_income_ratio')
plt.title('Total Crimes vs Rent Ratio')
plt.show()

# Random Analysis shit

In [None]:
plt.figure(figsize=(10, 6))
sns.lineplot(data=crime_type_economy_df, x='year', y='total_crimes', hue='crime_type', palette='Set2')
plt.title('Total Crimes over Years differentiated by Borough')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()


In [1]:

correlation = crime_type_economy_df['total_crimes'].corr(big_df['median_rent'])
print(f"The correlation between total crimes and median rent is {correlation}")

NameError: name 'crime_type_economy_df' is not defined

In [None]:
plt.figure(figsize=(10, 6))
sns.scatterplot(data=crime_type_economy_df, x='total_crimes', y='median_rent', hue='crime_type', palette='Set2')
plt.title('Correlation between Total Crimes and Median Rent differentiated by Borough')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.show()


In [None]:
crimes_by_offense = get_dataframe(f'select borough, extract(year from report_date) as year, extract(month from report_date) as month, offense, count(*) from {schema}.criminal_data_new group by 1,2,3,4')

In [None]:
crimes_by_offense

-----

-----

# Sex Population Data

In [None]:
ageyears = list(range(2016,2023))


In [None]:
ageyears

In [None]:
sex_data = {}


for x in ageyears:
    if x == 2016:
        sex = pd.read_csv(f'../GOODDATA/age pop data/ACSST5Y{x}.S0101-Data.csv')
        sex = sex[['NAME','S0101_C01_001E','S0101_C02_001E','S0101_C03_001E']]
        sex['year'] = x
        sex.rename(columns={'NAME':'county','S0101_C01_001E':'total','S0101_C02_001E':'male','S0101_C03_001E':'female'}, inplace=True)
        sex.drop(sex.index[0], inplace=True)
        cols = ['total','male','female']
        for col in cols:
            sex[col] = pd.to_numeric(sex[col], errors='coerce')
        sex['borough'] = sex['county'].str.split(' borough', expand=True)[0]
        sex = sex.reindex(columns=['county','borough','year','total','female','male'])
        sex_data[f'sex_{x}'] = sex
    else:
        sex = pd.read_csv(f'../GOODDATA/age pop data/ACSST5Y{x}.S0101-Data.csv')
        sex = sex[['NAME','S0101_C01_001E','S0101_C03_001E','S0101_C05_001E']]
        sex['year'] = x
        sex.rename(columns={'NAME':'county','S0101_C01_001E':'total','S0101_C03_001E':'male','S0101_C05_001E':'female'}, inplace=True)
        sex.drop(sex.index[0], inplace=True)
        cols = ['total','male','female']
        for col in cols:
            sex[col] = pd.to_numeric(sex[col], errors='coerce')
        sex['borough'] = sex['county'].str.split(' borough', expand=True)[0]
        sex = sex.reindex(columns=['county','borough','year','total','female','male'])
        sex_data[f'sex_{x}'] = sex


In [None]:
sex_data['sex_2019']

In [None]:
gender_data = pd.concat(sex_data.values(), ignore_index=True)


In [None]:
gender_data

In [None]:
table_name = 'population_by_gender'
engine = get_engine()


if engine!=None:
    try:
        gender_data.to_sql(name=table_name, 
                        con=engine, 
                        if_exists='replace',
                       schema=schema,
                       index=False, 
                        chunksize=5000, 
                       method='multi') 
        print(f"The {table_name} table was imported successfully.")
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

In [None]:
get_dataframe(f'select * from {schema}.population_by_gender')

-----

# Age Category Population Distribution

In [None]:
age_data = {}


for x in ageyears:
    if x == 2016:
        age = pd.read_csv(f'../GOODDATA/age pop data/ACSST5Y{x}.S0101-Data.csv')
        age = age[['NAME','S0101_C01_001E','S0101_C01_002E','S0101_C01_020E','S0101_C01_021E','S0101_C01_022E','S0101_C01_007E','S0101_C01_008E','S0101_C01_009E','S0101_C01_010E','S0101_C01_011E','S0101_C01_012E','S0101_C01_013E','S0101_C01_014E','S0101_C01_028E']]
        age['year'] = x
        age.rename(columns={'NAME':'county','S0101_C01_001E':'total','S0101_C01_002E':'18_3','S0101_C01_020E':'18_1','S0101_C01_021E':'18_2','S0101_C01_022E':'18-24','S0101_C01_007E':'25-44_1','S0101_C01_008E':'25-44_2','S0101_C01_009E':'25-44_3','S0101_C01_010E':'25-44_4',
                            'S0101_C01_011E':'45-64_1','S0101_C01_012E':'45-64_2','S0101_C01_013E':'45-64_3','S0101_C01_014E':'45-64_4','S0101_C01_028E':'65+'}, inplace=True)
        age.drop(age.index[0], inplace=True)

        cols = ['total','18_1', '18_2', '18_3', '18-24', '25-44_1', '25-44_2','25-44_3', '25-44_4', '45-64_1', '45-64_2', '45-64_3', '45-64_4', '65+']
        for col in cols:
            age[col] = pd.to_numeric(age[col], errors='coerce')
        
        age['>18'] = round(age['total'] *  ((age['18_1'] + age['18_2'] + age['18_3']) / 100),0)
        age['18-24'] = round(age['total'] *  (age['18-24'] / 100),0)
        age['25-44'] = round(age['total'] *  ((age['25-44_1'] + age['25-44_2'] + age['25-44_3'] + age['25-44_4']) /100),0)
        age['45-64'] = round(age['total'] *  ((age['45-64_1'] + age['45-64_2'] + age['45-64_3'] + age['45-64_4']) / 100),0)
        age['65+'] = round(age['total'] * (age['65+'] /100),0)
        age['borough'] = age['county'].str.split(' borough', expand=True)[0]
        ages = age[['county','borough','year','total', '>18', '18-24', '25-44', '45-64', '65+']]
        #age = age.reindex(columns=['county','borough','year','total','female','male'])
        ages.astype
        age_data[f'age_{x}'] = ages

    else:
        age = pd.read_csv(f'../GOODDATA/age pop data/ACSST5Y{x}.S0101-Data.csv')
        age = age[['NAME','S0101_C01_001E','S0101_C01_022E','S0101_C01_023E','S0101_C01_007E','S0101_C01_008E','S0101_C01_009E','S0101_C01_010E','S0101_C01_011E','S0101_C01_012E','S0101_C01_013E','S0101_C01_014E','S0101_C01_030E']]
        age['year'] = x
        age.rename(columns={'NAME':'county','S0101_C01_001E':'total','S0101_C01_022E':'>18','S0101_C01_023E':'18-24','S0101_C01_007E':'25-1','S0101_C01_008E':'25-2','S0101_C01_009E':'25-3',
                            'S0101_C01_010E':'25-4','S0101_C01_011E':'45-1','S0101_C01_012E':'45-2','S0101_C01_013E':'45-3','S0101_C01_014E':'45-4','S0101_C01_030E':'65+'}, inplace=True)
        age.drop(age.index[0], inplace=True)
        cols = ['total', '>18', '18-24', '25-1', '25-2', '25-3', '25-4',
       '45-1', '45-2', '45-3', '45-4', '65+']
        for col in cols:
            age[col] = pd.to_numeric(age[col], errors='coerce')
        age['25-44'] = age['25-1'] + age['25-2'] + age['25-3'] + age['25-4']
        age['45-64'] = age['45-1'] + age['45-2'] + age['45-3'] + age['45-4']

        age['borough'] = age['county'].str.split(' borough', expand=True)[0]
        ages = age[['county','borough','year','total', '>18', '18-24', '25-44', '45-64', '65+']]

        age_data[f'age_{x}'] = ages




In [None]:
pop_age_data = pd.concat(age_data.values(), ignore_index=True)


In [None]:
pop_age_data.columns

Transposing data from Wide to Long format using the Melt function

In [None]:
id_vars = ['county', 'borough', 'year','total']

value_vars = ['>18', '18-24', '25-44', '45-64',
       '65+']

age_group_data = pop_age_data.melt(id_vars=id_vars, value_vars=value_vars, var_name='age_group', value_name='count')

In [None]:
age_group_data

Uploading Population levels by age group to the SQL database

In [None]:
table_name = 'population_by_age_group'
engine = get_engine()


if engine!=None:
    try:
        age_group_data.to_sql(name=table_name, 
                        con=engine, 
                        if_exists='replace',
                       schema=schema,
                       index=False, 
                        chunksize=5000, 
                       method='multi') 
        print(f"The {table_name} table was imported successfully.")
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None

Checking that the data is correctly uploaded and can be called using a function.

In [None]:
get_dataframe(f'select * from {schema}.population_by_age_group')

In [4]:
economy = get_dataframe(f'select * from {schema}.economy_data')

In [5]:
economy

Unnamed: 0,borough,year,month,crime_type,total_crimes,median_rent,median_yearly_income,median_monthly_income,rent_income_ratio,county,population,crime_ratio,date
0,Bronx,2010,1,Felony,2252,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,164.894104,2010-01-01
1,Bronx,2010,1,Misdemeanor,6074,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,444.745465,2010-01-01
2,Bronx,2010,1,Violation,1062,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,77.760896,2010-01-01
3,Bronx,2010,2,Felony,1823,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,133.482216,2010-02-01
4,Bronx,2010,2,Misdemeanor,5000,1600.0,34264,2855.333333,0.560355,"Bronx County, New York",1365725,366.105914,2010-02-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2515,Staten Island,2023,11,Misdemeanor,1178,2249.0,101528,8460.666667,0.265818,"Richmond County, New York",495600,237.691687,2023-11-01
2516,Staten Island,2023,11,Violation,387,2249.0,101528,8460.666667,0.265818,"Richmond County, New York",495600,78.087167,2023-11-01
2517,Staten Island,2023,12,Felony,521,2253.0,101528,8460.666667,0.266291,"Richmond County, New York",495600,105.125101,2023-12-01
2518,Staten Island,2023,12,Misdemeanor,1131,2253.0,101528,8460.666667,0.266291,"Richmond County, New York",495600,228.208232,2023-12-01


In [8]:

correlation = economy['total_crimes'].corr(economy['rent_income_ratio'])
print(f"The correlation between total crimes and rent_income_ratio is {correlation}")

The correlation between total crimes and rent_income_ratio is 0.4887645374924724
