# Importing needed libraries

In [1]:
# to read and visualize spatial data
import geopandas as gpd

# to provide basemaps
import contextily as ctx

# to provide plots
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

# Importing Data

In [3]:
# Load a data file
gdf = gpd.read_file('Data sets/acs2020_5yr_B03002_14000US39061008202.geojson')

# Get the number of rows and columns
gdf.shape

#Changing the number of rows displayed
pd.set_option('display.max_rows', None)

# Finding what row contains Tract 9,10,16,17 (OTR neighborhood)
gdf.loc[gdf['name'] == 'Census Tract 9, Hamilton, OH']
gdf.loc[gdf['name'] == 'Census Tract 10, Hamilton, OH']
gdf.loc[gdf['name'] == 'Census Tract 16, Hamilton, OH']
gdf.loc[gdf['name'] == 'Census Tract 17, Hamilton, OH']

#Getting info about rows
    #[0:1] - United States
    #[[1:2] - Ohio State
    #[[2:3] - Hamilton County
    #[[122:123] - Cincinnati
    #[Over-The-Rhine (OTR) Neighborhood
    #[[5:6] - Tract 9
    #[[6:7] - Tract 10
    #[[8:9] - Tract 16
    #[[9:10] - Tract 17

Unnamed: 0,geoid,name,B03002001,"B03002001, Error",B03002002,"B03002002, Error",B03002003,"B03002003, Error",B03002004,"B03002004, Error",...,"B03002017, Error",B03002018,"B03002018, Error",B03002019,"B03002019, Error",B03002020,"B03002020, Error",B03002021,"B03002021, Error",geometry
9,14000US39061001700,"Census Tract 17, Hamilton, OH",1136.0,276.0,1122.0,277.0,411.0,184.0,601.0,178.0,...,12.0,0.0,12.0,0.0,12.0,0.0,12.0,0.0,12.0,"MULTIPOLYGON (((-84.51752 39.11946, -84.51742 ..."


# Cleaning Data

In [None]:
# Columns, null values, data types
gdf.info()


#list columns we have
list(gdf)


# Create a list of columns to keep - After checking metadata file
columns_to_keep = ['geoid',
                   'name',
                   'B03002001',
                   'B03002002',
                   'B03002003',
                   'B03002004',
                   'B03002005',
                   'B03002006',
                   'B03002007',
                   'B03002008',
                   'B03002012',
                   'geometry']


# Assign only the columns I want to keep to my dataframe
gdf = gdf[columns_to_keep]


#Rename the columns based on the metadata.json
gdf.columns = ['geoid',
               'name',
               'Total',
               'Not Hispanic or Latino',
               'White alone',
               'Black or African American alone',
               'American Indian and Alaska Native alone',
               'Asian alone',
               'Native Hawaiian and Other Pacific Islander alone',
               'Some other race alone',
               'Hispanic or Latino',
               'geometry']

In [None]:
# Want to drop almost all rows except the ones for neighborhood Over-The-Rhine and Cincinnati, 
# Hamilton County, Ohio State, United States = Tracts 9,10,16,17 = rows with index 5,6,8,9

gdf.index.values


gdf = gdf.drop([3, 4,    7,   10,  11,  12,  13,  14,  15,
        16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
        29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
        42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,
        55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,
        68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
        81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,
        94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
       107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
       120, 121, 123])


# Creating spatial visualizations

In [None]:
#Creating an impose plot of Ohio State over United States
fig, ax = plt.subplots(figsize = (15,10))
ax.set_aspect('equal')
gdf.iloc[0:1].geometry.plot(ax=ax, marker='o', color='red', alpha = 0.5, markersize=5)
gdf.iloc[1:2].plot(ax = ax, alpha = 0.4, color = 'blue', edgecolor = 'black')
ax.set(title='Ohio State in United States')

In [None]:
#Creating an impose plot of Hamilton County over Ohio State
fig, ax = plt.subplots(figsize = (8,6))
ax.set_aspect('equal')
ax.set(title='Hamilton County in Ohio State')
gdf.iloc[1:2].geometry.plot(ax=ax, marker='o', color='red', alpha = 0.5, markersize=5)
gdf.iloc[2:3].plot(ax = ax, alpha = 0.4, color = 'blue', edgecolor = 'black')

In [None]:
#Creating an impose plot of Cincinnati over Hamilton County
fig, ax = plt.subplots(figsize = (8,6))
ax.set_aspect('equal')
gdf.iloc[2:3].geometry.plot(ax=ax, marker='o', color='green', alpha = 0.5, markersize=5)
gdf.iloc[7:8].plot(ax = ax, alpha = 0.4, color = 'black', edgecolor = 'black')
ax.set(title='Cincinnati in Hamilton County')

In [None]:
#Creating an impose plot of OTR over Cincinnati
fig, ax = plt.subplots(figsize = (8,6))
ax.set_aspect('equal')

gdf.iloc[7:8].plot(ax = ax, alpha = 0.4, color = 'grey', edgecolor = 'black')
gdf.iloc[3:7].geometry.plot(ax=ax, marker='o', color='red', alpha = 0.5, markersize=5)
ax.set(title='OTR in Cincinnati')

# Spatial visualizations of OTR

In [None]:
# A plot showing different color by population size

fig, ax = plt.subplots(1,3, figsize= (15,4))

ax1, ax2, ax3 = ax
ax1 = gdf.iloc[3:7].plot(column='Total', cmap='Oranges',ax = ax1, legend= True)
ax1.set_title('Census Tracts Population', fontsize = 14) 

ax2 = gdf.iloc[3:7].plot(column=('White alone'), ax = ax2, cmap='Oranges',legend= True)
ax2.set_title('White Population in OTR', fontsize = 14) 

ax3 = gdf.iloc[3:7].plot(ax = ax3, column=('Black or African American alone'), cmap='Oranges',legend= True)
ax3.set_title('Black Population in OTR', fontsize = 14) 


# Investigate about creating one uniform legend

In [None]:
#Create two columns with percentages of White and Black population
gdf['Percent white'] = gdf['White alone']/gdf['Total']*100
gdf['Percent Black'] = gdf['Black or African American alone']/gdf['Total']*100

#Display dataframe
#display(gdf.loc[10:11])


#Quantile intervals visualization
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(15,15))

gdf.iloc[3:7].plot(column = 'Percent white', ax = ax1, cmap = "Blues", legend = True, scheme ='quantiles',k=4,
                   edgecolor='white', linewidth=0., alpha= 0.75,)
ax1.set_title('Quartile Intervals White Population', fontsize = 14) 


gdf.iloc[3:7].plot(column = 'Percent Black', ax = ax2,cmap = "Blues", scheme='quantiles', k=4, 
                   edgecolor='white', linewidth=0., alpha= 0.75,legend=True)
ax2.set_title('Quartile Intervals Black Population', fontsize = 14) 


In [None]:
# Creating plots adding basemaps

gdf_web_mercator = gdf.to_crs(epsg=3857)

fig, ax = plt.subplots(1,3, figsize=(30,8))
ax1, ax2, ax3 = ax

gdf_web_mercator.iloc[3:7][gdf_web_mercator['Percent white'] > 50].plot(figsize = (25,10), ax=ax1, alpha=0.8)
ax1.axis('off')
ax1.set_title('Census tracts with more than 50% white', fontsize = 22)

gdf_web_mercator.iloc[3:7][gdf_web_mercator['Percent Black'] > 50].plot(figsize = (25,10),ax=ax2, alpha=0.9)
ax2.axis('off')
ax2.set_title('Census tracts with more than 50% black', fontsize = 22)

ax3 = gdf_web_mercator.iloc[3:7].plot( column = 'Percent white',alpha=0.8, ax=ax3,cmap='Blues', legend=True,)
ax3.axis('off')
ax3.set_title('White percentage of census tracts', fontsize = 22)


ctx.add_basemap(ax1, zoom=16)
ctx.add_basemap(ax2, zoom=16)
ctx.add_basemap(ax3, zoom=16)

#Investigate about creating plot 2 + 1

# Graphs

In [None]:
#Create a bar graph

x= list(gdf.iloc[3:7]['name'])
display(x)
y = list(gdf.iloc[3:7]['Total'])
display(y)

plt.title(label = "Total Population of OTR")
plt.barh(x, y)
plt.figure(figsize=(10,5))
plt.show()

In [None]:
#Same bar graph sorted

gdf_sorted = gdf.sort_values(by= 'Total', ascending = True)
x= list(gdf_sorted.iloc[0:4]['name'])
display(x)
y = list(gdf_sorted.iloc[0:4]['Total'])
display(y)

plt.title(label = "Total Population of OTR")
plt.barh(x,y)
plt.figure(figsize=(15,12))
plt.show()

In [None]:
#Importing pickle files

pdf1950 = pd.read_pickle('pdf1950.pkl')
pdf1960 = pd.read_pickle('pdf1960.pkl')
pdf1970 = pd.read_pickle('pdf1970.pkl')
pdf1980 = pd.read_pickle('pdf1980.pkl')
pdf1990 = pd.read_pickle('pdf1990.pkl')
pdf2000 = pd.read_pickle('pdf2000.pkl')
pdf2010 = pd.read_pickle('pdf2010.pkl')
pdf2020 = pd.read_pickle('pdf2020.pkl')

In [None]:
pdf1950

In [None]:
#creating a dataframe for white population

# Create an empty list
White_1950_list =[]
White_1960_list =[]
White_1970_list =[]
White_1980_list =[]
White_1990_list =[]
White_2000_list =[]
White_2010_list =[]
White_2020_list =[]
  
    
# iterate over the rows
for i, row in pdf1950.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_1950_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]
    
for i, row in pdf1960.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_1960_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]
    
for i, row in pdf1970.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_1970_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]

for i, row in pdf1980.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_1980_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]

for i, row in pdf1990.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_1990_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]
    
for i, row in pdf2000.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_2000_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]
    
for i, row in pdf2010.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_2010_rows = [row['Census Tract 9, Hamilton County, Ohio'], row['Census Tract 10, Hamilton County, Ohio'],
                       row['Census Tract 16, Hamilton County, Ohio'], row['Census Tract 17, Hamilton County, Ohio'],
                       row['TOTAL (All Selected Census Tracts)']]
    
for i, row in pdf2020.iloc[1:2].iterrows():
    # create a list representing the dataframe row
    White_2020_rows = [row['Census Tract 9 Total'], row['Census Tract 10 Total'],
                       row['Census Tract 16 Total'], row['Census Tract 17 Total'],
                       row['TOTAL (All Selected Census Tracts)']]
    

# append row list to ls
White_1950_list.append(White_1950_rows)
White_1960_list.append(White_1960_rows)
White_1970_list.append(White_1970_rows)
White_1980_list.append(White_1980_rows)
White_1990_list.append(White_1990_rows)
White_2000_list.append(White_2000_rows)
White_2010_list.append(White_2010_rows)
White_2020_list.append(White_2020_rows)


#creating df
#Category = ['Census Tract 9', 'Census Tract 10', 'Census Tract 16', 'Census Tract 17', 'Total OTR']
data = [['Census Tract 9', 7474.0], ['Census Tract 10', 5752.0], ['Census Tract 16', 5849.0], ['Census Tract 17', 6207.0], ['Total OTR', 25282.0]]
#data1 = [[*Category[0],White_1960_list[0]], [*Category[1], White_1960_list[1]]]


Wdf = pd.DataFrame(data, columns=['Category', '1950'])
Wdf
        
    
  

In [None]:
my_list = [10, 11, 12, 13, 14]
i = [1, 4]
element = []
for index in i:
    element.append(my_list[index])
print(element)

In [None]:
myList = White_1980_list
myList = [item[0].split(",") for item in myList]
print(myList)

In [None]:
White_1980_list = [ item for elem in White_1980_list for item in elem]

In [None]:
len(White_1980_list)

# Stats

In [None]:
#Obtaining some stats

#What is the total population of OTR
gdf.iloc[3:7]['Total'].sum()
print('The total population of OTR is: ' + str(gdf.iloc[3:7]['Total'].sum()))

#What is the mean
gdf.iloc[3:7]['Total'].mean()

#What is the median
gdf.iloc[3:7]['Total'].median()

#Get some more stats
gdf.iloc[3:7]['Total'].describe()