In [92]:
# DS packages
import pandas as pd
import numpy as np

# Geopandas
import geopandas

# File Import
from os import listdir
from os.path import isfile, join

In [64]:
# Get list of all census data and import
all_files = [f for f in listdir(r'census_data') if isfile(join(r'census_data', f))]
data_files = [f for f in all_files if 'Data' in f]
print(data_files)

['ACSDP5Y2010.DP05-Data.csv', 'ACSDP5Y2011.DP05-Data.csv', 'ACSDP5Y2012.DP05-Data.csv', 'ACSDP5Y2013.DP05-Data.csv', 'ACSDP5Y2014.DP05-Data.csv', 'ACSDP5Y2015.DP05-Data.csv', 'ACSDP5Y2016.DP05-Data.csv', 'ACSDP5Y2017.DP05-Data.csv', 'ACSDP5Y2018.DP05-Data.csv', 'ACSDP5Y2019.DP05-Data.csv', 'ACSDP5Y2020.DP05-Data.csv', 'ACSDP5Y2021.DP05-Data.csv']


In [65]:
# Import data
base_dict = {'Geography': [], 'Geographic Area Name': [], 'Estimate!!SEX AND AGE!!Total population': []}
base_df = pd.DataFrame(base_dict)

for data_file in data_files:
    df_name = join(r'census_data', data_file)
    col_list = ['Geography', 'Geographic Area Name','Estimate!!SEX AND AGE!!Total population']
    file_year = data_file[7:11]

    df = pd.read_csv(df_name, skiprows=1, usecols=col_list)
    df['Year'] = file_year
    
    base_df = pd.concat([base_df, df], ignore_index=True)

base_df = base_df.rename(columns={'Estimate!!SEX AND AGE!!Total population':'Total_Population'})
base_df

Unnamed: 0,Geography,Geographic Area Name,Total_Population,Year
0,1600000US0600135,"Acalanes Ridge CDP, California",1612.0,2010
1,1600000US0600156,"Acampo CDP, California",25.0,2010
2,1600000US0600212,"Acton CDP, California",7150.0,2010
3,1600000US0600296,"Adelanto city, California",29764.0,2010
4,1600000US0600310,"Adin CDP, California",374.0,2010
...,...,...,...,...
18421,1600000US0686944,"Yreka city, California",7807.0,2021
18422,1600000US0686972,"Yuba City city, California",69540.0,2021
18423,1600000US0687042,"Yucaipa city, California",54312.0,2021
18424,1600000US0687056,"Yucca Valley town, California",21663.0,2021


In [77]:
base_df['City'] = base_df['Geographic Area Name'].map(lambda x: x.rstrip(', California').rstrip('CDP').rstrip('city').strip())
base_df

Unnamed: 0,Geography,Geographic Area Name,Total_Population,Year,City
0,1600000US0600135,"Acalanes Ridge CDP, California",1612.0,2010,Acalanes Ridge
1,1600000US0600156,"Acampo CDP, California",25.0,2010,Acampo
2,1600000US0600212,"Acton CDP, California",7150.0,2010,Acton
3,1600000US0600296,"Adelanto city, California",29764.0,2010,Adelanto
4,1600000US0600310,"Adin CDP, California",374.0,2010,Adin
...,...,...,...,...,...
18421,1600000US0686944,"Yreka city, California",7807.0,2021,Yreka
18422,1600000US0686972,"Yuba City city, California",69540.0,2021,Yuba City
18423,1600000US0687042,"Yucaipa city, California",54312.0,2021,Yucaipa
18424,1600000US0687056,"Yucca Valley town, California",21663.0,2021,Yucca Valley tow


In [78]:
base_df[base_df["Geographic Area Name"].str.contains("Adelanto")]

Unnamed: 0,Geography,Geographic Area Name,Total_Population,Year,City
3,1600000US0600296,"Adelanto city, California",29764.0,2010,Adelanto
1526,1600000US0600296,"Adelanto city, California",30670.0,2011,Adelanto
3049,1600000US0600296,"Adelanto city, California",31040.0,2012,Adelanto
4566,1600000US0600296,"Adelanto city, California",31376.0,2013,Adelanto
6083,1600000US0600296,"Adelanto city, California",31773.0,2014,Adelanto
7599,1600000US0600296,"Adelanto city, California",32059.0,2015,Adelanto
9121,1600000US0600296,"Adelanto city, California",32311.0,2016,Adelanto
10643,1600000US0600296,"Adelanto city, California",32867.0,2017,Adelanto
12165,1600000US0600296,"Adelanto city, California",33416.0,2018,Adelanto
13686,1600000US0600296,"Adelanto city, California",33660.0,2019,Adelanto


In [79]:
df_shp = pd.read_csv(r'shapefile\City_Boundaries.csv')
df_shp.head(20)

Unnamed: 0,OBJECTID,COUNTY,CITY,Shape__Area,Shape__Length,GlobalID
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8
1,2,Los Angeles,Agoura Hills,29643180.0,29626.940723,b9192abb-2d64-4f7b-b9dc-97b8a703521b
2,3,Alameda,Alameda,43583050.0,52137.876912,b8a7f28a-a71e-4e96-bd7e-78cf84f27f7d
3,4,Alameda,Albany,7558117.0,16774.017561,f25071bd-a3d2-437a-bdf4-2d25a0aea1f4
4,5,Los Angeles,Alhambra,28883380.0,25461.487633,a74ff6de-81a9-4d98-b4dc-a1f9d9730f4b
5,6,Orange,Aliso Viejo,25901250.0,33775.369631,32e63b56-98f9-462e-9e0d-86ed32f59a6f
6,7,Modoc,Alturas,13359030.0,23764.812734,f87fe79e-88fe-44fe-bacf-b16d00e72320
7,8,Amador,Amador City,1307710.0,4628.807175,0b0e5dd4-dd76-4d23-b8db-5f80e581a6d9
8,9,Napa,American Canyon,25657660.0,34793.644044,0e0b9daf-6b3d-4783-a074-8607c357f0eb
9,10,Orange,Anaheim,196109000.0,147822.690355,7b29a09d-80e7-4ecf-a52d-6fa122b3651c


In [106]:
dict = {'A':1, 'B':2}
list(dict.values())

[1, 2]

In [107]:
df_dict = {}

for year in list(range(2010, 2022)):
    sm_shp = geopandas.read_file(r'shapefile\City_Boundaries.shp')
    sm_shp['Year'] = year
    sm_shp['CITY'] = sm_shp['CITY'].str.strip()
    
    df_dict[year] = sm_shp

df_shp = geopandas.pd.concat(list(df_dict.values()))

df_shp['Year'] = df_shp['Year'].astype('int')
df_shp

Unnamed: 0,OBJECTID,COUNTY,CITY,Shape__Are,Shape__Len,GlobalID,geometry,Year
0,1,San Bernardino,Adelanto,2.025272e+08,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2010
1,2,Los Angeles,Agoura Hills,2.964318e+07,29626.940723,b9192abb-2d64-4f7b-b9dc-97b8a703521b,"POLYGON ((-13215828.688 4047115.804, -13215828...",2010
2,3,Alameda,Alameda,4.358305e+07,52137.876912,b8a7f28a-a71e-4e96-bd7e-78cf84f27f7d,"MULTIPOLYGON (((-13617731.274 4550805.581, -13...",2010
3,4,Alameda,Albany,7.558117e+06,16774.017561,f25071bd-a3d2-437a-bdf4-2d25a0aea1f4,"POLYGON ((-13613046.493 4565026.598, -13613003...",2010
4,5,Los Angeles,Alhambra,2.888338e+07,25461.487633,a74ff6de-81a9-4d98-b4dc-a1f9d9730f4b,"POLYGON ((-13149179.830 4042959.059, -13148716...",2010
...,...,...,...,...,...,...,...,...
477,478,Napa,Yountville,6.139924e+06,18166.776479,735a240a-584d-4627-8090-a2e92033d000,"POLYGON ((-13620669.817 4636906.587, -13620650...",2021
478,479,Siskiyou,Yreka,4.679887e+07,64683.190766,a1a1efeb-3906-417a-812e-2184e2101bd5,"POLYGON ((-13646382.898 5126329.073, -13646380...",2021
479,480,Sutter,Yuba City,6.630216e+07,74665.952003,060e429b-d220-42b0-82c9-bb07aa3ceb8b,"POLYGON ((-13537051.239 4743120.280, -13537055...",2021
480,481,San Bernardino,Yucaipa,1.068584e+08,66660.772363,048cc48d-99e6-4541-97c5-27b43a221a05,"POLYGON ((-13023938.262 4029406.072, -13024024...",2021


In [108]:
df_shp[df_shp.CITY.str.contains('Adelanto')]

Unnamed: 0,OBJECTID,COUNTY,CITY,Shape__Are,Shape__Len,GlobalID,geometry,Year
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2010
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2011
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2012
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2013
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2014
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2015
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2016
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2017
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2018
0,1,San Bernardino,Adelanto,202527200.0,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2019


In [109]:
base_df['Year'] = base_df['Year'].astype('int')

df_shp2 = df_shp.merge(base_df, left_on=['CITY', 'Year'], right_on=['City', 'Year'], how='left')
df_shp2

Unnamed: 0,OBJECTID,COUNTY,CITY,Shape__Are,Shape__Len,GlobalID,geometry,Year,Geography,Geographic Area Name,Total_Population,City
0,1,San Bernardino,Adelanto,2.025272e+08,122818.299432,a112aaa2-6c60-4c6f-83c5-5a03126064e8,"MULTIPOLYGON (((-13067991.331 4112789.226, -13...",2010,1600000US0600296,"Adelanto city, California",29764.0,Adelanto
1,2,Los Angeles,Agoura Hills,2.964318e+07,29626.940723,b9192abb-2d64-4f7b-b9dc-97b8a703521b,"POLYGON ((-13215828.688 4047115.804, -13215828...",2010,1600000US0600394,"Agoura Hills city, California",20328.0,Agoura Hills
2,3,Alameda,Alameda,4.358305e+07,52137.876912,b8a7f28a-a71e-4e96-bd7e-78cf84f27f7d,"MULTIPOLYGON (((-13617731.274 4550805.581, -13...",2010,1600000US0600562,"Alameda city, California",72512.0,Alameda
3,4,Alameda,Albany,7.558117e+06,16774.017561,f25071bd-a3d2-437a-bdf4-2d25a0aea1f4,"POLYGON ((-13613046.493 4565026.598, -13613003...",2010,1600000US0600674,"Albany city, California",17921.0,Albany
4,5,Los Angeles,Alhambra,2.888338e+07,25461.487633,a74ff6de-81a9-4d98-b4dc-a1f9d9730f4b,"POLYGON ((-13149179.830 4042959.059, -13148716...",2010,1600000US0600884,"Alhambra city, California",83389.0,Alhambra
...,...,...,...,...,...,...,...,...,...,...,...,...
5851,478,Napa,Yountville,6.139924e+06,18166.776479,735a240a-584d-4627-8090-a2e92033d000,"POLYGON ((-13620669.817 4636906.587, -13620650...",2021,1600000US0686930,"Yountville city, California",3351.0,Yountville
5852,479,Siskiyou,Yreka,4.679887e+07,64683.190766,a1a1efeb-3906-417a-812e-2184e2101bd5,"POLYGON ((-13646382.898 5126329.073, -13646380...",2021,1600000US0686944,"Yreka city, California",7807.0,Yreka
5853,480,Sutter,Yuba City,6.630216e+07,74665.952003,060e429b-d220-42b0-82c9-bb07aa3ceb8b,"POLYGON ((-13537051.239 4743120.280, -13537055...",2021,1600000US0686972,"Yuba City city, California",69540.0,Yuba City
5854,481,San Bernardino,Yucaipa,1.068584e+08,66660.772363,048cc48d-99e6-4541-97c5-27b43a221a05,"POLYGON ((-13023938.262 4029406.072, -13024024...",2021,1600000US0687042,"Yucaipa city, California",54312.0,Yucaipa


In [110]:
df_shp2.to_file('main_data.shp')

  df_shp2.to_file('main_data.shp')
