In [1]:
!pip install prophet
import pandas as pd
import plotly.graph_objs as go 
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns



In [2]:
import censusdata
import numpy as np

In [3]:
!pip install censusdata



In [4]:
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

**Atlanta Data**

In [5]:
counties = {'13':['013','015','035','045', '057', '063' , '067', '077', '085', '089', '097', '113', '117',
                 '121', '135', '143', '149', '151', '159', '171', '199', '211', '217', '223', '227', '231',
                 '247', '255', '297']}
states = {'13':'GA'}

In [6]:
def local_census_data(year,state,county):
    atlanta_county = censusdata.download('acs5', year,
                                 censusdata.censusgeo([('state', state), ('county', county), ('tract', '*')]),
                                 ['B01003_001E','B06009_005E','B06009_006E','B25001_001E','B25002_002E','B25002_003E',
                                  'B25003_002E', 'B25003_003E', 'B19013_001E'])
    atlanta_county = atlanta_county.reset_index()
    atlanta_county = atlanta_county.rename(columns={
        'index': 'Census Tract Code',
        'B01003_001E': 'Total Population',
        'B06009_005E': 'Bachelor degree',
        'B06009_006E': 'Graduate or Professional Degree',
        'B25001_001E': 'Total Housing',
        'B25002_002E': 'Total Occupied housing units',
        'B25002_003E': 'Total Vacant housing units',
        'B25003_002E': 'Total Owner occupied housing units',
        'B25003_003E': 'Total Renter occupied housing units',
        'B19013_001E': 'Median Household Income (USD)'
    })
    atlanta_county ['Bachelor degree'] = atlanta_county['Bachelor degree']+atlanta_county['Graduate or Professional Degree']
    atlanta_county['Percentage of Rent'] = atlanta_county['Total Renter occupied housing units']/atlanta_county['Total Housing']
    atlanta_county['Census Tract Code'] = atlanta_county['Census Tract Code'].astype(str)
    atlanta_county['Census Tract Code'] = atlanta_county['Census Tract Code'].apply(lambda x: str(x).split(">")[-1].split(":")[1].strip() 
                                                                if isinstance(x, str) else x.tract.split(":")[-1].strip())
    atlanta_county['Year'] = year
    atlanta_county['State'] = states[state]
    atlanta_county = atlanta_county.sort_values(by = ['Census Tract Code'], ignore_index = True)
    return atlanta_county

In [7]:
# Concatenate
atlanta_all = pd.DataFrame()
for year in range(2010, 2020):
    for state in counties.keys():
        for county in counties[state]:
            atlanta_county = local_census_data(year,state, county)
            atlanta_all = pd.concat([atlanta_all, atlanta_county], ignore_index=True)

In [8]:
atlanta_all.head()

Unnamed: 0,Census Tract Code,Total Population,Bachelor degree,Graduate or Professional Degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,Median Household Income (USD),Percentage of Rent,Year,State
0,180103,3350,502,128,1333,1101,232,989,112,67566.0,0.084021,2010,GA
1,180104,2138,157,71,829,749,80,624,125,45817.0,0.150784,2010,GA
2,180105,3242,219,33,1053,1017,36,933,84,54766.0,0.079772,2010,GA
3,180106,2787,204,31,1176,1055,121,973,82,43776.0,0.069728,2010,GA
4,180107,4025,160,43,1683,1472,211,891,581,39130.0,0.345217,2010,GA


In [9]:
atlanta_all = atlanta_all.dropna()
atlanta_all = atlanta_all[atlanta_all['Median Household Income (USD)'] != -666666666.0]

In [10]:
#atlanta_all = atlanta_all.drop(['Bachelor degree','Graduate or Professional Degree'], axis=1)

# Transform Census Tract to Zip code

In [11]:
zip_tract = pd.read_excel('TRACT_ZIP_122021.xlsx')
zip_tract['tract'] = zip_tract['tract'].apply(lambda x: str(x)[-6:])
zip_tract = zip_tract.rename(columns={'tract': "Census Tract Code", 'usps_zip_pref_state':'State'})
merged_df = pd.merge(atlanta_all, zip_tract, on=['Census Tract Code','State'], how='left')

In [12]:
grouped_data = merged_df.groupby(['zip','Year']).agg({
    'Total Population': 'sum',
    'Bachelor degree': 'sum',
    'Total Housing': 'sum',
    'Total Occupied housing units': 'sum',
    'Total Vacant housing units': 'sum',
    'Total Owner occupied housing units': 'sum',
    'Total Renter occupied housing units': 'sum',
    'Median Household Income (USD)': 'mean'
}).reset_index()
grouped_data['Median Household Income (USD)'] = grouped_data['Median Household Income (USD)'].round(2)
grouped_data['Percentage of Rent'] = round(grouped_data['Total Renter occupied housing units'] / grouped_data['Total Housing'],3)

In [13]:
grouped_data.head()

Unnamed: 0,zip,Year,Total Population,Bachelor degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,Median Household Income (USD),Percentage of Rent
0,30002,2010,17126,3181,7762,6811,951,2807,4004,39473.4,0.516
1,30002,2011,16670,3478,7740,6735,1005,2884,3851,39711.2,0.498
2,30002,2012,16684,3633,7910,6712,1198,2937,3775,40497.4,0.477
3,30002,2013,16935,3717,7949,6787,1162,2947,3840,39573.2,0.483
4,30002,2014,16582,3946,7951,6647,1304,2851,3796,38020.0,0.477


In [14]:
#grouped_data.to_csv('2010-2019_Atlanta_ACS_yearly.csv', index=False)

In [15]:
grouped_data.to_csv('Y_AT_ACS_2010-2019.csv', index=False)

# Performed Linear Interpolation

In [16]:
## Create DF
df = (pd
        .DataFrame( index= pd.date_range( '2010-01-01', '2019-12-31' ,freq='MS'))
      .reset_index()
      .rename(columns={'index' : 'y_dt'})
      .assign(key=1)
     )

## load data
df_to_merge = grouped_data
df_census_tract = df_to_merge.loc[:, ['zip']].drop_duplicates().assign(key=1)
df = pd.merge( df, df_census_tract, how='outer', on='key').drop(['key'], axis=1)

In [17]:
df

Unnamed: 0,y_dt,zip
0,2010-01-01,30002
1,2010-01-01,30003
2,2010-01-01,30004
3,2010-01-01,30005
4,2010-01-01,30006
...,...,...
89755,2019-12-01,39867
89756,2019-12-01,39870
89757,2019-12-01,39877
89758,2019-12-01,39897


In [18]:
df_to_merge

Unnamed: 0,zip,Year,Total Population,Bachelor degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,Median Household Income (USD),Percentage of Rent
0,30002,2010,17126,3181,7762,6811,951,2807,4004,39473.4,0.516
1,30002,2011,16670,3478,7740,6735,1005,2884,3851,39711.2,0.498
2,30002,2012,16684,3633,7910,6712,1198,2937,3775,40497.4,0.477
3,30002,2013,16935,3717,7949,6787,1162,2947,3840,39573.2,0.483
4,30002,2014,16582,3946,7951,6647,1304,2851,3796,38020.0,0.477
...,...,...,...,...,...,...,...,...,...,...,...
7475,39901,2015,4005,53,1178,1026,152,56,970,25034.0,0.823
7476,39901,2016,3451,63,1041,918,123,22,896,25000.0,0.861
7477,39901,2017,3555,62,928,843,85,0,843,26523.0,0.908
7478,39901,2018,3308,71,898,814,84,0,814,26630.0,0.906


In [19]:
assert df_to_merge.shape[0] == df_to_merge.loc[:, ['zip', 'Year']].drop_duplicates().shape[0]

## Deal with assert
df_to_merge.loc[:, 'y_dt'] = pd.to_datetime(df_to_merge['Year'], format='%Y')
df_to_merge = df_to_merge.sort_values('y_dt')

df_to_merge.head()
merged_df = pd.merge( df, df_to_merge, how='left', on = ['y_dt', 'zip'])

In [20]:
merged_df

Unnamed: 0,y_dt,zip,Year,Total Population,Bachelor degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,Median Household Income (USD),Percentage of Rent
0,2010-01-01,30002,2010.0,17126.0,3181.0,7762.0,6811.0,951.0,2807.0,4004.0,39473.40,0.516
1,2010-01-01,30003,2010.0,6170.0,503.0,2506.0,2048.0,458.0,569.0,1479.0,36187.00,0.590
2,2010-01-01,30004,2010.0,99984.0,35832.0,38664.0,35491.0,3173.0,28928.0,6563.0,105401.19,0.170
3,2010-01-01,30005,2010.0,66991.0,27351.0,24120.0,22715.0,1405.0,17408.0,5307.0,106472.92,0.220
4,2010-01-01,30006,2010.0,4251.0,1887.0,2697.0,2288.0,409.0,196.0,2092.0,53074.00,0.776
...,...,...,...,...,...,...,...,...,...,...,...,...
89755,2019-12-01,39867,,,,,,,,,,
89756,2019-12-01,39870,,,,,,,,,,
89757,2019-12-01,39877,,,,,,,,,,
89758,2019-12-01,39897,,,,,,,,,,


In [21]:
merged_df = merged_df.sort_values(['zip', 'y_dt'])

new_df = merged_df.copy()
new_df.loc[:, 'new_col'] = np.nan
new_df

Unnamed: 0,y_dt,zip,Year,Total Population,Bachelor degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,Median Household Income (USD),Percentage of Rent,new_col
0,2010-01-01,30002,2010.0,17126.0,3181.0,7762.0,6811.0,951.0,2807.0,4004.0,39473.4,0.516,
748,2010-02-01,30002,,,,,,,,,,,
1496,2010-03-01,30002,,,,,,,,,,,
2244,2010-04-01,30002,,,,,,,,,,,
2992,2010-05-01,30002,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
86767,2019-08-01,39901,,,,,,,,,,,
87515,2019-09-01,39901,,,,,,,,,,,
88263,2019-10-01,39901,,,,,,,,,,,
89011,2019-11-01,39901,,,,,,,,,,,


In [25]:
for census_tract in merged_df.loc[:, 'zip'].unique():

    merged_df = merged_df.sort_values(['zip', 'y_dt'])

    for col in ['Total Population', 'Bachelor degree', 'Total Housing', 'Total Occupied housing units', 
                'Total Vacant housing units', 'Total Owner occupied housing units',
                'Total Renter occupied housing units', 'Median Household Income (USD)']:

        temp_df = (merged_df.loc[(merged_df.loc[:, 'zip'] == census_tract), [col]]
                    .interpolate(method = "spline", order = 2,  limit_direction = "both",  downcast = "infer")
                    .interpolate(method='bfill')
                    .reset_index(drop=True))

        new_df.loc[(new_df.loc[:, 'zip'] == census_tract), f'Monthly {col}'] = temp_df.values


The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.
The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too 

In [26]:
temp_df

Unnamed: 0,Median Household Income (USD)
0,26584.000000
1,27145.175642
2,27660.475561
3,28130.215953
4,28554.396818
...,...
115,30140.798853
116,30469.745731
117,30813.111456
118,31170.896027


In [27]:
new_df

Unnamed: 0,y_dt,zip,Year,Total Population,Bachelor degree,Total Housing,Total Occupied housing units,Total Vacant housing units,Total Owner occupied housing units,Total Renter occupied housing units,...,Percentage of Rent,new_col,Monthly Total Population,Monthly Bachelor degree,Monthly Total Housing,Monthly Total Occupied housing units,Monthly Total Vacant housing units,Monthly Total Owner occupied housing units,Monthly Total Renter occupied housing units,Monthly Median Household Income (USD)
0,2010-01-01,30002,2010.0,17126.0,3181.0,7762.0,6811.0,951.0,2807.0,4004.0,...,0.516,,17126.000000,3181.000000,7762.000000,6811.000000,951.000000,2807.000000,4004.000000,39473.400000
748,2010-02-01,30002,,,,,,,,,...,,,17069.654850,3211.409659,7750.640294,6803.310751,947.362102,2814.298199,3988.305814,39456.888558
1496,2010-03-01,30002,,,,,,,,,...,,,17016.622045,3240.800468,7741.141174,6795.809981,945.407496,2821.427718,3972.908870,39447.126703
2244,2010-04-01,30002,,,,,,,,,...,,,16966.924859,3269.163093,7733.357285,6788.560613,944.909493,2828.397144,3958.075615,39443.950773
2992,2010-05-01,30002,,,,,,,,,...,,,16920.563292,3296.497534,7727.288626,6781.562645,945.868093,2835.206479,3943.806047,39447.360769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86767,2019-08-01,39901,,,,,,,,,...,,,2831.904433,46.414062,765.825513,736.290588,69.455016,-1.323964,756.106956,30140.798853
87515,2019-09-01,39901,,,,,,,,,...,,,2808.168377,41.177971,748.703304,728.542324,68.082193,-1.613387,752.434080,30469.745731
88263,2019-10-01,39901,,,,,,,,,...,,,2784.568194,35.547254,730.561494,720.424381,66.643768,-1.925796,748.697863,30813.111456
89011,2019-11-01,39901,,,,,,,,,...,,,2761.103885,29.521913,711.400083,711.936760,65.139743,-2.261192,744.898308,31170.896027


In [54]:
new_df = new_df.drop(columns=['new_col','Year', 'Total Population','Bachelor degree','Total Housing',
                              'Total Occupied housing units', 'Total Vacant housing units',
                              'Total Owner occupied housing units',
                'Total Renter occupied housing units','Median Household Income (USD)','Percentage of Rent'])
#new_df = new_df.drop(columns=['Total Occupied housing units', 'Total Vacant housing units'])
new_df.iloc[:, 2:10] = new_df.iloc[:, 2:10].astype(int)
new_df['Monthly Percentage of Rent'] = new_df['Monthly Total Renter occupied housing units']/new_df['Monthly Total Housing']


  new_df.iloc[:, 2:10] = new_df.iloc[:, 2:10].astype(int)


In [55]:
new_df.head()

Unnamed: 0,y_dt,zip,Monthly Total Population,Monthly Bachelor degree,Monthly Total Housing,Monthly Total Occupied housing units,Monthly Total Vacant housing units,Monthly Total Owner occupied housing units,Monthly Total Renter occupied housing units,Monthly Median Household Income (USD),Monthly Percentage of Rent
0,2010-01-01,30002,17126,3181,7762,6811,951,2807,4004,39473,0.515846
748,2010-02-01,30002,17069,3211,7750,6803,947,2814,3988,39456,0.514581
1496,2010-03-01,30002,17016,3240,7741,6795,945,2821,3972,39447,0.513112
2244,2010-04-01,30002,16966,3269,7733,6788,944,2828,3958,39443,0.511832
2992,2010-05-01,30002,16920,3296,7727,6781,945,2835,3943,39447,0.510289


In [56]:
new_df['Bachelor or higher Precentage'] = new_df['Monthly Bachelor degree'] / new_df['Monthly Total Population']

In [57]:
new_df.head(10)

Unnamed: 0,y_dt,zip,Monthly Total Population,Monthly Bachelor degree,Monthly Total Housing,Monthly Total Occupied housing units,Monthly Total Vacant housing units,Monthly Total Owner occupied housing units,Monthly Total Renter occupied housing units,Monthly Median Household Income (USD),Monthly Percentage of Rent,Bachelor or higher Precentage
0,2010-01-01,30002,17126,3181,7762,6811,951,2807,4004,39473,0.515846,0.185741
748,2010-02-01,30002,17069,3211,7750,6803,947,2814,3988,39456,0.514581,0.188119
1496,2010-03-01,30002,17016,3240,7741,6795,945,2821,3972,39447,0.513112,0.190409
2244,2010-04-01,30002,16966,3269,7733,6788,944,2828,3958,39443,0.511832,0.192679
2992,2010-05-01,30002,16920,3296,7727,6781,945,2835,3943,39447,0.510289,0.194799
3740,2010-06-01,30002,16877,3322,7722,6774,948,2841,3930,39457,0.508936,0.196836
4488,2010-07-01,30002,16837,3348,7720,6768,952,2848,3916,39473,0.507254,0.198848
5236,2010-08-01,30002,16801,3372,7719,6762,957,2854,3904,39497,0.505765,0.200702
5984,2010-09-01,30002,16768,3395,7720,6756,964,2860,3892,39526,0.504145,0.202469
6732,2010-10-01,30002,16738,3417,7722,6750,972,2866,3880,39563,0.502461,0.204146


In [58]:
new_df.to_csv('2010-2019_Atlanta_ACS_monthly.csv', index=False)

In [None]:
new_df.to_csv('M_AT_ACS_2010-2019.csv', index=False)