In [None]:
import geopandas as gp
import matplotlib.pyplot as plt
from shapely.geometry import *
import pandas as pd
import numpy as np

Canada: water demand as a function of NAICS

In [None]:
# All three demand tables live in the same folder; keeping the folder in one
# place means the notebook can be pointed elsewhere by editing a single line.
INDUSTRIAL_DATA_DIR = r'C:\Users\mhardika\Documents\AMO\GeoToolAll_Methods\Water Source Data\Industrial'

# Canadian table: provides the 'naics', 'fraction' and 'annual_demand' columns used below
canada_demand = pd.read_csv(INDUSTRIAL_DATA_DIR + '\\' + 'canada_demand_naics.csv')
# US state table: provides the 'state' and 'IN-WFrTo' columns used below
state_demand = pd.read_csv(INDUSTRIAL_DATA_DIR + '\\' + 'water_demand_by_state.csv')
# US county table: provides the 'STATE', 'COUNTY' and 'IN-WFrTo' columns used below
county_demand = pd.read_csv(INDUSTRIAL_DATA_DIR + '\\' + 'USGS_industrial_metric.csv')

USA: water demand as a function of NAICS

In [None]:
# Overall totals (the Canadian figures are in million cubic metres)
canada_total = canada_demand['annual_demand'].sum()
usa_total = state_demand['IN-WFrTo'].sum()

# Handles on the columns that later cells iterate over
naics_list = canada_demand['naics']
naics_fraction = canada_demand['fraction']
state_list = state_demand['state']
county_list = county_demand['COUNTY']

# Apportion the US total across NAICS codes using the Canadian fractions
usa_naics_fraction = naics_fraction * usa_total
usa_naics_fraction_df = pd.DataFrame({
    'naics': naics_list,
    'fraction': naics_fraction,
    'water_demand (m3/yr)': usa_naics_fraction,
})
usa_naics_fraction_df

Creating table for demand data based on Canada data

In [None]:
# Long-format table with one row per (state, county, NAICS code) combination.
columns = ['state','county','naics', 'state_demand (m3/yr)', 'county_demand (m3/yr)','naics_fraction','naics_demand (m3/yr)']

# The fraction for a NAICS code does not depend on state or county: look it up once.
naics_fractions = [
    (naics, canada_demand[canada_demand['naics']==naics]['fraction'].values[0])
    for naics in naics_list
]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating inside the loop copies the whole table on every row.
records = []
for state in ['CO']:
    state_total = state_demand[state_demand['state']==state]['IN-WFrTo'].values[0]
    state_counties = county_demand[county_demand['STATE']==state]
    county_list = state_counties['COUNTY']
    for county in county_list:
        # First matching row wins if a county appears more than once
        county_total = state_counties[state_counties['COUNTY']==county]['IN-WFrTo'].values[0]
        for naics, fraction in naics_fractions:
            records.append({
                'state': state,
                'county': county,
                'naics': naics,
                'state_demand (m3/yr)': state_total,
                'county_demand (m3/yr)': county_total,
                'naics_fraction': fraction,
                'naics_demand (m3/yr)': 0,  # placeholder, not computed in this cell
            })

demand_table = pd.DataFrame(records, columns=columns)
demand_table


Creating table format for final place and demand data

In [None]:
# Empty template for the per-facility table; the columns are filled in by later cells
columns = [
    'state',
    'county',
    'location',
    'naics',
    'employee count',
    'capacity_fraction',
    'water demand (m3/yr)',
]
industry_demand = pd.DataFrame(columns=columns)

Reading industry location file

In [None]:
# Manufacturing facilities shapefile, kept in the CRS stored in the file
facilities_shapefile = '/Users/mhardika/Documents/AMO/GeoToolAll_Methods/Water Source Data/Industrial/General_Manufacturing_Facilities/General_Manufacturing_Facilities.shp'
df = gp.read_file(facilities_shapefile)
# Optional reprojection and quick-look plot, left disabled:
# df = df.to_crs("EPSG:4326")
# # fig, ax = plt.subplots()
# # df.plot(ax=ax,figsize=(50, 50))

In [None]:
# Remove every facility row that has a missing value in any column,
# then key the remaining rows by state and list the states present
df = df.dropna(axis=0, how='any')
df_state = df.set_index(keys='STATE')
df_state.index.unique()

In [None]:
# Keep only facilities in the contiguous United States (48 states plus DC)
contiguous_usa = ('AL','AZ','AR','CA','CO','CT','DE','FL','GA','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE',
                  'NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY','DC')

# The frame is indexed by state at this point, so one membership mask on the
# index selects the rows to keep; reset_index turns 'STATE' back into a column
in_contiguous_usa = df_state.index.isin(contiguous_usa)
df_state = df_state[in_contiguous_usa].reset_index()

In [None]:
# Show the coordinate reference system attached to the facility geometry column
df_state['geometry'].crs

In [None]:
# Populate the per-facility table from the filtered shapefile attributes
industry_demand['state'] = df_state['STATE']
# County labels take the form "<Name> County" with only the first letter upper-cased.
# NOTE(review): capitalize() lower-cases every later letter ("LA PLATA" -> "La plata");
# confirm this matches the spelling used in the USGS county table.
industry_demand['county'] = df_state['COUNTY'].str.capitalize() + ' County'
industry_demand['location'] = df_state['geometry']
# First three characters of the NAICS code, kept as text
industry_demand['naics'] = df_state['NAICS'].astype(str).str[:3]
industry_demand['employee count'] = df_state['EMP']
# Float placeholder: later cells write fractional shares into this column, and
# assigning floats into an integer column is deprecated in recent pandas
industry_demand['capacity_fraction'] = 0.0
industry_demand[industry_demand['state']=='AZ'].head()

In [None]:
# Keep only facilities whose three-digit NAICS code is one of the values below
naics_list = (311,312,313,314,321,322,324,325,326,327,331,332,333,334,335,336,339)

industry_by_naics = industry_demand.set_index('naics')
# Codes are stored as text. Coercing to numbers turns anything non-numeric
# ('N/A' or any other stray label) into NaN, which is then simply not kept,
# instead of raising inside int().
numeric_codes = pd.to_numeric(pd.Series(industry_by_naics.index), errors='coerce')
keep_row = numeric_codes.isin(naics_list).to_numpy()

# The set_index/reset_index round trip leaves 'naics' as the first column and
# gives the filtered table a fresh 0..n-1 index
industry_demand_naics = industry_by_naics[keep_row].reset_index()
industry_demand_naics


In [None]:
# Spot check: facilities labelled 'Apache County'
industry_demand_naics.loc[industry_demand_naics['county'] == 'Apache County']

Calculating water demand using capacity fraction and water demand across USA for a specific NAICS

In [None]:
# Each facility's share of the total employee count within its NAICS code
employees = industry_demand_naics['employee count']
employees_in_code = employees.groupby(industry_demand_naics['naics']).transform('sum')
industry_demand_naics['capacity_fraction'] = employees / employees_in_code

# US-wide demand per NAICS code; the first entry is used if a code is listed twice
national_demand = (
    usa_naics_fraction_df
    .drop_duplicates(subset='naics')
    .set_index('naics')['water_demand (m3/yr)']
)

# Facility demand = share of the code's employees x US-wide demand of that code.
# Facility codes are text while the national table is looked up by integer code;
# a code with no national figure yields NaN rather than an IndexError.
industry_demand_naics['water demand (m3/yr)'] = (
    industry_demand_naics['capacity_fraction']
    * industry_demand_naics['naics'].astype(int).map(national_demand)
)

industry_demand_naics

Comparison of calculated county level water demand vs USGS

In [None]:
# Sum the facility-level estimates per Colorado county for comparison with USGS
county_list = county_demand[county_demand['STATE']=='CO']['COUNTY']

# Restrict to Colorado facilities first: county names repeat across states, so
# matching on the county name alone would add other states' facilities to the
# Colorado totals.
co_facilities = industry_demand_naics[industry_demand_naics['state']=='CO']

# One record per county, built in a single pass; a county without any facility
# gets a demand of 0 because the sum of an empty selection is 0
county_records = [
    {'county': county,
     'demand': co_facilities[co_facilities['county']==county]['water demand (m3/yr)'].sum()}
    for county in county_list
]

county_demand_calc = (
    pd.DataFrame(county_records, columns=['county','demand'])
    .sort_values(by=['county'])
    .reset_index(drop=True)
)

In [None]:
# Display the calculated county-level demand table
county_demand_calc

In [None]:
# One row per Colorado county: the first row of each county group, indexed by county
co_demand_rows = demand_table.loc[demand_table['state'] == 'CO']
demand_table_sample = co_demand_rows.groupby('county').first()
demand_table_sample

In [None]:
fig, ax = plt.subplots(figsize=(25,10))

# Put both series in county-name order so that bar i of each series and tick
# label i all refer to the same county
calculated = county_demand_calc.sort_values(by=['county'])
reported = demand_table_sample.sort_values(by=['county'])['county_demand (m3/yr)']
x = np.arange(len(calculated))

ax.bar(x-0.125, calculated['demand'], width = 0.25, label='Calculated')
ax.bar(x+0.125, reported, width = 0.25, label = 'County level Census')
ax.set_ylabel('Water demand (m3/yr)')
ax.legend()

# Tick labels come from the sorted table itself; county_list keeps the order of
# the USGS file and would mislabel the bars whenever that order is not alphabetical
ax.set_xticks(x, list(calculated['county']), rotation = 90)

# Calculated total as a percentage of the reported county total
frac = calculated['demand'].sum() / reported.sum()
print(frac*100)

In [None]:
# Add county-wise capacity fraction: each county's demand is split across the
# facilities of that county in proportion to their employee count

# Fractional shares are written below, so make sure the column holds floats
industry_demand['capacity_fraction'] = industry_demand['capacity_fraction'].astype(float)

for state in ['AZ']:#state_list:
    county_list = county_demand[county_demand['STATE']==state]['COUNTY']
    in_state = industry_demand['state'] == state
    # Totals are read straight from the state/county tables: demand_table only
    # contains the states it was built for, so a lookup there fails for others
    industry_demand.loc[in_state, 'state demand (m3/yr)'] = state_demand[state_demand['state']==state]['IN-WFrTo'].values[0]
    for county in county_list:
        in_county = in_state & (industry_demand['county'] == county)
        county_total = county_demand[(county_demand['STATE']==state) & (county_demand['COUNTY']==county)]['IN-WFrTo'].values[0]
        county_employees = industry_demand.loc[in_county, 'employee count']
        industry_demand.loc[in_county, 'capacity_fraction'] = county_employees / county_employees.sum()
        industry_demand.loc[in_county, 'county demand (m3/yr)'] = county_total


industry_demand['water demand (m3/yr)'] = industry_demand['capacity_fraction']*industry_demand['county demand (m3/yr)']
industry_demand[industry_demand.state == 'AZ'].head()

In [None]:
# Largest capacity fraction in the table (max must be called; the bare
# attribute only displays the bound method)
industry_demand.capacity_fraction.max()

In [None]:
# US county boundaries, reprojected to EPSG:4326 right after loading
county_boundaries_shapefile = r'\Users\mhardika\Documents\AMO\GeoToolAll_Methods\GeoData\US_County_Boundaries\US_CountyBndrys.shp'
us_counties = gp.read_file(county_boundaries_shapefile).to_crs("EPSG:4326")

In [None]:
# Merge all Colorado facility locations into one geometry and take its centroid,
# then express that centroid in EPSG:4326
co_locations = industry_demand_naics.loc[industry_demand_naics['state']=='CO', 'location']
centroids_all = gp.GeoDataFrame(geometry=co_locations)
point = centroids_all.dissolve().centroid.to_crs("EPSG:4326")

In [None]:
# Display the centroid computed from the Colorado facility locations
point

In [None]:
# County outlines for the state with FIPS code '08', drawn together with the
# 'CO' facility locations and their overall centroid
state_code = '08'
state = us_counties[us_counties['STATEFP'] == state_code]

fig, ax = plt.subplots(figsize=(5, 5))

# County boundaries as unfilled black outlines
state.plot(ax=ax, figsize=(50, 50), facecolor='none', edgecolor='black')

# All places
is_co_facility = industry_demand_naics['state'] == 'CO'
centroids = gp.GeoSeries(industry_demand_naics[is_co_facility].location).to_crs("EPSG:4326")
centroids.plot(ax=ax)

# Centroid of all facilities, drawn on top
point.plot(ax=ax)

Attempting clustering of industries for Colorado: writing the Colorado data to file

In [None]:
# Write the Colorado facility rows to CSV for the clustering step
co_facility_rows = industry_demand_naics[industry_demand_naics['state']=='CO']
co_facility_rows.to_csv(r'\Users\mhardika\Documents\AMO\GeoToolAll_Methods\Water Source Data\Industrial\co_location_data.csv')

In [None]:
# CRS of the Colorado facility locations. The CRS belongs to the geometry
# series as a whole and is a property, not a method; a single geometry taken
# out of the series has no crs attribute, so read it from the GeoSeries.
gp.GeoSeries(industry_demand_naics[industry_demand_naics['state']=='CO'].location).crs