In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

# Once either this notebook or the power plant notebook are merged to main, this function can be called

In [2]:
def pull_gpkg_from_directory(bucket_name, directory):
    """
    Pulls GeoPackage files from a specified directory in an S3 bucket.
    
    Parameters:
    - bucket_name (str): The name of the S3 bucket.
    - directory (str): The directory within the bucket to search for GeoPackage files.
    """
    # Create an S3 client
    s3 = boto3.client('s3')

    # List objects in the specified directory
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=directory)

    # Check if objects were found
    if 'Contents' in response:
        # Iterate through each object found
        for obj in response['Contents']:
            # Get the key (filename) of the object
            key = obj['Key']
            
            # Check if the object is a .gpkg file
            if key.endswith('.gpkg'):
                # Download the GeoPackage file into memory
                gpkg_object = s3.get_object(Bucket=bucket_name, Key=key)
                gpkg_data = io.BytesIO(gpkg_object['Body'].read())
                
                # Save the GeoPackage file locally
                gpkg_filename = os.path.basename(key)
                with open(gpkg_filename, 'wb') as gpkg_file:
                    gpkg_file.write(gpkg_data.getvalue())
                
                print(f"Saved GeoPackage as '{gpkg_filename}' locally")
                # You can now use the saved file for further processing
    else:
        print("No objects found in the specified directory.")

In [3]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/built_environment/communication_infrastructure/homeland_infrastructure_foundation_level_data/'

pull_gpkg_from_directory(bucket_name, aws_dir)

Saved GeoPackage as 'built_hifld_cellular_towers.gpkg' locally
Saved GeoPackage as 'built_hifld_microwave_towers.gpkg' locally
Saved GeoPackage as 'built_hifld_mobile_towers.gpkg' locally
Saved GeoPackage as 'built_hifld_paging_towers.gpkg' locally
Saved GeoPackage as 'built_hifld_radio_towers.gpkg' locally
Saved GeoPackage as 'built_hifld_tv_contour.gpkg' locally


In [5]:
cellular_towers_data = gpd.read_file('built_hifld_cellular_towers.gpkg')
print('complete')
microwave_towers_data = gpd.read_file('built_hifld_microwave_towers.gpkg')
print('complete')
mobile_towers_data = gpd.read_file('built_hifld_mobile_towers.gpkg')
print('complete')
paging_towers_data = gpd.read_file('built_hifld_paging_towers.gpkg')
print('complete')
radio_towers_data = gpd.read_file('built_hifld_radio_towers.gpkg')
print('complete')

complete
complete
complete
complete
complete


In [83]:
paging_towers_data.columns

Index(['UniqSysID', 'Licensee', 'Callsign', 'LocNum', 'LatDeg', 'LatMin',
       'LatSec', 'LatDir', 'LonDeg', 'LonMin', 'LonSec', 'LonDir', 'LocAdd',
       'LocCity', 'LocCounty', 'LocState', 'Nepa', 'QZone', 'TowReg',
       'SupStruc', 'AllStruc', 'StrucType', 'LicStatus', 'RSC', 'latdec',
       'londec', 'url', 'USCB_STATEFP', 'USCB_COUNTYFP', 'USCB_TRACTCE',
       'USCB_GEOID', 'USCB_NAME', 'USCB_NAMELSAD', 'USCB_MTFCC',
       'USCB_FUNCSTAT', 'USCB_ALAND', 'USCB_AWATER', 'USCB_INTPTLAT',
       'USCB_INTPTLON', 'geometry'],
      dtype='object')

In [81]:
radio_towers_data.columns

Index(['OBJECTID', 'CALLSIGN', 'FREQUENCY', 'SERVICE', 'CLASS', 'STATUS',
       'CITY', 'STATE', 'COUNTRY', 'FILENUM', 'FACID', 'LAT', 'LON',
       'LICENSEE', 'LATDD', 'LONDD', 'USCB_STATEFP', 'USCB_COUNTYFP',
       'USCB_TRACTCE', 'USCB_GEOID', 'USCB_NAME', 'USCB_NAMELSAD',
       'USCB_MTFCC', 'USCB_FUNCSTAT', 'USCB_ALAND', 'USCB_AWATER',
       'USCB_INTPTLAT', 'USCB_INTPTLON', 'geometry'],
      dtype='object')

In [82]:
radio_towers_data

Unnamed: 0,OBJECTID,CALLSIGN,FREQUENCY,SERVICE,CLASS,STATUS,CITY,STATE,COUNTRY,FILENUM,...,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,3,DK201FB,88.1 MHz,FX,D,LIC,TULARE,CA,US,BLFT -20000911AEU,...,06107002402,24.02,Census Tract 24.02,G5020,S,141055701,977010,+36.1783683,-119.2649899,POINT (-119.25176 36.22562)
1,4435,KCRZ,104.9 MHz,FM,A,LIC,TIPTON,CA,US,BLH -19951107KC,...,06107002402,24.02,Census Tract 24.02,G5020,S,141055701,977010,+36.1783683,-119.2649899,POINT (-119.25205 36.16862)
2,7,DK203CP,88.5 MHz,FX,D,LIC,PENRYN,CA,US,BLFT -19980225TB,...,06061020502,205.02,Census Tract 205.02,G5020,S,26617928,917613,+38.8567146,-121.0997167,POINT (-121.11184 38.87025)
3,6035,KKFS,103.9 MHz,FM,A,LIC,LINCOLN,CA,US,BLH -20050422AAK,...,06061020502,205.02,Census Tract 205.02,G5020,S,26617928,917613,+38.8567146,-121.0997167,POINT (-121.12604 38.87575)
4,13,DK208EI,89.5 MHz,FX,D,LIC,PORTERVILLE,CA,US,BLFT -20040604ACQ,...,06107003503,35.03,Census Tract 35.03,G5020,S,3228854,0,+36.0893252,-119.0618748,POINT (-119.06405 36.09392)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1361,9699,KYRR-LP,93.3 MHz,FL,L1,LIC,NEVADA CITY,CA,US,BLL -20050119ADX,...,06057000801,8.01,Census Tract 8.01,G5020,S,155917400,2856299,+39.3058929,-120.9405534,POINT (-120.94244 39.31574)
1362,9789,KZED-LP,107.5 MHz,FL,L1,LIC,LA GRANGE,CA,US,BLL -20060410ADT,...,06043000200,2,Census Tract 2,G5020,S,745970006,13592003,+37.6729487,-120.1089266,POINT (-120.35660 37.66248)
1363,9822,KZIQ-FM,92.7 MHz,FM,A,LIC,RIDGECREST,CA,US,BLH -20041210ABI,...,06029005410,54.10,Census Tract 54.10,G5020,S,7863641,45701,+35.6043775,-117.6480289,POINT (-117.64401 35.61615)
1364,9947,KZSU,90.1 MHz,FM,A,LIC,STANFORD,CA,US,BLED -1774,...,06085511705,5117.05,Census Tract 5117.05,G5020,S,12543868,0,+37.4057029,-122.1619140,POINT (-122.17914 37.41167)


bigger file, running later

In [None]:
tv_contour_data = gpd.read_file('built_hifld_tv_contour.gpkg')

In [64]:
# Read in CA census tiger file
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = gpd.read_file(census_shp_dir)

# Select columns and rename
filtered_ca_boundaries = ca_boundaries[['GEOID', 'geometry']].copy()
filtered_ca_boundaries.rename(columns={'GEOID': 'tract'}, inplace=True)

# Modify 'tract' column
filtered_ca_boundaries['tract'] = filtered_ca_boundaries['tract'].str[1:]
filtered_ca_boundaries = filtered_ca_boundaries.to_crs(crs=4269) 
# Output the modified GeoDataFrame
filtered_ca_boundaries

Unnamed: 0,tract,geometry
0,6085504321,"POLYGON ((-121.87556 37.39924, -121.87535 37.3..."
1,6085504410,"POLYGON ((-121.88886 37.40758, -121.88576 37.4..."
2,6085507003,"POLYGON ((-122.02489 37.21683, -122.02459 37.2..."
3,6085507004,"POLYGON ((-121.99304 37.22562, -121.99249 37.2..."
4,6085502204,"POLYGON ((-121.93167 37.29803, -121.92801 37.3..."
...,...,...
9124,6059001303,"POLYGON ((-117.95917 33.92458, -117.95888 33.9..."
9125,6059001304,"POLYGON ((-117.95918 33.92820, -117.95831 33.9..."
9126,6059001401,"POLYGON ((-117.95056 33.94503, -117.95055 33.9..."
9127,6013367200,"POLYGON ((-122.34551 37.96355, -122.34550 37.9..."


In [100]:
communication_infrastructure_data = [cellular_towers_data, microwave_towers_data, mobile_towers_data, paging_towers_data, radio_towers_data]
words_to_search = ['OBJECTID','UniqSysID','Licensee', 'CALLSIGN', 'LocCounty', 'CITY', 'AllStruc', 'StrucType', 'LicStatus', 'STATUS', 'FREQUENCY', 'geometry']

def filter_and_spatial_join(data_list, filtered_ca_boundaries, words_to_search):
    tract_count_dfs = {}
    county_count_dfs = {}
    
    for df, df_name in zip(data_list, ['cellular_towers_data', 'microwave_towers_data', 'mobile_towers_data', 'paging_towers_data', 'radio_towers_data']):
        filtered_df = df[[col for col in df.columns if any(word in col for word in words_to_search)]].copy()
        filtered_df.index = df.index
        filtered_df = filtered_df.to_crs(crs=4269) 

        # Perform the spatial join
        joined_df = gpd.sjoin(filtered_df, filtered_ca_boundaries, how='right', predicate='within')
        joined_df = joined_df.drop_duplicates(subset=['tract'])

        # Use 'UniqSysID' if it exists, otherwise use 'OBJECTID'
        if 'UniqSysID' in joined_df.columns:
            id_column = 'UniqSysID'
        elif 'OBJECTID' in joined_df.columns:
            id_column = 'OBJECTID'
        else:
            raise ValueError(f"Neither 'UniqSysID' nor 'OBJECTID' found in the DataFrame for {df_name}")

        '''if 'LocCounty' in joined_df.columns:
            county_id = 'LocCounty'
        elif 'USCB_COUNTYFP' in joined_df.columns:
            county_id = 'USCB_COUNTYFP'
        # Remove duplicates based on 'tract' and the chosen ID column
        joined_df = joined_df.drop_duplicates(subset=['tract', id_column])'''

        # Group by 'tract' and count non-null values of the chosen ID column
        tract_power_counts = joined_df.groupby('tract')[id_column].apply(lambda x: x.notnull().sum()).reset_index(name=f"{df_name}_count")
        county_power_counts = joined_df.groupby('LocCounty')[id_column].apply(lambda x: x.notnull().sum()).reset_index(name=f"{df_name}_count")

        tract_df_name = f"tract_count_{df_name}"
        county_df_name = f"county_count_{df_name}"

        tract_count_dfs[tract_df_name] = tract_power_counts
        county_count_dfs[county_df_name] = county_power_counts
        
        # Dynamically create global variables
        globals()[tract_df_name] = tract_power_counts
        globals()[county_df_name] = county_power_counts

        print(tract_df_name)
        print(county_df_name)
    
    return tract_count_dfs, county_count_dfs

tract_count_dfs, county_count_dfs = filter_and_spatial_join(communication_infrastructure_data, filtered_ca_boundaries, words_to_search)


tract_count_cellular_towers_data
county_count_cellular_towers_data
tract_count_microwave_towers_data
county_count_microwave_towers_data
tract_count_mobile_towers_data
county_count_mobile_towers_data
tract_count_paging_towers_data
county_count_paging_towers_data


KeyError: 'LocCounty'

In [104]:
# Read in CA census tiger file
county_tract = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(county_tract)

In [107]:
ca_tract_county = ca_tract_county.rename(columns={'TRACT':'tract'})
ca_tract_county

Unnamed: 0,field_1,tract,COUNTYFP,County,geometry
0,0,06085504321,085,Santa Clara,
1,1,06085504410,085,Santa Clara,
2,2,06085507003,085,Santa Clara,
3,3,06085507004,085,Santa Clara,
4,4,06085502204,085,Santa Clara,
...,...,...,...,...,...
9124,9124,06059001303,059,Orange,
9125,9125,06059001304,059,Orange,
9126,9126,06059001401,059,Orange,
9127,9127,06013367200,013,Contra Costa,


In [101]:
tract_count_cellular_towers_data


Unnamed: 0,tract,cellular_towers_data_count
0,6001400100,0
1,6001400200,0
2,6001400300,0
3,6001400400,0
4,6001400500,0
...,...,...
9124,6115040902,1
9125,6115041001,0
9126,6115041002,0
9127,6115041101,1


In [108]:
merging_test = pd.merge(ca_tract_county, tract_count_cellular_towers_data, how='left', on='tract')
merging_test

Unnamed: 0,field_1,tract,COUNTYFP,County,geometry,cellular_towers_data_count
0,0,06085504321,085,Santa Clara,,
1,1,06085504410,085,Santa Clara,,
2,2,06085507003,085,Santa Clara,,
3,3,06085507004,085,Santa Clara,,
4,4,06085502204,085,Santa Clara,,
...,...,...,...,...,...,...
9124,9124,06059001303,059,Orange,,
9125,9125,06059001304,059,Orange,,
9126,9126,06059001401,059,Orange,,
9127,9127,06013367200,013,Contra Costa,,


In [117]:
Fresno = merging_test[merging_test['County']=='Yuba']
#Fresno = Fresno[Fresno['cellular_towers_data_count'].notna()]
pd.set_option('display.max_rows', None)  # None means display all rows

Fresno

Unnamed: 0,field_1,tract,COUNTYFP,County,geometry,cellular_towers_data_count
2013,2013,6115040500,115,Yuba,,
2014,2014,6115040600,115,Yuba,,
2936,2936,6115040100,115,Yuba,,
2937,2937,6115040800,115,Yuba,,
2938,2938,6115040902,115,Yuba,,
2939,2939,6115040901,115,Yuba,,
2940,2940,6115040400,115,Yuba,,
4236,4236,6115040301,115,Yuba,,
4303,4303,6115040302,115,Yuba,,
5835,5835,6115041102,115,Yuba,,


In [102]:
county_count_cellular_towers_data

Unnamed: 0,LocCounty,cellular_towers_data_count
0,ALAMEDA,6
1,ALPINE,1
2,AMADOR,7
3,BUTTE,10
4,CALAVERAS,5
5,COLUSA,5
6,CONTRA COSTA,14
7,DEL NORTE,4
8,EL DORADO,13
9,FRESNO,38
