### Cal-CRAI Subsetting -- TV Contour Data
This notebook processes TV Broadcast Contours data sourced from Homeland Infrastructure Foundation-Level Data (HIFLD).

Data subsetting includes:
* Isolating TV broadcast contours with 'STA' status

Output is uploaded to the `2a_subset` directory within the AWS S3 bucket.

In [1]:
import geopandas as gpd
import os
import boto3
from zipfile import ZipFile
import shutil
import sys
import os
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)

In [2]:
@append_metadata
def subset_tv_contours(export=False, varname=''):
    """
    The TV Broadcast Contours shapefile is large and includes data irrelevant to our Index. 
    With little source data documentation, we internally decided to subset the data to 'STA' 
    status TV broadcast contours as it included much of the data while reducing overlap. 

    Script
    ------
    tv_contour_subset.ipynb
    """
    # NOTE(review): the docstring and the printed messages are kept verbatim
    # because the `append_metadata` decorator presumably consumes them when
    # writing metadata -- confirm before rewording either.
    #
    # Parameters
    #   export  : when truthy, filter the shapefile to STATUS == 'STA', zip the
    #             result and upload it to S3. When falsy, the source zip is
    #             still downloaded and extracted, but nothing is filtered or
    #             uploaded; only the status messages are printed.
    #   varname : not used inside this body; presumably read by the
    #             `append_metadata` decorator -- TODO confirm.
    #
    # Raises
    #   FileNotFoundError : export is truthy and the extracted archive has no
    #                       top-level .shp file (previously a silent no-op).
    print("Data transformation: Subset to 'STA' status stations")

    # Initialize the S3 client
    s3_client = boto3.client('s3')

    # Bucket name and object keys
    bucket_name = 'ca-climate-index'
    directory = '1_pull_data/built_environment/communication_infrastructure/homeland_infrastructure_foundation_level_data/TV_Broadcast_Contours.zip'
    out_directory = '2a_subset/built_environment/communication_infrastructure/homeland_infrastructure_foundation_level_data/'

    # Local scratch directory for the downloaded zip and its extracted contents
    local_directory = 'temp'
    os.makedirs(local_directory, exist_ok=True)

    try:
        # Download the zip file
        local_zip_file_path = os.path.join(local_directory, os.path.basename(directory))
        s3_client.download_file(bucket_name, directory, local_zip_file_path)

        # Extract the contents of the zip file
        with ZipFile(local_zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(local_directory)

        if export:
            # Only the top level of the extracted archive is searched.
            shapefile_names = [
                filename for filename in os.listdir(local_directory)
                if filename.endswith('.shp')
            ]
            if not shapefile_names:
                raise FileNotFoundError(
                    f"No .shp file found in '{local_directory}' after extracting {directory}"
                )

            for filename in shapefile_names:
                shapefile_path = os.path.join(local_directory, filename)

                # Keep only the 'STA' status contours
                gdf = gpd.read_file(shapefile_path)
                sta_gdf = gdf[gdf['STATUS'] == 'STA']

                # A shapefile is several sidecar files, so write it into its
                # own folder and zip that folder as a unit.
                output_folder = os.path.join(local_directory, 'output_shapefile')
                os.makedirs(output_folder, exist_ok=True)

                output_shapefile_path = os.path.join(output_folder, 'sta_tv_contours.shp')
                sta_gdf.to_file(output_shapefile_path)

                # make_archive appends '.zip' itself, so pass the path without it
                output_zip_file_path = os.path.join(local_directory, 'sta_tv_contours.zip')
                shutil.make_archive(os.path.splitext(output_zip_file_path)[0], 'zip', output_folder)

                # S3 keys always use '/', so build the key by concatenation
                # rather than with the OS-specific os.path.join.
                s3_client.upload_file(
                    output_zip_file_path,
                    bucket_name,
                    out_directory + 'sta_tv_contours.zip',
                )
        else:
            print('sta_tv_contours.zip uploaded to AWS.')
    finally:
        # Always remove the scratch directory (downloaded zip, extracted files
        # and generated output), even when a step above raised.
        shutil.rmtree(local_directory, ignore_errors=True)


In [3]:
# Invoke the subsetting routine with export=False, so the filter-and-upload
# branch is skipped; pass export=True to write sta_tv_contours.zip to S3.
subset_tv_contours(
    export=False,
    varname='built_hifld_tv_contour',
)