## Process NEI and CCS emissions for a specific facility

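For each of the two facilities listed below, this script writes a shapefile in which the CCS emissions (NOx, SOx and PM2.5 only; NH3 and VOC are excluded) are applied to that facility alone, while every other facility keeps its NEI emissions.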

Author : Yunha Lee

Date: March 11, 2025

In [None]:
import geopandas as gpd
from pyproj import CRS
import os, sys

# Make the local LOCAETA_AQ package importable from this notebook.
package_path = os.path.abspath('/Users/yunhalee/Documents/LOCAETA/LOCAETA_AQ/LOCAETA_AQ')
if package_path not in sys.path:
    sys.path.append(package_path)

import emission_processing

# Projection shared by all emission shapefiles in this notebook
# (Lambert conformal conic on a sphere of radius 6370997 m).
target_crs = "+proj=lcc +lat_1=33.000000 +lat_2=45.000000 +lat_0=40.000000 +lon_0=-97.000000 +x_0=0 +y_0=0 +a=6370997.000000 +b=6370997.000000 +to_meter=1"

# Point-source emissions file (CCS combined with NEI, without NH3 and VOC).
original_emis = '/Users/yunhalee/Documents/LOCAETA/CS_emissions/Colorado_CCS_combined_NEI_point_oilgas_ptegu_ptnonimps_wo_NH3_VOC.shp'

# Load the file and renumber the rows 0..n-1 so later row-wise comparisons line up.
gdf_ccs = gpd.read_file(original_emis).reset_index(drop=True)

# Bring the data into target_crs using the project helper.
# NOTE(review): helper is defined in emission_processing; presumably it returns
# the reprojected GeoDataFrame -- confirm what (if anything) it saves.
gdf_ccs = emission_processing.reproject_and_save_gdf(gdf_ccs, target_crs)


In [None]:
# Apply CCS emissions to one facility of interest at a time.
# Keys are short labels used in the output file name; values are the EIS_ID
# matched against the 'EIS_ID' column of gdf_ccs.
# IDs kept for reference from earlier runs:
#   - Suncor frs_id = '1007923'
#   - Cherokee plant alone (facility ID 1007207): EIS_ID = 17445711
#   - NEI emission without landfill: facility ID 1007709
facility_eis_id = {'Suncor': 1099511, 'Cherokee': 3555811}

# VOC and NH3 are excluded because the input emissions already hold NEI values for them.
species_list = ['NOx', 'SOx', 'PM2_5']

# Parse the expected projection once instead of on every iteration.
expected_crs = CRS.from_string(target_crs)

for key, eis_id in facility_eis_id.items():

    # EIS_ID is compared numerically below, so coerce anything that is not an int.
    if not isinstance(eis_id, int):
        print(f"{eis_id} must be integer")
        eis_id = int(eis_id)

    # Start from a fresh copy so each facility's file is independent of the others.
    gdf_emis = gdf_ccs.copy()

    is_target = gdf_emis['EIS_ID'] == eis_id
    print(f"matching facility: {gdf_emis[is_target]}")
    if not is_target.any():
        # Without a match the output would be pure NEI emissions under a CCS file name.
        print(f"WARNING: no rows with EIS_ID {eis_id} found for {key}; output will contain NEI emissions only")

    # Rows of every other facility; the mask does not depend on the species,
    # so it is built once per facility.
    other_facilities = gdf_emis['EIS_ID'] != eis_id

    # For all non-target facilities, restore the NEI value stored in '<species>_old'.
    for spec in species_list:
        gdf_emis.loc[other_facilities, spec] = gdf_emis.loc[other_facilities, spec + '_old']

    # Save the final gdf, but only if it is still a GeoDataFrame in the target projection.
    final_emis_file = f'/Users/yunhalee/Documents/LOCAETA/CS_emissions/Colorado_point_{key}_{eis_id}_CCS_combined_NEI_point_oilgas_ptegu_ptnonimps_wo_NH3_VOC.shp'

    if not isinstance(gdf_emis, gpd.GeoDataFrame):
        raise TypeError("The object is not a GeoDataFrame")
    if gdf_emis.crs != expected_crs:
        raise ValueError(f"The GeoDataFrame CRS does not match the target CRS: {target_crs}")

    gdf_emis.to_file(final_emis_file)
    print(f"New emission shapefile saved to: {final_emis_file}")


## Exclude a specific facility from NEI emissions

Note that the landfill facility does not exist in the CCS emissions.

In [None]:
import geopandas as gpd
from pyproj import CRS
import os, sys

# Make the local LOCAETA_AQ package importable from this notebook.
package_path = os.path.abspath('/Users/yunhalee/Documents/LOCAETA/LOCAETA_AQ/LOCAETA_AQ')
if package_path not in sys.path:
    sys.path.append(package_path)

import emission_processing

# Projection shared by all emission shapefiles in this notebook
# (Lambert conformal conic on a sphere of radius 6370997 m).
target_crs = "+proj=lcc +lat_1=33.000000 +lat_2=45.000000 +lat_0=40.000000 +lon_0=-97.000000 +x_0=0 +y_0=0 +a=6370997.000000 +b=6370997.000000 +to_meter=1"

# NEI 2020 point-source emissions file.
original_emis ='/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/evaldata_v1.6.1/2020_nei_emissions/new_NEI2020_pt_oilgas_ptegu_ptnonipm.shp'

# Load the file and renumber the rows 0..n-1 so later row-wise comparisons line up.
gdf_nei = gpd.read_file(original_emis).reset_index(drop=True)

# Bring the data into target_crs using the project helper.
# NOTE(review): helper is defined in emission_processing; presumably it returns
# the reprojected GeoDataFrame -- confirm what (if anything) it saves.
gdf_nei = emission_processing.reproject_and_save_gdf(gdf_nei, target_crs)

# Baseline row count, for comparison with the count after a facility is removed.
print(f"Rows before filtering: {len(gdf_nei)}")

In [None]:
# Exclude the landfill (facility ID 1007709, EIS_ID = 2001411) from the NEI emissions.
# Keys are short labels used in the output file name; values are the EIS_ID to drop.
facility_eis_id = {'Landfill': 2001411}

# Parse the expected projection once instead of on every iteration.
expected_crs = CRS.from_string(target_crs)

for key, eis_id in facility_eis_id.items():

    # EIS_ID is compared numerically below, so coerce anything that is not an int.
    if not isinstance(eis_id, int):
        print(f"{eis_id} must be integer")
        eis_id = int(eis_id)

    print(f"Rows before filtering: {len(gdf_nei)}")

    # Show the rows that are about to be removed.
    is_excluded = gdf_nei['EIS_ID'] == eis_id
    print(gdf_nei[is_excluded])
    if not is_excluded.any():
        print(f"WARNING: no rows with EIS_ID {eis_id} found for {key}; output will equal the input")

    # Filter into a per-facility frame and leave gdf_nei untouched, so that
    # (a) each output file excludes exactly the facility named in its file name
    #     even when facility_eis_id has several entries, and
    # (b) re-running this cell gives the same result.
    gdf_excluded = gdf_nei[gdf_nei['EIS_ID'] != eis_id]

    print(f"Rows after filtering: {len(gdf_excluded)}")

    # Save the final gdf, but only if it is still a GeoDataFrame in the target projection.
    final_emis_file = f'/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/evaldata_v1.6.1/2020_nei_emissions/new_NEI2020_pt_oilgas_ptegu_ptnonipm_excluding_{key}_{eis_id}.shp'

    if not isinstance(gdf_excluded, gpd.GeoDataFrame):
        raise TypeError("The object is not a GeoDataFrame")
    if gdf_excluded.crs != expected_crs:
        raise ValueError(f"The GeoDataFrame CRS does not match the target CRS: {target_crs}")

    gdf_excluded.to_file(final_emis_file)
    print(f"New emission shapefile saved to: {final_emis_file}")

## Create emissions for a new facility (here, a data center) to add to the NEI emissions

In [None]:
import geopandas as gpd
import pandas as pd
from pyproj import CRS
import os, sys


# Input table of data-center emissions and the shapefile it is converted to.
# The table must provide 'lon' and 'lat' columns (read below).
input_file = '/Users/yunhalee/Documents/LOCAETA/CS_emissions/Data center case study NOx 2ppm.csv'
output_file = '/Users/yunhalee/Documents/LOCAETA/CS_emissions/Data_center_NOx_2ppm.shp'

# Projection shared by all emission shapefiles in this notebook
# (Lambert conformal conic on a sphere of radius 6370997 m).
target_crs = "+proj=lcc +lat_1=33.000000 +lat_2=45.000000 +lat_0=40.000000 +lon_0=-97.000000 +x_0=0 +y_0=0 +a=6370997.000000 +b=6370997.000000 +to_meter=1"

df = pd.read_csv(input_file, encoding='utf-8')

# One point per row, built from the lon/lat columns; the input is assumed to be WGS84.
facility_points = gpd.points_from_xy(df['lon'], df['lat'])
gdf = gpd.GeoDataFrame(df, geometry=facility_points, crs="EPSG:4326")

# The coordinates now live in the geometry column, so drop the raw columns
# and re-project to the target CRS.
gdf = gdf.drop(columns=['lon', 'lat']).to_crs(target_crs)

gdf.to_file(output_file, driver='ESRI Shapefile')

## Merge the new data center emissions into the NEI point-source shapefile

In [None]:
# NEI point-source emissions shapefile that the new facility is to be merged into.
nei_file = '/Users/yunhalee/Documents/LOCAETA/RCM/INMAP/evaldata_v1.6.1/2020_nei_emissions/combined_NEI2020_pt_oilgas_ptegu_ptnonipm.shp'
nei = gpd.read_file(nei_file)

# Distinct EIS_ID values already present in the NEI file.
eis_list = nei['EIS_ID'].unique()

# Report whether the candidate ID '00000000' is already taken.
# NOTE(review): this tests membership of a *string*; if EIS_ID was read as a
# numeric column the test can never match -- confirm the column dtype.
print("overlap" if '00000000' in eis_list else "good to use")

In [None]:
# Report whether the candidate ID '00000' already appears among the NEI EIS_ID values.
# NOTE(review): string membership test; confirm the dtype of eis_list.
id_taken = '00000' in eis_list
print("overlap" if id_taken else "good to use")

## Find the county FIPS code at a given latitude and longitude

In [None]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd


# Build one point per row of df from its lon/lat columns.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['lon'], df['lat']),
    crs="EPSG:4326",  # Assuming input is WGS84
)

county_shapefile_path = "/Users/yunhalee/Documents/LOCAETA/NEI_emissions/NEI_2020_gaftp_Jun2024/emiss_shp2020/Census/cb_2020_us_county_500k.shp"

# Load shapefile with county boundaries and FIPS codes
counties = gpd.read_file(county_shapefile_path)

# A spatial join compares coordinates as-is, so both layers must be in the
# same CRS. Previously only the counties were reprojected while the points
# stayed in EPSG:4326, which makes the join match nothing (or the wrong county).
counties = counties.to_crs(target_crs)
gdf = gdf.to_crs(target_crs)

# Keep each point that falls inside a county polygon, with that county's columns.
# `predicate=` replaces the `op=` keyword, which newer geopandas no longer accepts.
join_results = gpd.sjoin(gdf, counties, how="inner", predicate="within")

# Extract FIPS codes
print(join_results['GEOID'])

In [None]:
# Display the full spatial-join result (point rows plus the columns of the matched county).
join_results