# Collect GCAM Data

This notebook collects and organizes the total generation (MWh) by technology in 2050 under the Clean Grid and Business-as-usual Scenarios

### Download the GCAM database here: https://zenodo.org/records/8377779

## Steps:

1. Download the database linked above to the  `/data/input_data/gcam_data` folder of this repository
2. Run the scripts below
3. Output file will be saved to `/data/output_data/generation_data`

### Imports

In [1]:
import gcamreader
import numpy as np
import pandas as pd
import os

### Data Paths

In [2]:
# set year of analysis
year = 2050

# data dir
data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'input_data')

# gcam data dir
gcam_data_dir = os.path.join(data_dir, 'gcam_data')

# bau gcam database
gcam_db_path = os.path.join(gcam_data_dir, 'GODEEEP_GCAM-USA_Pathways')

# bau file
bau_gcam_db_file = 'Main_database_basexdb_BAU_Climate'

# net zero file
nz_gcam_db_file = 'Main_database_basexdb_NetZeroNoCCS_Climate'

# gcam query path
gcam_query_path = os.path.join(data_dir, 'gcam_query_xlm', 'subSetQueries.xml')

# output data dir
output_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'output_data', 'generation_data')

# output file path
output_path = os.path.join(output_dir, f'gcam_generation_state_tech_{year}.csv')

# query name for the generation data
generation_query_name = 'elec gen by gen tech and cooling tech (incl cogen)'

### Settings

In [None]:
STATE_ABBREVIATIONS_TO_NAMES = {
    'AB': 'alberta', 'AK': 'alaska', 'AL': 'alabama', 'AR': 'arkansas', 'AZ': 'arizona',
    'CA': 'california', 'BC': 'british_columbia', 'MX': 'mexico',
    'CO': 'colorado', 'CT': 'connecticut', 'DC': 'district_of_columbia', 'DE': 'delaware',
    'FL': 'florida', 'GA': 'georgia', 'HI': 'hawaii', 'IA': 'iowa', 'ID': 'idaho', 'IL': 'illinois',
    'IN': 'indiana', 'KS': 'kansas', 'KY': 'kentucky', 'LA': 'louisiana', 'MA': 'massachusetts',
    'MD': 'maryland', 'ME': 'maine', 'MI': 'michigan', 'MN': 'minnesota', 'MO': 'missouri',
    'MS': 'mississippi', 'MT': 'montana', 'NC': 'north_carolina', 'ND': 'north_dakota',
    'NE': 'nebraska', 'NH': 'new_hampshire', 'NJ': 'new_jersey', 'NM': 'new_mexico', 'NV': 'nevada',
    'NY': 'new_york', 'OH': 'ohio', 'OK': 'oklahoma', 'OR': 'oregon', 'PA': 'pennsylvania',
    'RI': 'rhode_island', 'SC': 'south_carolina', 'SD': 'south_dakota', 'TN': 'tennessee',
    'TX': 'texas', 'UT': 'utah', 'VA': 'virginia', 'VT': 'vermont', 'WA': 'washington',
    'WI': 'wisconsin', 'WV': 'west_virginia', 'WY': 'wyoming'
}

### Functions

In [3]:
def get_query_by_name(queries, name):
    return next((x for x in queries if x.title == name), None)

### Connect and Process GCAM DATA

##### Step 1. Connect to the GCAM database

In [4]:
# net zero
nz_conn = gcamreader.LocalDBConn(gcam_db_path, nz_gcam_db_file)

# business-as-usual
bau_conn = gcamreader.LocalDBConn(gcam_db_path, bau_gcam_db_file)

Database scenarios: T_03b_NZnoCCS_climate
Database scenarios: T_01b_BAU_climate


##### Step 2. Create a list of queries

In [7]:
# list of queries\
queries = gcamreader.parse_batch_query(gcam_query_path)
queries

[<gcamreader.querymi.Query at 0x1054ae450>,
 <gcamreader.querymi.Query at 0x106683010>,
 <gcamreader.querymi.Query at 0x137ed8610>,
 <gcamreader.querymi.Query at 0x137eda690>,
 <gcamreader.querymi.Query at 0x137ed9b10>,
 <gcamreader.querymi.Query at 0x137f9b850>,
 <gcamreader.querymi.Query at 0x137f9bc90>,
 <gcamreader.querymi.Query at 0x137f9b810>,
 <gcamreader.querymi.Query at 0x137f9b750>,
 <gcamreader.querymi.Query at 0x137f9b710>,
 <gcamreader.querymi.Query at 0x137f9bbd0>,
 <gcamreader.querymi.Query at 0x137f9b890>,
 <gcamreader.querymi.Query at 0x137f9b990>,
 <gcamreader.querymi.Query at 0x137f9b910>,
 <gcamreader.querymi.Query at 0x137f9bcd0>,
 <gcamreader.querymi.Query at 0x137f9b690>,
 <gcamreader.querymi.Query at 0x137f9ba10>,
 <gcamreader.querymi.Query at 0x137f9bb10>,
 <gcamreader.querymi.Query at 0x137f9ba50>,
 <gcamreader.querymi.Query at 0x137f9bad0>,
 <gcamreader.querymi.Query at 0x137f9bb50>,
 <gcamreader.querymi.Query at 0x137f9bc10>,
 <gcamreader.querymi.Query at 0x

##### Step 3. Collect the generation data

In [6]:
# net zero generation data
nz_generation = nz_conn.runQuery(get_query_by_name(queries, generation_query_name))

# bau generation data
#bau_generation = bau_conn.runQuery(get_query_by_name(queries, generation_query_name))

Model interface run failed.
Command line: 
	java -cp /Users/mong275/.pyenv/versions/3.11.0/envs/visualization_3.11/lib/python3.11/site-packages/gcamreader/ModelInterface/jars/*:/Users/mong275/.pyenv/versions/3.11.0/envs/visualization_3.11/lib/python3.11/site-packages/gcamreader/ModelInterface/ModelInterface.jar -Xmx4g -Dorg.basex.DBPATH=/Users/mong275/repos/mongird-etal_2024_tbd/data/input_data/gcam_data/GODEEEP_GCAM-USA_Pathways -DModelInterface.SUPPRESS_OUTPUT=True org.basex.BaseX -smethod=csv -scsv=header=yes,format=xquery -i Main_database_basexdb_NetZeroNoCCS_Climate RUN /var/folders/zp/6_04pmq97nq_cq_837pfbx200000gn/T/tmpyylguvot
Query string: 
	<supplyDemandQuery title="elec gen by gen tech and cooling tech (incl cogen)">
        <axis1 name="technology">technology</axis1>
        <axis2 name="Year">physical-output[@vintage]</axis2>
        <xPath buildList="true" dataName="output" group="false" sumAll="false">*[@type='sector' and (@name='electricity' or 
            @name='base 

CalledProcessError: Command '['java', '-cp', '/Users/mong275/.pyenv/versions/3.11.0/envs/visualization_3.11/lib/python3.11/site-packages/gcamreader/ModelInterface/jars/*:/Users/mong275/.pyenv/versions/3.11.0/envs/visualization_3.11/lib/python3.11/site-packages/gcamreader/ModelInterface/ModelInterface.jar', '-Xmx4g', '-Dorg.basex.DBPATH=/Users/mong275/repos/mongird-etal_2024_tbd/data/input_data/gcam_data/GODEEEP_GCAM-USA_Pathways', '-DModelInterface.SUPPRESS_OUTPUT=True', 'org.basex.BaseX', '-smethod=csv', '-scsv=header=yes,format=xquery', '-i', 'Main_database_basexdb_NetZeroNoCCS_Climate', 'RUN', '/var/folders/zp/6_04pmq97nq_cq_837pfbx200000gn/T/tmpyylguvot']' returned non-zero exit status 1.

##### Step 4. Process Data

In [None]:
EXAJOULES_TO_GIGAWATTS = 277.77777777778 * 1000 / 8760

###### Net Zero

In [None]:
generation = nz_gneration

generation['vintage'] = generation['technology'].str.slice(-4).astype(int)
generation['technology'] = generation['technology'].str.slice(0, -10)
generation['subsector.1'] = generation['subsector.1'].str.slice(0, -8)

# convert EJ to GW
generation['value'] = generation['value'] * EXAJOULES_TO_GIGAWATTS
generation = generation.drop(columns=['subsector']).rename(columns={
                        'Year': 'year',
                        'Units': 'units',
                        'value': 'generation_GW',
                        'subsector.1': 'subsector',
                    })
generation = generation[['year', 'vintage', 'region', 'sector', 'subsector', 'technology', 'generation_GW']].reset_index(drop=True)