In [55]:
import pandas
import numpy
import pygsheets
import datetime
import pytz
import re

In [112]:
# define the excel file to save tables in
current_time = datetime.datetime.now(pytz.timezone('US/Eastern')).strftime("%Y-%m-%d_T%H%M%S")

In [113]:
gc = pygsheets.authorize(service_account_env_var='GDRIVE_API_CREDENTIALS')
#spreadsheet = gc.open_by_key('1tcS6Wd-Wp-LTDpLzFgJY_RSNDnbyubW3J_9HKIAys4A')
spreadsheet = gc.open_by_key('1BErBo6m3g5kchbjs8dOyiKEsgS7Tlra7MFLBVkE0WYQ') # Sep 2024 terminals version
#spreadsheet = gc.open_by_key('1MrghwBeCz8Tzgua7CWGg_KoXKVZsV7r0kHMYHYqnNTg') # July 2022 terminals version
#spreadsheet = gc.open_by_key('1FVuw76YU-jmAzfSWJ2ixwBQB_wXEK9aGCa4Q6OCOUzY') # July 2022 with May 2023 updates for Asia gas tracker

terms_df_orig = spreadsheet.worksheet('title', 'Terminals').get_as_df(start='A3')

# drop any empty/imcomplete rows (those without a wiki page)
terms_df_orig = terms_df_orig.loc[terms_df_orig.Wiki != '']

#get other relevant sheets
owners_df_orig = spreadsheet.worksheet('title', 'Terminal operators/owners (1/3)').get_as_df(start='A2')
owners_df_orig.set_index('ComboID', inplace=True)

parent_metadata_df = spreadsheet.worksheet('title', 'Parent metadata (3/3)').get_as_df(start='A2')
parent_metadata_df.set_index('Parent', inplace=True)

In [114]:
terms_df_orig.replace('--', numpy.nan, inplace=True)

  terms_df_orig.replace('--', numpy.nan, inplace=True)


In [115]:
region_df_orig = spreadsheet.worksheet('title', 'Country dictionary').get_as_df(start='A2')
#region_df_agt = region_df_orig.loc[region_df_orig['AsiaGasTracker']=='Yes']

#region_name = 'Global'; region_df_touse = region_df_orig.copy()
#region_name = 'AsiaGasTracker'; region_df_touse = region_df_orig.loc[region_df_orig.AsiaGasTracker=='Yes']
#region_name = 'EuroGasTracker'; region_df_touse = region_df_orig.loc[region_df_orig.EuroGasTracker=='Yes']
region_name = 'AfricaGasTracker'; region_df_touse = region_df_orig.loc[region_df_orig.AfricaGasTracker=='Yes']
#region_df_agt.copy()

## name the output file based on region

In [116]:
excel_writer = pandas.ExcelWriter(region_name+'-LNG-Terminals-SummarySheets-'+current_time+'.xlsx')

In [117]:
region_df_touse_cleaned = region_df_touse.loc[(region_df_touse.Region!='--')&
                                            (region_df_touse.SubRegion!='--')]
multiindex_region_subregion = region_df_touse_cleaned.groupby(['Region','SubRegion'])['Country'].count().index
multiindex_region_subregion

MultiIndex([('Africa',    'Northern Africa'),
            ('Africa', 'Sub-Saharan Africa')],
           names=['Region', 'SubRegion'])

### create specific dataframes for region, country_ratios, owners_df

In [118]:
owners_df_touse = owners_df_orig.loc[owners_df_orig['Country'].isin(region_df_touse['Country'].tolist())]
terms_df_touse = terms_df_orig.loc[terms_df_orig['Country'].isin(region_df_touse['Country'].tolist())]

In [119]:
status_list = ['Proposed', 
               'Construction', 
               'Shelved', 
               'Cancelled', 
               'Operating', 
               'Idle', 
               'Mothballed', 
               'Retired']
country_list = sorted(set(region_df_touse['Country'].tolist()))
region_list = sorted(set(region_df_touse['Region'].tolist()))
subregion_list = sorted(set(region_df_touse['SubRegion'].tolist()))

In [120]:
excel_status_list = ['Proposed', 
                     'Construction', 
                     'In Development (Proposed + Construction)', 
                     'Shelved', 
                     'Cancelled', 
                     'Operating', 
                     'Idle', 
                     'Mothballed', 
                     'Retired']
excel_status_list_with_countries = ['Country']+excel_status_list

In [121]:
# regions - subregions
#############################
# Africa - Sub-Saharan Africa
# Africa - Northern Africa
# Americas - Latin America and the Caribbean
# Americas - North America
# Asia - Southern Asia
# Asia - Western Asia
# Asia - South-eastern Asia
# Asia - East Asia
# Asia - Central Asia
# Europe - Southern Europe
# Europe - Eastern Europe
# Europe - Western Europe
# Europe - Northern Europe
# Oceania	- Polynesia
# Oceania	- Australia and New Zealand
# Oceania	- Melanesia
# Oceania	- Micronesia

### no. export projects by country/region, project status

In [122]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['FacilityType']=='Export') & (terms_df_touse['Fuel']=='LNG')]

nprojects_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
nprojects_by_region = pandas.DataFrame(0, columns=status_list, index=multiindex_region_subregion)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    nprojects_by_country[status] = terms_df_subset_status.groupby('Country').size()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    nprojects_by_region[status] = terms_df_subset_status.groupby(['Region','SubRegion']).size()

# fille NaN with 0.0
nprojects_by_region = nprojects_by_region.fillna(0)
nprojects_by_country = nprojects_by_country.fillna(0)

nprojects_by_region['In Development (Proposed + Construction)'] = nprojects_by_region[['Proposed','Construction']].sum(axis=1)
nprojects_by_country['In Development (Proposed + Construction)'] = nprojects_by_country[['Proposed','Construction']].sum(axis=1)

nprojects_by_country = nprojects_by_country[excel_status_list]
nprojects_by_region = nprojects_by_region[excel_status_list]

nprojects_by_region.index.names = ['Region','Subregion']
nprojects_by_country.index.name = 'Country'

nprojects_by_region.loc['Total',:] = nprojects_by_region.sum(axis=0).values
nprojects_by_country.loc['Total',:] = nprojects_by_country.sum(axis=0).values

#nprojects_by_region = nprojects_by_region.loc[~(nprojects_by_region==0).all(axis=1)]
nprojects_by_country = nprojects_by_country.loc[~(nprojects_by_country==0).all(axis=1)]

nprojects_by_region.replace(0,'',inplace=True)
nprojects_by_country.replace(0,'',inplace=True)

nprojects_by_region.to_excel(excel_writer, sheet_name='LNG export projects by region')#, index=False)
nprojects_by_country.to_excel(excel_writer, sheet_name='LNG export projects by country')#, index=False)

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


### no. import projects by country/region, project status

In [123]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['FacilityType']=='Import') & (terms_df_touse['Fuel']=='LNG')]

nprojects_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
nprojects_by_region = pandas.DataFrame(0, columns=status_list, index=multiindex_region_subregion)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    nprojects_by_country[status] = terms_df_subset_status.groupby('Country').size()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    nprojects_by_region[status] = terms_df_subset_status.groupby(['Region','SubRegion']).size()

# fille NaN with 0.0
nprojects_by_region = nprojects_by_region.fillna(0)
nprojects_by_country = nprojects_by_country.fillna(0)

nprojects_by_region['In Development (Proposed + Construction)'] = nprojects_by_region[['Proposed','Construction']].sum(axis=1)
nprojects_by_country['In Development (Proposed + Construction)'] = nprojects_by_country[['Proposed','Construction']].sum(axis=1)

nprojects_by_country = nprojects_by_country[excel_status_list]
nprojects_by_region = nprojects_by_region[excel_status_list]

nprojects_by_region.index.names = ['Region','Subregion']
nprojects_by_country.index.name = 'Country'

nprojects_by_region.loc['Total',:] = nprojects_by_region.sum(axis=0).values
nprojects_by_country.loc['Total',:] = nprojects_by_country.sum(axis=0).values

#nprojects_by_region = nprojects_by_region.loc[~(nprojects_by_region==0).all(axis=1)]
nprojects_by_country = nprojects_by_country.loc[~(nprojects_by_country==0).all(axis=1)]

nprojects_by_region.replace(0,'',inplace=True)
nprojects_by_country.replace(0,'',inplace=True)

nprojects_by_region.to_excel(excel_writer, sheet_name='LNG import projects by region')#, index=False)
nprojects_by_country.to_excel(excel_writer, sheet_name='LNG import projects by country')#, index=False)

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


### capacity (mtpa) of import projects by country/region, project status

In [124]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['FacilityType']=='Import') & (terms_df_touse['Fuel']=='LNG')]

cap_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
cap_by_region = pandas.DataFrame(0, columns=status_list, index=multiindex_region_subregion)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_country[status] = terms_df_subset_status.groupby('Country')['CapacityInMtpa'].sum()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_region[status] = terms_df_subset_status.groupby(['Region','SubRegion'])['CapacityInMtpa'].sum()

#fille NaN with 0.0
cap_by_region = cap_by_region.fillna(0)
cap_by_country = cap_by_country.fillna(0)

cap_by_region['In Development (Proposed + Construction)'] = cap_by_region[['Proposed','Construction']].sum(axis=1)
cap_by_country['In Development (Proposed + Construction)'] = cap_by_country[['Proposed','Construction']].sum(axis=1)

cap_by_country = cap_by_country[excel_status_list]
cap_by_region = cap_by_region[excel_status_list]

cap_by_region.index.names = ['Region','Subregion']
cap_by_country.index.name = 'Country'

cap_by_region.loc['Total',:] = cap_by_region.sum(axis=0).values
cap_by_country.loc['Total',:] = cap_by_country.sum(axis=0).values

#cap_by_region = cap_by_region.loc[~(cap_by_region==0).all(axis=1)]
cap_by_country = cap_by_country.loc[~(cap_by_country==0).all(axis=1)]

cap_by_region.replace(0,'',inplace=True)
cap_by_country.replace(0,'',inplace=True)

cap_by_region.to_excel(excel_writer, 'LNG import capacity by region')
cap_by_country.to_excel(excel_writer, 'LNG import capacity by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


  cap_by_region.to_excel(excel_writer, 'LNG import capacity by region')
  cap_by_country.to_excel(excel_writer, 'LNG import capacity by country')


### capacity (mtpa) of export projects by country/region, project status

In [125]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['FacilityType']=='Export') & (terms_df_touse['Fuel']=='LNG')]

cap_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
cap_by_region = pandas.DataFrame(0, columns=status_list, index=multiindex_region_subregion)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_country[status] = terms_df_subset_status.groupby('Country')['CapacityInMtpa'].sum()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_region[status] = terms_df_subset_status.groupby(['Region','SubRegion'])['CapacityInMtpa'].sum()

#fille NaN with 0.0
cap_by_region = cap_by_region.fillna(0)
cap_by_country = cap_by_country.fillna(0)

cap_by_region['In Development (Proposed + Construction)'] = cap_by_region[['Proposed','Construction']].sum(axis=1)
cap_by_country['In Development (Proposed + Construction)'] = cap_by_country[['Proposed','Construction']].sum(axis=1)

cap_by_country = cap_by_country[excel_status_list]
cap_by_region = cap_by_region[excel_status_list]

cap_by_region.index.names = ['Region','Subregion']
cap_by_country.index.name = 'Country'

cap_by_region.loc['Total',:] = cap_by_region.sum(axis=0).values
cap_by_country.loc['Total',:] = cap_by_country.sum(axis=0).values

#cap_by_region = cap_by_region.loc[~(cap_by_region==0).all(axis=1)]
cap_by_country = cap_by_country.loc[~(cap_by_country==0).all(axis=1)]

cap_by_region.replace(0,'',inplace=True)
cap_by_country.replace(0,'',inplace=True)

cap_by_region.to_excel(excel_writer, 'LNG export capacity by region')
cap_by_country.to_excel(excel_writer, 'LNG export capacity by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


  cap_by_region.to_excel(excel_writer, 'LNG export capacity by region')
  cap_by_country.to_excel(excel_writer, 'LNG export capacity by country')


## owner analysis

In [126]:
# this cell takes the terminals data set (call it "terms_df_all", here we're using the indev terminals subset)
# and loops throw every row, breaks up the parent and % ownership strings,
# and outputs a dataframe owner_parent_calculations_df that has a bunch of different useful info related
# to parent shares, capacity, etc.
owner_parent_calculations_df = pandas.DataFrame() # empty df for storing owner/parent stuff

for idx,row in terms_df_touse.loc[terms_df_touse.Fuel=='LNG'].iterrows():
    parent_string = row.Parent #.values[0]
    #if row.TerminalID=='T0292':
    #    print(parent_string)

    # the two lines below go through the parent entries in the sheet and split them,
    # recording the percents and parents in separate lists, to be dealt with later
    parent_list = re.sub(' \[.*?\]', '', parent_string).split('; ') # all entries must have an "Owner [%]" syntax, so [unknown %] is included
    percent_list = [float(i.rstrip('%'))/100. for i in re.findall('\\d+(?:\\.\\d+)?%', parent_string)]

    # if there are fewer recorded percents than number of parents
    if parent_list.__len__()!=percent_list.__len__():
        # if there are no percents, make them all equal
        
        if percent_list==[]:
            percent_list = [1/parent_list.__len__() for i in parent_list]
        # otherwise, get the difference and distribute the remainder equally among remaining parents
        else:
            #print(row.ComboID)
            nmissing = parent_list.__len__()-percent_list.__len__()
            # distribute nans evenly
            total = numpy.nansum(percent_list)
            leftover = 1-total
            percent_list += [leftover/nmissing]*nmissing
            #print(numpy.array(percent_list).sum())

    # now go through each parent and associated percent ownership
    # and store it alongside the project's name along with capacity, status, region, etc.
    # this is stored inside the big dataframe owner_parent_calculations_df
    for p_idx,parent in enumerate(parent_list):
        #if row.TerminalID=='T0292':
        #    print(parent_list)
        # this if statement should not really be used
        if parent not in parent_metadata_df.index.to_list():
            print(parent, " doesn't exist as a parent")
            parent_metadata_df.loc[parent,'ParentHQCountry']='unknown'
        owner_parent_calculations_df = pandas.concat([owner_parent_calculations_df, 
                                                      pandas.DataFrame([{'Parent':parent, 'ComboID':row.ComboID,
                                                                         'TerminalID':row.TerminalID,
                                                                         'FractionOwnership':percent_list[p_idx],
                                                                         'PercentString':'{:.2f}%'.format(percent_list[p_idx]*100),
                                                                         'ParentHQCountry':parent_metadata_df.loc[parent_metadata_df.index==parent]['ParentHQCountry'].values[0],
                                                                         'ParentHQRegion':parent_metadata_df.loc[parent_metadata_df.index==parent]['ParentHQRegion'].values[0],
                                                                         'TerminalCountry':row.Country,
                                                                         'TerminalRegion':row.Region,
                                                                         'Status':row.Status,
                                                                         'FacilityType':row['FacilityType'],
                                                                         'CapacityInMtpa':row.CapacityInMtpa,
                                                                         'ProjectWiki':row.Wiki,
                                                                         'ProjectName':row.TerminalName+' '+str(row.UnitName)}])])

owner_parent_calculations_df.reset_index(drop=True, inplace=True)
owner_parent_calculations_df['CapacityOwnedInMtpa'] = owner_parent_calculations_df.FractionOwnership*owner_parent_calculations_df.CapacityInMtpa
owner_parent_calculations_df['ParentCapacityString'] = owner_parent_calculations_df.Parent + ' (' + owner_parent_calculations_df.CapacityOwnedInMtpa.round(1).astype(str).replace('nan','unknown') + ' mtpa)'

In [127]:
owner_parent_calculations_df.loc[owner_parent_calculations_df.TerminalID=='T0292'].groupby(
    ['Parent','ParentHQCountry','ComboID','Status']).agg({'Status':['count']}).unstack().droplevel(axis=1, level=[0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,Status
Parent,ParentHQCountry,ComboID


### nprojects export by parent company, project status

In [128]:
owners_nprojects_by_status_df = \
    owner_parent_calculations_df.loc[owner_parent_calculations_df['FacilityType']=='Export'].groupby(
    ['Parent','ParentHQCountry','Status'])[['FractionOwnership']].sum().unstack().droplevel(axis=1, level=[0])

owners_nprojects_by_status_df = owners_nprojects_by_status_df.reindex(columns=status_list)
owners_nprojects_by_status_df = owners_nprojects_by_status_df.reset_index().set_index('Parent')
owners_nprojects_by_status_df.columns.name = None

owners_nprojects_by_status_df['In Development (Proposed + Construction)'] = owners_nprojects_by_status_df[['Proposed','Construction']].sum(axis=1)

owners_nprojects_by_status_df = owners_nprojects_by_status_df.rename(columns={'Parent':'Parent Company',
                                                                          'ParentHQCountry':'Country'})
# rearrange the order of the columns for output
owners_nprojects_by_status_df = owners_nprojects_by_status_df[excel_status_list_with_countries]

# totals_row = owners_nprojects_by_status_df.sum(axis=0, min_count=0)
# totals_row.name = 'Total'
# owners_nprojects_by_status_df = owners_nprojects_by_status_df.append(totals_row)
owners_nprojects_by_status_df.loc['Total',:] = owners_nprojects_by_status_df.sum(axis=0, min_count=0).values
owners_nprojects_by_status_df.loc['Total','Country'] = ''

owners_nprojects_by_status_df = owners_nprojects_by_status_df.replace(numpy.nan, '')
owners_nprojects_by_status_df = owners_nprojects_by_status_df.replace(0, '')

owners_nprojects_by_status_df.to_excel(excel_writer, 'LNG export projects by owner')

  owners_nprojects_by_status_df.to_excel(excel_writer, 'LNG export projects by owner')


### nprojects import by parent company, project status

In [129]:
owners_nprojects_by_status_df = \
    owner_parent_calculations_df.loc[owner_parent_calculations_df['FacilityType']=='Import'].groupby(
    ['Parent','ParentHQCountry','Status'])[['FractionOwnership']].sum().unstack().droplevel(axis=1, level=[0])

owners_nprojects_by_status_df = owners_nprojects_by_status_df.reindex(columns=status_list)
owners_nprojects_by_status_df = owners_nprojects_by_status_df.reset_index().set_index('Parent')
owners_nprojects_by_status_df.columns.name = None

owners_nprojects_by_status_df['In Development (Proposed + Construction)'] = owners_nprojects_by_status_df[['Proposed','Construction']].sum(axis=1)

owners_nprojects_by_status_df = owners_nprojects_by_status_df.rename(columns={'Parent':'Parent Company',
                                                                          'ParentHQCountry':'Country'})
# rearrange the order of the columns for output
owners_nprojects_by_status_df = owners_nprojects_by_status_df[excel_status_list_with_countries]

# totals_row = owners_nprojects_by_status_df.sum(axis=0, min_count=0)
# totals_row.name = 'Total'
# owners_nprojects_by_status_df = owners_nprojects_by_status_df.append(totals_row)
owners_nprojects_by_status_df.loc['Total',:] = owners_nprojects_by_status_df.sum(axis=0, min_count=0).values
owners_nprojects_by_status_df.loc['Total','Country'] = ''

owners_nprojects_by_status_df = owners_nprojects_by_status_df.replace(numpy.nan, '')
owners_nprojects_by_status_df = owners_nprojects_by_status_df.replace(0, '')

owners_nprojects_by_status_df.to_excel(excel_writer, 'LNG import projects by owner')

  owners_nprojects_by_status_df.to_excel(excel_writer, 'LNG import projects by owner')


### capacity export by parent company, project status

In [130]:
owners_capacity_by_status_df = \
    owner_parent_calculations_df.loc[owner_parent_calculations_df['FacilityType']=='Export'].groupby(
    ['Parent','ParentHQCountry','Status']).agg({'CapacityOwnedInMtpa':['sum']}).unstack().droplevel(axis=1, level=[0,1])

owners_capacity_by_status_df = owners_capacity_by_status_df.reindex(columns=status_list)
owners_capacity_by_status_df = owners_capacity_by_status_df.reset_index().set_index('Parent')
owners_capacity_by_status_df.columns.name = None

owners_capacity_by_status_df['In Development (Proposed + Construction)'] = owners_capacity_by_status_df[['Proposed','Construction']].sum(axis=1)

owners_capacity_by_status_df = owners_capacity_by_status_df.rename(columns={'Parent':'Parent Company',
                                                                          'ParentHQCountry':'Country'})
# rearrange the order of the columns for output
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list_with_countries]

# totals_row = owners_capacity_by_status_df.sum(axis=0, min_count=0)
# totals_row.name = 'Total'
# owners_capacity_by_status_df = owners_capacity_by_status_df.append(totals_row)
owners_capacity_by_status_df.loc['Total',:] = owners_capacity_by_status_df.sum(axis=0, min_count=0).values
owners_capacity_by_status_df.loc['Total','Country'] = ''

owners_capacity_by_status_df = owners_capacity_by_status_df.replace(numpy.nan, '')
owners_capacity_by_status_df = owners_capacity_by_status_df.replace(0, '')

owners_capacity_by_status_df.to_excel(excel_writer, 'LNG export capacity by owner')

  owners_capacity_by_status_df.to_excel(excel_writer, 'LNG export capacity by owner')


### capacity import by parent company, project status

In [131]:
owners_capacity_by_status_df = \
    owner_parent_calculations_df.loc[owner_parent_calculations_df['FacilityType']=='Import'].groupby(
    ['Parent','ParentHQCountry','Status']).agg({'CapacityOwnedInMtpa':['sum']}).unstack().droplevel(axis=1, level=[0,1])

owners_capacity_by_status_df = owners_capacity_by_status_df.reindex(columns=status_list)
owners_capacity_by_status_df = owners_capacity_by_status_df.reset_index().set_index('Parent')
owners_capacity_by_status_df.columns.name = None

owners_capacity_by_status_df['In Development (Proposed + Construction)'] = owners_capacity_by_status_df[['Proposed','Construction']].sum(axis=1)

owners_capacity_by_status_df = owners_capacity_by_status_df.rename(columns={'Parent':'Parent Company',
                                                                          'ParentHQCountry':'Country'})
# rearrange the order of the columns for output
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list_with_countries]

# totals_row = owners_capacity_by_status_df.sum(axis=0, min_count=0)
# totals_row.name = 'Total'
# owners_capacity_by_status_df = owners_capacity_by_status_df.append(totals_row)
owners_capacity_by_status_df.loc['Total',:] = owners_capacity_by_status_df.sum(axis=0, min_count=0).values
owners_capacity_by_status_df.loc['Total','Country'] = ''

owners_capacity_by_status_df = owners_capacity_by_status_df.replace(numpy.nan, '')
owners_capacity_by_status_df = owners_capacity_by_status_df.replace(0, '')

owners_capacity_by_status_df.to_excel(excel_writer, 'LNG import capacity by owner')

  owners_capacity_by_status_df.to_excel(excel_writer, 'LNG import capacity by owner')


### no. of operating terminals by start year, facility type (1980–2023)

In [132]:
year_indices = list(range(1980,2025))

terms_by_start_year_df = pandas.DataFrame(0.0, index=year_indices, columns=['Import terminals', 'Export terminals'])

terms_started_df = terms_df_touse[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Fuel']=='LNG') & (terms_df_touse['FacilityType']=='Import')]
terms_by_start_year_df['Import terminals'] = terms_started_df.groupby('StartYearEarliest').size()

terms_started_df = terms_df_touse[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Fuel']=='LNG') & (terms_df_touse['FacilityType']=='Export')]
terms_by_start_year_df['Export terminals'] = terms_started_df.groupby('StartYearEarliest').size()

terms_by_start_year_df.index.name = 'Start year'

terms_by_start_year_df.loc['Total',:] = terms_by_start_year_df.sum(axis=0).values

terms_by_start_year_df.replace(numpy.nan,'',inplace=True)

terms_by_start_year_df.to_excel(excel_writer, 'LNG projects by start year')

  terms_by_start_year_df.to_excel(excel_writer, 'LNG projects by start year')


### capacity of operating terminals by start year, facility type (1980–2021)

In [133]:
year_indices = list(range(1980,2025))

capacity_by_start_year_df = pandas.DataFrame(0.0, index=year_indices, columns=['Import terminals', 'Export terminals'])

terms_started_df = terms_df_touse.loc[(terms_df_touse['Status'].isin(['Operating'])) &
                                      (terms_df_touse['Fuel']=='LNG') & 
                                      (terms_df_touse['FacilityType']=='Import')]
capacity_by_start_year_df['Import terminals'] = terms_started_df.groupby('StartYearEarliest')['CapacityInMtpa'].sum()

terms_started_df = terms_df_touse.loc[(terms_df_touse['Status'].isin(['Operating'])) &
                                      (terms_df_touse['Fuel']=='LNG') & 
                                      (terms_df_touse['FacilityType']=='Export')]
capacity_by_start_year_df['Export terminals'] = terms_started_df.groupby('StartYearEarliest')['CapacityInMtpa'].sum(min_count=0)

capacity_by_start_year_df.loc['Total',:] = capacity_by_start_year_df.sum(axis=0).values

capacity_by_start_year_df.index.name = 'Start year'
capacity_by_start_year_df.replace(numpy.nan,'',inplace=True)

# totals_row = capacity_by_start_year_df.sum(axis=0)
# totals_row.name = 'Total'
# capacity_by_start_year_df = capacity_by_start_year_df.append(totals_row)

capacity_by_start_year_df.to_excel(excel_writer, 'LNG capacity by start year')

  capacity_by_start_year_df.to_excel(excel_writer, 'LNG capacity by start year')


# save excel file

In [134]:
excel_writer.close()

In [135]:
print(current_time)

2024-10-15_T165635


# stats

In [136]:
# number of projects tracked in total
print(terms_df_touse.shape[0], 'LNG terminal projects tracked')
print(terms_df_touse['CapacityInMtpa'].sum(), 'mtpa tracked')

121 LNG terminal projects tracked
282.76 mtpa tracked


## operating LNG terminals

In [137]:
print(terms_df_touse.loc[terms_df_touse.Status=='Operating'].shape[0], 'operating LNG terminal projects tracked')

32 operating LNG terminal projects tracked


## in dev LNG terminals

In [138]:
print(terms_df_touse.loc[terms_df_touse.Status.isin(['Proposed','Construction'])].shape[0], 'in dev LNG terminal projects tracked')

45 in dev LNG terminal projects tracked


In [139]:
print(terms_df_touse.loc[terms_df_touse.Status.isin(['Proposed','Construction'])].CapacityInMtpa.sum(), 'in dev mtpa tracked')

137.19 in dev mtpa tracked
