In [442]:
import pandas
pandas.set_option("display.max_rows", 50, "display.max_columns", 500)

import numpy
import pygsheets
import re
import datetime

In [443]:
pandas.io.formats.excel.ExcelFormatter.header_style = None
excel_writer = pandas.ExcelWriter('GGIT-Terminals-SummarySheets-'+str(datetime.date.today())+'.xlsx', engine='xlsxwriter')

import Terminals_Current dataset

In [444]:
credentials_directory = '/Users/baird/Dropbox/_google-api/'
gc = pygsheets.authorize(client_secret=credentials_directory+'client_secret.json')
spreadsheet = gc.open_by_key('1tcS6Wd-Wp-LTDpLzFgJY_RSNDnbyubW3J_9HKIAys4A')

#spreadsheet[1] "Gas Pipelines" tab is the second index
terms_df_orig = spreadsheet.worksheet('title', 'Terminals').get_as_df(start='A2')
terms_df_orig = terms_df_orig.loc[terms_df_orig.Wiki!='']
terms_df_orig = terms_df_orig.loc[terms_df_orig.Type1=='LNG']

In [445]:
#owner_parent_df_orig = spreadsheet.worksheet('title', 'Owner/parent formatted').get_as_df()
owner_parent_df_orig = pandas.read_pickle('../owner-parent-scripts/GEM-terminals-owner-parent-strings-2022-08-10.pickle')
owner_parent_df_orig = owner_parent_df_orig.loc[owner_parent_df_orig.index.isin(terms_df_orig.ComboID)]

parents_df = spreadsheet.worksheet('title', 'Parent metadata (3/3)').get_as_df(start='A2')
parents_df = parents_df.loc[parents_df.Parent!='']

In [446]:
region_df_orig = spreadsheet.worksheet('title', 'Region dictionary').get_as_df()

In [447]:
terms_df_orig.replace('--', numpy.nan, inplace=True)
owners_df_orig.replace('--', numpy.nan, inplace=True)
owners_df_orig.replace('', numpy.nan, inplace=True)

In [448]:
region_list = sorted(list(set(region_df_orig['Region'])))
# subset only countries with actual regions associated with them
country_list = sorted(list(set(region_df_orig.loc[region_df_orig['Region']!='']['Country'])))

# landing page numbers

In [453]:
terms_df_orig.shape

(1172, 77)

In [457]:
terms_df_orig['CapacityInMtpa'].sum()

4393.73

### use this to subset regions if necessary

In [423]:
owners_df_touse = owners_df_orig.copy()#[owners_df_orig['Country'].str.contains(
                                       #     '|'.join(region_df_touse['Country'].tolist()))]

terms_df_touse = terms_df_orig.copy()#[terms_df_orig['Country'].str.contains(
                                     #       '|'.join(region_df_touse['Country'].tolist()))]

### set up info to analyze Owners tab

In [424]:
owner_pct_col_names = []
owner_col_names = []

parent_pct_col_names = []
parent_col_names = []

for num in range(1,10+1):
    owner_pct_col = f'Owner{num}%'
    owner_pct_col_names.append(owner_pct_col)
    
    owner_col = f'Owner{num}'
    owner_col_names.append(owner_col)
    
    parent_pct_col = f'Parent{num}%'
    parent_pct_col_names.append(parent_pct_col)
    
    parent_col = f'Parent{num}'
    parent_col_names.append(parent_col)

In [425]:
status_list = ['Proposed', 
               'Construction', 
               'Shelved', 
               'Cancelled', 
               'Operating', 
               'Idle', 
               'Mothballed', 
               'Retired']

In [426]:
excel_status_list = ['Proposed', 
                     'Construction', 
                     'In Development (Proposed + Construction)', 
                     'Shelved', 
                     'Cancelled', 
                     'Operating', 
                     'Idle', 
                     'Mothballed', 
                     'Retired']

### no. export trains by country/region, project status

In [427]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['Facility']=='Export') & (terms_df_touse['Type1']=='LNG')]

ntrains_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
ntrains_by_region = pandas.DataFrame(0, columns=status_list, index=region_list)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    ntrains_by_country[status] = terms_df_subset_status.groupby('Country').size()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    ntrains_by_region[status] = terms_df_subset_status.groupby('Region').size()

# fille NaN with 0.0
ntrains_by_region = ntrains_by_region.fillna(0)
ntrains_by_country = ntrains_by_country.fillna(0)

ntrains_by_region['In Development (Proposed + Construction)'] = ntrains_by_region[['Proposed','Construction']].sum(axis=1)
ntrains_by_country['In Development (Proposed + Construction)'] = ntrains_by_country[['Proposed','Construction']].sum(axis=1)

ntrains_by_country = ntrains_by_country[excel_status_list]
ntrains_by_region = ntrains_by_region[excel_status_list]

ntrains_by_region.index.name = 'Region'
ntrains_by_country.index.name = 'Country'

totals_row = ntrains_by_region.sum(axis=0)
totals_row.name = 'Total'
ntrains_by_region = ntrains_by_region.append(totals_row)

totals_row = ntrains_by_country.sum(axis=0)
totals_row.name = 'Total'
ntrains_by_country = ntrains_by_country.append(totals_row)

ntrains_by_region.to_excel(excel_writer, sheet_name='LNG export trains by region')
ntrains_by_country.to_excel(excel_writer, sheet_name='LNG export trains by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


### no. export trains by country/region, project status - INCLUDING RE-EXPORTING FACILITIES

### no. import trains by country/region, project status

In [428]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['Facility']=='Import') & (terms_df_touse['Type1']=='LNG')]

ntrains_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
ntrains_by_region = pandas.DataFrame(0, columns=status_list, index=region_list)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    ntrains_by_country[status] = terms_df_subset_status.groupby('Country').size()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    ntrains_by_region[status] = terms_df_subset_status.groupby('Region').size()

# fille NaN with 0.0
ntrains_by_region = ntrains_by_region.fillna(0)
ntrains_by_country = ntrains_by_country.fillna(0)

ntrains_by_region['In Development (Proposed + Construction)'] = ntrains_by_region[['Proposed','Construction']].sum(axis=1)
ntrains_by_country['In Development (Proposed + Construction)'] = ntrains_by_country[['Proposed','Construction']].sum(axis=1)

ntrains_by_country = ntrains_by_country[excel_status_list]
ntrains_by_region = ntrains_by_region[excel_status_list]

ntrains_by_region.index.name = 'Region'
ntrains_by_country.index.name = 'Country'

totals_row = ntrains_by_region.sum(axis=0)
totals_row.name = 'Total'
ntrains_by_region = ntrains_by_region.append(totals_row)

totals_row = ntrains_by_country.sum(axis=0)
totals_row.name = 'Total'
ntrains_by_country = ntrains_by_country.append(totals_row)

ntrains_by_country = ntrains_by_country.applymap(int).applymap(str)
ntrains_by_region = ntrains_by_region.applymap(int).applymap(str)

ntrains_by_region.to_excel(excel_writer, sheet_name='LNG import trains by region')
ntrains_by_country.to_excel(excel_writer, sheet_name='LNG import trains by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


### capacity (mtpa) of import trains by country/region, project status

In [429]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['Facility']=='Import') & (terms_df_touse['Type1']=='LNG')]

cap_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
cap_by_region = pandas.DataFrame(0, columns=status_list, index=region_list)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_country[status] = terms_df_subset_status.groupby('Country')['CapacityInMtpa'].sum()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_region[status] = terms_df_subset_status.groupby('Region')['CapacityInMtpa'].sum()

#fille NaN with 0.0
cap_by_region = cap_by_region.fillna(0)
cap_by_country = cap_by_country.fillna(0)

cap_by_region['In Development (Proposed + Construction)'] = cap_by_region[['Proposed','Construction']].sum(axis=1)
cap_by_country['In Development (Proposed + Construction)'] = cap_by_country[['Proposed','Construction']].sum(axis=1)

cap_by_country = cap_by_country[excel_status_list]
cap_by_region = cap_by_region[excel_status_list]

cap_by_region.index.name = 'Region'
cap_by_country.index.name = 'Country'

totals_row = cap_by_region.sum(axis=0)
totals_row.name = 'Total'
cap_by_region = cap_by_region.append(totals_row)

totals_row = cap_by_country.sum(axis=0)
totals_row.name = 'Total'
cap_by_country = cap_by_country.append(totals_row)

cap_by_region.to_excel(excel_writer, sheet_name='LNG import capacity by region')
cap_by_country.to_excel(excel_writer, sheet_name='LNG import capacity by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


### capacity (mtpa) of export trains by country/region, project status

In [430]:
terms_df_subset = terms_df_touse.copy()[(terms_df_touse['Facility']=='Export') & (terms_df_touse['Type1']=='LNG')]

cap_by_country = pandas.DataFrame(0, columns=status_list, index=country_list)
cap_by_region = pandas.DataFrame(0, columns=status_list, index=region_list)

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_country[status] = terms_df_subset_status.groupby('Country')['CapacityInMtpa'].sum()

print('===country-level calculations===')
for status in status_list:
    print(status)
    terms_df_subset_status = terms_df_subset.copy()[terms_df_subset['Status']==status]
    cap_by_region[status] = terms_df_subset_status.groupby('Region')['CapacityInMtpa'].sum()

#fille NaN with 0.0
cap_by_region = cap_by_region.fillna(0)
cap_by_country = cap_by_country.fillna(0)

cap_by_region['In Development (Proposed + Construction)'] = cap_by_region[['Proposed','Construction']].sum(axis=1)
cap_by_country['In Development (Proposed + Construction)'] = cap_by_country[['Proposed','Construction']].sum(axis=1)

cap_by_country = cap_by_country[excel_status_list]
cap_by_region = cap_by_region[excel_status_list]

cap_by_region.index.name = 'Region'
cap_by_country.index.name = 'Country'

totals_row = cap_by_region.sum(axis=0)
totals_row.name = 'Total'
cap_by_region = cap_by_region.append(totals_row)

totals_row = cap_by_country.sum(axis=0)
totals_row.name = 'Total'
cap_by_country = cap_by_country.append(totals_row)

cap_by_region.to_excel(excel_writer, sheet_name='LNG export capacity by region')
cap_by_country.to_excel(excel_writer, sheet_name='LNG export capacity by country')

===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired
===country-level calculations===
Proposed
Construction
Shelved
Cancelled
Operating
Idle
Mothballed
Retired


## parent analysis
### relies on parent-owner script already being run/completed

In [431]:
terms_df_orig['Owner']
terms_df_orig['Parent'].isna().sum()

20

In [441]:
owner_parent_calculations_df = pandas.DataFrame()#columns=['Parent','ComboID','FractionOwnership','CapacityInMtpa'])#index=terms_df_orig.ComboID)#, columns = ['Country','Region','CapacityInMtpa'])

terms_df_with_ownrshp = terms_df_orig.loc[~terms_df_orig.Parent.isnull()]
terms_df_without_ownrshp = terms_df_orig.loc[terms_df_orig.Parent.isnull()]

for idx,row in terms_df_with_ownrshp.iterrows():
    parent_string = row.Parent
    #print(parent_list)
    parent_list = re.sub(' \[\\d+(?:\\.\\d+)?%\]', '', parent_string).split('; ')
    percent_list = [float(i.rstrip('%'))/100. for i in re.findall('\\d+(?:\\.\\d+)?%', parent_string)]
    
    print(parent_string)

    if parent_list.__len__()!=percent_list.__len__():
        if percent_list.__len__()==0:
            #print(parent_list)
            #print(percent_list)
            percent_list = [1/parent_list.__len__() for i in parent_list]
            #print(percent_list)
            for p_idx,parent in enumerate(parent_list):
                owner_parent_calculations_df = pandas.concat([owner_parent_calculations_df, 
                                                              pandas.DataFrame([{'Parent':parent, 'ComboID':row.ComboID, 
                                                                                 'FractionOwnership':percent_list[p_idx],
                                                                                 'CapacityInMtpa':row.CapacityInMtpa}])])
        else:
            print(percent_list)
            print(parent_list)

    for p_idx,parent in enumerate(parent_list):
        #print(percent_list)
        owner_parent_calculations_df = pandas.concat([owner_parent_calculations_df, 
                                                      pandas.DataFrame([{'Parent':parent, 'ComboID':row.ComboID, 
                                                                         'FractionOwnership':percent_list[p_idx],
                                                                         'CapacityInMtpa':row.CapacityInMtpa}])])
    
    #if parent_string
    #    pass#print(parent_list)
    #else:
    #    print(parent_list)
    


#owner_parent_calculations_df.index.name = 'ComboID'
#owner_parent_calculations_df['Parent']
#owner_parent_calculations_df['FractionOwnership']

Energy World [100.0%]
Energy World [100.0%]
State of Alaska [100.0%]
State of Alaska [100.0%]
State of Alaska [100.0%]
Highstar Capital IV; WesPac Energy LLC
Softbank [100.0%]
Fortress Investment Group [100.0%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Black & Veatch Holding Company [4.48%]; Black & Veatch Holding Company [10.5%]; Black & Veatch Holding Company [80.55%]
Venture Global LNG [100.0%]
Venture Global LNG [100.0%]
Venture Globa

IndexError: list index out of range

In [414]:
owner_parent_calculations_df.loc[owner_parent_calculations_df.ComboID=='T031600']

Unnamed: 0,Parent,ComboID,FractionOwnership,CapacityInMtpa
0,Mitsubishi,T031600,0.333333,2.7
0,Sojitz Corporation,T031600,0.333333,2.7
0,Petronet LNG,T031600,0.333333,2.7
0,Mitsubishi,T031600,0.333333,2.7
0,Sojitz Corporation,T031600,0.333333,2.7
0,Petronet LNG,T031600,0.333333,2.7


In [None]:
re.sub(' \[.*\]', '', parent_string.split('; ')[0])

'Woodside Energy'

In [None]:
re.sub(' \[\\d+(?:\\.\\d+)?%\]', '', parent_string.split('; ')[0])

'Woodside Energy'

In [None]:
# sub anything within brackets and a percent
re.sub(' \[\\d+(?:\\.\\d+)?%\]', '', 'Hello [50%]; World [50%]').split('; ')

['Hello', 'World']

In [360]:
# sub anything within brackets and a percent
[float(i.rstrip('%'))/100. for i in re.findall('\\d+(?:\\.\\d+)?%', 'Hello, dolly [50%]; World [50%]')]

[0.5, 0.5]

In [344]:
re.search(' \[\\d+(?:\\.\\d+)?%\]', 'Hello [50%]; World [50%]')#parent_string.split('; ')[0])

<re.Match object; span=(5, 11), match=' [50%]'>

In [None]:
type(parent_list)

float

In [213]:
# import all parent and owner rows
# calculate capacity and num of projects with owners, and their percent
# for missing parent company info, if we have owners, do same for those
# for all else missing, label as unknown, and place in a country
# sort ultimate info by country, then owner, then project numbers

In [221]:
for idx,row in owner_parent_df_orig.iterrows():
    parent_list = row.ParentList
    parent_fractions = row.ParentOwnrshpArray
    if 1==1:#parent_list.__len__()!=parent_fractions.__len__():
        print(parent_list)
        print(parent_fractions)
        print()
    # for each parent, calculate its percent ownership
    # presume equal distributio of ownership if there's no data
    #if parent_list.__len__()>0:
    #    for parent in parent_list:
    #        owner_parent_calculations_df.append(

['Energy World']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['Energy World']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['State of Alaska']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['State of Alaska']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['State of Alaska']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['WesPac Energy LLC', 'Highstar Capital IV']
<PandasArray>
[nan, nan]
Length: 2, dtype: float64

['Softbank']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['Fortress Investment Group']
<PandasArray>
[1.0]
Length: 1, dtype: float64

['Exelon Corporation', 'Enbridge', 'Kiewit Corporation', 'Black & Veatch Holding Company']
<PandasArray>
[0.8055, 0.105, 0.044800000000000006, 0.044800000000000006]
Length: 4, dtype: float64

['Exelon Corporation', 'Enbridge', 'Kiewit Corporation', 'Black & Veatch Holding Company']
<PandasArray>
[0.8055, 0.105, 0.044800000000000006, 0.044800000000000006]
Length: 4, dtype: float64

['Exelon Corporation', 'Enbridge', 'Kiewit Corporatio

(0,)

In [59]:
terms_df_orig

Unnamed: 0,TerminalID,ProjectID,ComboID,Wiki,TerminalName,UnitName,Status,Type1,Facility,Country,Researcher,LastUpdated,OtherEnglishNames,Owner,Parent,Type2,ProposalYear,ConstructionYear,StartYear1,StartYear2,StartYear3,Delayed,DelayType,StartYearEarliest,ShelvedYear,CancelledYear,StopYear,ShelvedCancelledStatusType,Capacity,CapacityUnits,CapacityInMtpa,CapacityInBcm/y,Region,Location,Prefecture/District,State/Province,Latitude,Longitude,Accuracy,Source,PowerPlantsSupplied,CostEst,CostEstUnits,CostEstYear,CostEstUSD,FID,FIDYear,ReExport,EuropeTracker,PCINumber,PCI3,PCI4,PCI5,Floating,FloatingVesselName,Opposition,ESJNotes,Defeated,OtherLanguageName,OtherLanguageWikiPage,H2,H2Notes,CCS,CCSNotes,EuroCrisisTracker,ResearcherNotes1,ResearcherNotes2,ResearcherNotes3,ExportFigureClarkWilliamsDerry,IGUWorldLNGReportAppendix2,Rob,CostUSDPerBcm/y,CostUSDPerMtpa,ProposalYearMonth,FIDYearMonth,ConstructionYearMonth,StartYear1YearMonth
0,T0205,1,T020501,https://www.gem.wiki/Fourchon_LNG_Terminal,Fourchon LNG Terminal,Phase 1,Proposed,LNG,Export,United States,RR,2022/04/27,Gulf Coast LNG Terminal,Fourchon LNG LLC [100.0%],Energy World [100.0%],greenfield,2017,,2022,,,Yes,Presumed,2022,,,,,2.0,mtpa,2.00,2.72,North America,Port Fourchon,Lafourche Parish,Louisiana,29.105833,-90.194444,approximate,,,888000000,USD,2017,,Pre-FID,,,,,,,,,,,,,,,,,,,,proposed in Aug 2017,,,,,,,,,,,
1,T0205,2,T020502,https://www.gem.wiki/Fourchon_LNG_Terminal,Fourchon LNG Terminal,Phase 2,Proposed,LNG,Export,United States,RR,2022/04/27,Gulf Coast LNG Terminal,Fourchon LNG LLC [100.0%],Energy World [100.0%],greenfield,2017,,2023,,,Yes,Presumed,2023,,,,,3.0,mtpa,3.00,4.08,North America,Port Fourchon,Lafourche Parish,Louisiana,29.105833,-90.194444,approximate,,,,,,,Pre-FID,,,,,,,,,,,,,,,,,,,,proposed in Aug 2017,,,,,,,,,,,
2,T0206,1,T020601,https://www.gem.wiki/Alaska_South_Central_LNG_...,Alaska LNG Terminal,T1,Proposed,LNG,Export,United States,RR,2022/04/27,Alaska South Central LNG Terminal,Alaska Gasline Development Corporation [100.0%],State of Alaska [100.0%],greenfield,2017,,2025,,,Yes,Presumed,2025,,,,,6.7,mtpa,6.70,9.11,North America,Nikiski,,Alaska,60.707778,-151.262778,approximate,Alaska LNG Pipeline,,,,,,Pre-FID,2021,,,,,,,,,yes,,,,,,,,,,There is a cost estimate of US$38.7 billion fo...,,,,,,,,,,,
3,T0206,2,T020602,https://www.gem.wiki/Alaska_South_Central_LNG_...,Alaska LNG Terminal,T2,Proposed,LNG,Export,United States,RR,2022/04/27,Alaska South Central LNG Terminal,Alaska Gasline Development Corporation [100.0%],State of Alaska [100.0%],greenfield,2017,,2025,,,Yes,Presumed,2025,,,,,6.7,mtpa,6.70,9.11,North America,Nikiski,,Alaska,60.707778,-151.262778,approximate,Alaska LNG Pipeline,,,,,,Pre-FID,2021,,,,,,,,,yes,,,,,,,,,,There is a cost estimate of US$38.7 billion fo...,,,,,,,,,,,
4,T0206,3,T020603,https://www.gem.wiki/Alaska_South_Central_LNG_...,Alaska LNG Terminal,T3,Proposed,LNG,Export,United States,RR,2022/04/27,Alaska South Central LNG Terminal,Alaska Gasline Development Corporation [100.0%],State of Alaska [100.0%],greenfield,2017,,2025,,,Yes,Presumed,2025,,,,,6.7,mtpa,6.70,9.11,North America,Nikiski,,Alaska,60.707778,-151.262778,approximate,Alaska LNG Pipeline,,,,,,Pre-FID,2021,,,,,,,,,yes,,,,,,,,,,There is a cost estimate of US$38.7 billion fo...,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1211,T1082,0,T108200,https://www.gem.wiki/Sinolam_LNG_Terminal,Sinolam LNG Terminal,,Proposed,LNG,Import,Panama,GC,2022/08/01,,Sinolam Smarter Energy [100.0%],Shanghai Gorgeous Investment Development Co Lt...,,2020,,,,,,,,,,,,,,,,Latin America and the Caribbean,Isla Margarita,,Colón,9.384621,-79.88305,approximate,,,,,,,,,,,,,,,,GasLog Singapore (FSU only),,,,Terminal de GNL Sinolam,https://www.gem.wiki/Terminal_de_GNL_Sinolam,,,,,,,,,,,,,,,,,
1212,T1083,0,T108300,https://www.gem.wiki/Antigua_Power_LNG_Terminal,Antigua Power LNG Terminal,,Proposed,LNG,Import,Antigua and Barbuda,GC,2022/08/01,,Caribbean LNG [100.0%],Antigua Power Co Ltd [50.0%],,2021,,2023,,,,,2023,,,,,,,,,Latin America and the Caribbean,Crabbs,,Antigua,17.136679,-61.751593,approximate,,,,,,,,,,,,,,,,,,,,Terminal de GNL Antigua Power,https://www.gem.wiki/Terminal_de_GNL_Antigua_P...,,,,,,,,,,,,,,,,,
1213,T1084,0,T108400,https://www.gem.wiki/Brunsb%C3%BCttel_LNG_Term...,Brunsbüttel LNG Terminal,,Proposed,LNG,Import,Germany,BL,2022/08/02,,Gasunie; Oiltanking; Vopak LNG,Vopak [100.0%],,2017,,2026,,,Yes,Confirmed,2026,,,,,8.0,bcm/y,5.88,8.00,Europe,Brunsbüttel,,Hamburg,53.914001,8.976095,approximate,,,,,,,Pre-FID,2023,,,,,,,,,,,,,,,on TYNDP list as capable of increasing H2%,,,,,,,,,,,,,,,
1214,T1085,0,T108500,https://www.gem.wiki/Woodside_Probolinggo_LNG_...,Woodside Probolinggo LNG Terminal,,Proposed,LNG,Import,Indonesia,RR,2022/08/04,,PT Petrogas Jatim Utama; Woodside Energy,Woodside Energy [100.0%],,,,,,,,,,,,,,,,,,SE Asia,Probolinggo,,East Java,-7.728886,113.217041,approximate,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [52]:
owner_list = []
for column in owner_column_list:
    owner_list+=list(owners_df_orig[column])
owner_list = list(set(owner_list))
# remove empty (no owner)
owner_list.remove(numpy.nan)
unique_owner_list = sorted(owner_list)

#pandas.Series(unique_owner_list).to_excel('AllFuels'+'UniqueOwnersList.xlsx')

## parent analysis

### ntrains export by parent company, project status

In [56]:
owners_df_subset = owners_df_touse.copy()[(owners_df_touse['Facility']=='Export')&
                                          (owners_df_touse['Type1']=='LNG')]

##################################################

owner_list_subset = []
for column in owner_column_list:
    owner_list_subset+=list(owners_df_subset[column])
owner_list_subset = list(set(owner_list_subset))
# remove empty (no owner)
owner_list_subset.remove(numpy.nan)
unique_owner_list_subset = sorted(owner_list_subset)

##################################################
# convert percents to fractions
##################################################
owners_df_fractions = owners_df_subset.copy()

for col in owner_pct_col_names:
    owners_df_fractions[col] = owners_df_fractions[col].apply(lambda x: numpy.nan if x in [numpy.nan] 
                                          else x[:-1]).astype(float)/100
#df['col'] = df['col'].apply(lambda x: np.nan if x in ['-'] else x[:-1]).astype(float)/100


##################################################
# create train count by owner, status
##################################################
no_owner_info_count = 0
owners_ntrains_by_status_df = pandas.DataFrame(0.0, index=unique_owner_list_subset, columns=status_list)

for status in status_list:
    
    owners_df_temporary = owners_df_fractions.copy()[owners_df_fractions['Status']==status]
    
    for idx,row in owners_df_temporary.iterrows():
        #print(row)
        
        ### how many owners are there?
        row_owners = list(row[owner_column_list])
        row_owners = [i for i in row_owners if str(i)!='nan']
        n_owners = row_owners.__len__()
        #print(row_owners)
        #print(n_owners)
        row_fractions = list(row[owner_pct_col_names])
        row_fractions = [i for i in row_fractions if str(i)!='nan']

        ### if there are no owners listed, continue to next loop iteration
        if n_owners==0:
            no_owner_info_count+=1
            continue # doesn't complete code below, goes to next iteration in for loop

        ### now if row fractions is an empty list, create equal fractions list instead
        if row_fractions==[]:
            row_fractions = [1/n_owners]*n_owners

        ntrain_fractions = numpy.array(row_fractions)

        for owner_idx in range(n_owners):
            owners_ntrains_by_status_df.loc[row_owners[owner_idx]][status]+=ntrain_fractions[owner_idx]

        #print()
owners_ntrains_by_status_df.index.name = 'Parent Company'
owners_ntrains_by_status_df['In Development (Proposed + Construction)'] = owners_ntrains_by_status_df[['Proposed','Construction']].sum(axis=1)
owners_ntrains_by_status_df = owners_ntrains_by_status_df[excel_status_list]

# rearrange the order of the columns for output
owners_ntrains_by_status_df = owners_ntrains_by_status_df[excel_status_list]

totals_row = owners_ntrains_by_status_df.sum(axis=0)
totals_row.name = 'Total'
owners_ntrains_by_status_df = owners_ntrains_by_status_df.append(totals_row)

#owners_ntrains_by_status_df.to_excel(excel_writer, sheet_name='LNG export trains by status and owner')

In [57]:
owners_ntrains_by_status_df

Unnamed: 0_level_0,Proposed,Construction,In Development (Proposed + Construction),Shelved,Cancelled,Operating,Idle,Mothballed,Retired
Parent Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A-Property,0.333333,0.0,0.333333,0.0,0.0000,0.0000,0.0,0.0,0.0
AC LNG,0.000000,0.0,0.000000,0.0,1.0000,0.0000,0.0,0.0,0.0
ADNOC LNG,0.000000,0.0,0.000000,0.0,0.0000,3.0000,0.0,0.0,0.0
AECOM Capital,3.000000,0.0,3.000000,0.0,0.0000,0.0000,0.0,0.0,0.0
Abu Dhabi National Oil Company,2.000000,0.0,2.000000,0.0,0.0000,0.0000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
Yamal LNG,0.000000,0.0,0.000000,0.0,0.0000,4.0000,0.0,0.0,0.0
Yemen LNG,0.000000,0.0,0.000000,0.0,0.0000,0.0000,0.0,2.0,0.0
Zhejiang Energy,0.333333,0.0,0.333333,0.0,0.0000,0.0000,0.0,0.0,0.0
Électricité de France,0.000000,0.0,0.000000,0.0,0.2500,0.0000,0.0,0.0,0.0


### ntrains import by parent company, project status

In [64]:
owners_df_subset = owners_df_touse.copy()[(owners_df_touse['Facility']=='Import')&
                                          (owners_df_touse['Type1']=='LNG')]

##################################################

owner_list_subset = []
for column in owner_column_list:
    owner_list_subset+=list(owners_df_subset[column])
owner_list_subset = list(set(owner_list_subset))
# remove empty (no owner)
owner_list_subset.remove(numpy.nan)
unique_owner_list_subset = sorted(owner_list_subset)

##################################################
# convert percents to fractions
##################################################
owners_df_fractions = owners_df_subset.copy()

for col in percent_column_list:
    owners_df_fractions[col] = owners_df_fractions[col].apply(lambda x: numpy.nan if x in [numpy.nan] 
                                          else x[:-1]).astype(float)/100
#df['col'] = df['col'].apply(lambda x: np.nan if x in ['-'] else x[:-1]).astype(float)/100


##################################################
# create train count by owner, status
##################################################
no_owner_info_count = 0
owners_ntrains_by_status_df = pandas.DataFrame(0.0, index=unique_owner_list_subset, columns=status_list)

for status in status_list:
    
    owners_df_temporary = owners_df_fractions.copy()[owners_df_fractions['Status']==status]
    
    for idx,row in owners_df_temporary.iterrows():
        
        #print(row)
        #print()
        
        ### how many owners are there?
        row_owners = list(row[owner_column_list])
        row_owners = [i for i in row_owners if str(i)!='nan']
        n_owners = row_owners.__len__()
        row_fractions = list(row[percent_column_list])
        row_fractions = [i for i in row_fractions if str(i)!='nan']

        ### if there are no owners listed, continue to next loop iteration
        if n_owners==0:
            no_owner_info_count+=1
            continue # doesn't complete the rest of the ifs

        ### now if row fractions is an empty list, create equal fractions list instead
        if row_fractions==[]:
            row_fractions = [1/n_owners]*n_owners

        ntrain_fractions = numpy.array(row_fractions)

        for owner_idx in range(n_owners):
            owners_ntrains_by_status_df.loc[row_owners[owner_idx]][status]+=ntrain_fractions[owner_idx]

owners_ntrains_by_status_df.index.name = 'Parent Company'
owners_ntrains_by_status_df['In Development (Proposed + Construction)'] = owners_ntrains_by_status_df[['Proposed','Construction']].sum(axis=1)
owners_ntrains_by_status_df = owners_ntrains_by_status_df[excel_status_list]

# rearrange the order of the columns for output
owners_ntrains_by_status_df = owners_ntrains_by_status_df[excel_status_list]

totals_row = owners_ntrains_by_status_df.sum(axis=0)
totals_row.name = 'Total'
owners_ntrains_by_status_df = owners_ntrains_by_status_df.append(totals_row)

owners_ntrains_by_status_df.to_excel('GGIT'+'-LNG-ImportTrainsByOwnerAndStatus.xlsx')


### capacity import by parent company, project status

In [65]:
owners_df_subset = owners_df_touse.copy()[(owners_df_touse['Facility']=='Import')&
                                          (owners_df_touse['Type1']=='LNG')]

##################################################

owner_list_subset = []
for column in owner_column_list:
    owner_list_subset+=list(owners_df_subset[column])
owner_list_subset = list(set(owner_list_subset))
# remove empty (no owner)
owner_list_subset.remove(numpy.nan)
unique_owner_list_subset = sorted(owner_list_subset)

##################################################
# convert percents to fractions
##################################################
owners_df_fractions = owners_df_subset.copy()

for col in percent_column_list:
    owners_df_fractions[col] = owners_df_fractions[col].apply(lambda x: numpy.nan if x in [numpy.nan] 
                                          else x[:-1]).astype(float)/100
#df['col'] = df['col'].apply(lambda x: np.nan if x in ['-'] else x[:-1]).astype(float)/100


##################################################
# create train count by owner, status
##################################################
no_owner_info_count = 0
owners_capacity_by_status_df = pandas.DataFrame(0.0, index=unique_owner_list_subset, columns=status_list)

for status in status_list:
    
    owners_df_temporary = owners_df_fractions.copy()[owners_df_fractions['Status']==status]
    
    for idx,row in owners_df_temporary.iterrows():
        
        ### how many owners are there?
        row_owners = list(row[owner_column_list])
        row_owners = [i for i in row_owners if str(i)!='nan']
        n_owners = row_owners.__len__()
        row_fractions = list(row[percent_column_list])
        row_fractions = [i for i in row_fractions if str(i)!='nan']

        ### if there are no owners listed, continue to next loop iteration
        if n_owners==0:
            no_owner_info_count+=1
            continue # doesn't complete the rest of the ifs

        ### now if row fractions is an empty list, create equal fractions list instead
        if row_fractions==[]:
            row_fractions = [1/n_owners]*n_owners

        train_capacity = row['CapacityInMtpa']
        ntrain_capacity_fractions = numpy.array(row_fractions)*train_capacity

        for owner_idx in range(n_owners):
            owners_capacity_by_status_df.loc[row_owners[owner_idx]][status]+=ntrain_capacity_fractions[owner_idx]

owners_capacity_by_status_df.index.name = 'Parent Company'
owners_capacity_by_status_df['In Development (Proposed + Construction)'] = owners_capacity_by_status_df[['Proposed','Construction']].sum(axis=1)
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list]

# rearrange the order of the columns for output
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list]

totals_row = owners_capacity_by_status_df.sum(axis=0)
totals_row.name = 'Total'
owners_capacity_by_status_df = owners_capacity_by_status_df.append(totals_row)

owners_capacity_by_status_df.to_excel('GGIT'+'-LNG-ImportTrainCapacityByOwnerAndStatus.xlsx', na_rep='--')


### capacity export by parent company, project status

In [66]:
owners_df_subset = owners_df_touse.copy()[(owners_df_touse['Facility']=='Export')&
                                          (owners_df_touse['Type1']=='LNG')]

##################################################

owner_list_subset = []
for column in owner_column_list:
    owner_list_subset+=list(owners_df_subset[column])
owner_list_subset = list(set(owner_list_subset))
# remove empty (no owner)
owner_list_subset.remove(numpy.nan)
unique_owner_list_subset = sorted(owner_list_subset)

##################################################
# convert percents to fractions
##################################################
owners_df_fractions = owners_df_subset.copy()

for col in percent_column_list:
    owners_df_fractions[col] = owners_df_fractions[col].apply(lambda x: numpy.nan if x in [numpy.nan] 
                                          else x[:-1]).astype(float)/100
#df['col'] = df['col'].apply(lambda x: np.nan if x in ['-'] else x[:-1]).astype(float)/100


##################################################
# create train count by owner, status
##################################################
no_owner_info_count = 0
owners_capacity_by_status_df = pandas.DataFrame(0.0, index=unique_owner_list_subset, columns=status_list)

for status in status_list:
    
    owners_df_temporary = owners_df_fractions.copy()[owners_df_fractions['Status']==status]
    
    for idx,row in owners_df_temporary.iterrows():
        
        ### how many owners are there?
        row_owners = list(row[owner_column_list])
        row_owners = [i for i in row_owners if str(i)!='nan']
        n_owners = row_owners.__len__()
        row_fractions = list(row[percent_column_list])
        row_fractions = [i for i in row_fractions if str(i)!='nan']

        ### if there are no owners listed, continue to next loop iteration
        if n_owners==0:
            no_owner_info_count+=1
            continue # doesn't complete the rest of the ifs

        ### now if row fractions is an empty list, create equal fractions list instead
        if row_fractions==[]:
            row_fractions = [1/n_owners]*n_owners

        train_capacity = row['CapacityInMtpa']
        ntrain_capacity_fractions = numpy.array(row_fractions)*train_capacity

        for owner_idx in range(n_owners):
            owners_capacity_by_status_df.loc[row_owners[owner_idx]][status]+=ntrain_capacity_fractions[owner_idx]

owners_capacity_by_status_df.index.name = 'Parent Company'
owners_capacity_by_status_df['In Development (Proposed + Construction)'] = owners_capacity_by_status_df[['Proposed','Construction']].sum(axis=1)
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list]

# rearrange the order of the columns for output
owners_capacity_by_status_df = owners_capacity_by_status_df[excel_status_list]

totals_row = owners_capacity_by_status_df.sum(axis=0)
totals_row.name = 'Total'
owners_capacity_by_status_df = owners_capacity_by_status_df.append(totals_row)

owners_capacity_by_status_df.to_excel('GGIT'+'-LNG-ExportTrainCapacityByOwnerAndStatus.xlsx', na_rep='--')


### no. of operating terminals by start year, facility type (1980–2021)

In [70]:
#terms_started_eu = terms_df_touse.copy()
#pipes_started_eu['StartYearLatest'].replace(numpy.nan,'',inplace=True)

year_indices = list(range(1980,2022))

terms_by_start_year_df = pandas.DataFrame(0.0, index=year_indices, columns=['Import terminals', 'Export trains'])

terms_started_eu = terms_df_touse.copy()[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Type1']=='LNG') & (terms_df_touse['Facility']=='Import')]
terms_by_start_year_df['Import terminals'] = terms_started_eu.groupby('StartYearEarliest').size()

terms_started_eu = terms_df_touse.copy()[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Type1']=='LNG') & (terms_df_touse['Facility']=='Export')]
terms_by_start_year_df['Export trains'] = terms_started_eu.groupby('StartYearEarliest').size()

terms_by_start_year_df.index.name = 'Start year'
terms_by_start_year_df.replace(numpy.nan,0,inplace=True)

totals_row = terms_by_start_year_df.sum(axis=0)
totals_row.name = 'Total'
terms_by_start_year_df = terms_by_start_year_df.append(totals_row)

terms_by_start_year_df.to_excel('GGIT'+'-LNG-TerminalCountsByStartYear.xlsx')

### capacity of operating terminals by start year, facility type (1980–2021)

In [73]:
#terms_started_eu = terms_df_touse.copy()
#pipes_started_eu['StartYearLatest'].replace(numpy.nan,'',inplace=True)

year_indices = list(range(1980,2022))

capacity_by_start_year_df = pandas.DataFrame(0.0, index=year_indices, columns=['Import terminals', 'Export trains'])

terms_started_eu = terms_df_touse.copy()[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Type1']=='LNG') & (terms_df_touse['Facility']=='Import')]
capacity_by_start_year_df['Import terminals'] = terms_started_eu.groupby('StartYearEarliest')['CapacityInMtpa'].sum()

terms_started_eu = terms_df_touse.copy()[(terms_df_touse['Status'].isin(['Operating'])) &
                              (terms_df_touse['Type1']=='LNG') & (terms_df_touse['Facility']=='Export')]
capacity_by_start_year_df['Export trains'] = terms_started_eu.groupby('StartYearEarliest')['CapacityInMtpa'].sum()

capacity_by_start_year_df.index.name = 'Start year'
capacity_by_start_year_df.replace(numpy.nan,0,inplace=True)

totals_row = capacity_by_start_year_df.sum(axis=0)
totals_row.name = 'Total'
capacity_by_start_year_df = capacity_by_start_year_df.append(totals_row)

capacity_by_start_year_df.to_excel('GGIT'+'-LNG-TerminalCapacityByStartYear.xlsx')