In [2]:
import pandas
pandas.set_option("display.max_rows", 50, "display.max_columns", 50)

import numpy
import pygsheets
import scipy.stats
import datetime

# import Pipelines_Current dataset

In [3]:
# Authorize against the Google Sheets API using the locally stored client secret.
credentials_directory = '/Users/baird/Dropbox/_google-api/'
client_secret_path = credentials_directory + 'client_secret.json'
gc = pygsheets.authorize(client_secret=client_secret_path)

# Open the pipelines workbook (mar 22 2022 version for EGT).
# Alternative workbook key, currently unused: '1foPLE6K-uqFlaYgLPAUxzeXfDO5wOOqE7tibNHeqTek'
spreadsheet = gc.open_by_key('1kIc-WYTFwKVsqHI4F03nH1_BDv6K434sKPhkoBwezZU')

# Read the gas and oil/NGL tabs, discarding the WKTFormat column from each.
gas_pipes = (
    spreadsheet.worksheet('title', 'Gas pipelines')
    .get_as_df()
    .drop(columns='WKTFormat')
)
oil_pipes = (
    spreadsheet.worksheet('title', 'Oil/NGL pipelines')
    .get_as_df()
    .drop(columns='WKTFormat')
)

# Stack oil above gas with a fresh integer index, then keep only rows that
# actually carry a pipeline name (blank rows come through as empty strings).
pipes_df_orig = pandas.concat([oil_pipes, gas_pipes], ignore_index=True)
has_pipeline_name = pipes_df_orig['PipelineName'] != ''
pipes_df_orig = pipes_df_orig[has_pipeline_name]

# Per-country breakdown of each pipeline, read from its own tab.
country_ratios_df = (
    spreadsheet.worksheet('title', 'Country ratios by pipeline')
    .get_as_df()
)

In [4]:
# The sheets use '--' as a "no value" placeholder; turn it into NaN in both
# the country-ratio table and the pipeline table.
country_ratios_df = country_ratios_df.replace('--', numpy.nan)
pipes_df_orig = pipes_df_orig.replace('--', numpy.nan)

In [5]:
# Country -> region/membership lookup, read from the 'Region dictionary' tab.
region_df_orig = (
    spreadsheet
    .worksheet('title', 'Region dictionary')
    .get_as_df()
)

In [6]:
# Independent country subsets of the region dictionary.
region_df_eu = region_df_orig.loc[region_df_orig['EuropeanUnion'] == 'Yes'].copy()
region_df_egt = region_df_orig.loc[region_df_orig['EuroGasTracker'] == 'Yes'].copy()

in_europe = region_df_orig['Region'] == 'Europe'
region_df_europe = region_df_orig.loc[in_europe].copy()

# NOTE(review): despite its name, this subset is "Region == Europe" plus the
# United Kingdom and Israel -- confirm that this is the intended membership.
is_uk_or_israel = region_df_orig['Country'].isin(['United Kingdom', 'Israel'])
region_df_eu_uk = region_df_orig.loc[in_europe | is_uk_or_israel].copy()

In [7]:
# Region subset used for every "in region" filter below: the EU member states.
region_df_touse = region_df_eu.copy(deep=True)

In [8]:
# Hand-maintained grouping of EU member states into four sub-regions.
western_eu = [
    'Ireland',
    'France',
    'Belgium',
    'Netherlands',
    'Luxembourg',
    'Germany',
    'Austria',
]

eastern_eu = [
    'Estonia',
    'Latvia',
    'Lithuania',
    'Poland',
    'Czech Republic',
    'Slovakia',
    'Slovenia',
    'Croatia',
    'Hungary',
    'Romania',
    'Bulgaria',
]

southern_eu = [
    'Portugal',
    'Spain',
    'Italy',
    'Malta',
    'Greece',
    'Cyprus',
]

northern_eu = [
    'Sweden',
    'Finland',
    'Denmark',
]

### add double dagger to PCI5 projects

In [9]:
# U+2021 DOUBLE DAGGER, appended to the names of PCI5 projects.
double_dagger = "\u2021"

In [10]:
# Tag every PCI5 pipeline name with a trailing double dagger, both in the
# pipeline table and in the matching rows of the country-ratio table.
# NOTE: re-running this cell appends another dagger each time.
pci5_pipes = pipes_df_orig['PCI5'] == 'yes'
pci5_projectids = pipes_df_orig.loc[pci5_pipes, 'ProjectID'].tolist()

pipes_df_orig.loc[pci5_pipes, 'PipelineName'] += ' ' + double_dagger

pci5_ratio_rows = country_ratios_df['ProjectID'].isin(pci5_projectids)
country_ratios_df.loc[pci5_ratio_rows, 'PipelineName'] += ' ' + double_dagger

# cost estimates for EU 27 member states

## outliers and special cases

In [11]:
# Project IDs referenced when choosing outliers:
#   P0753 - Nord Stream Gas Pipeline
#   P3104 - Adriatica Pipeline
#   P0752 - Nord Stream 2 Gas Pipeline
#   P0760 - South Stream Gas Pipeline
#
# Only Adriatica is excluded from the cost calculation. Its phases 3, 4 and 5
# together cost 1.384 billion EUR over 141.00 + 170.00 + 114.00 km = 425 km,
# so it is added back in later as a single entry.
outliers_projectids = ['P3104']
#P0760 - South Stream Gas Pipeline

In [12]:
# Override: treat the Nigeria-Morocco pipeline as Proposed rather than Construction.
is_nigeria_morocco = country_ratios_df['PipelineName'] == 'Nigeria-Morocco Gas Pipeline'
country_ratios_df.loc[is_nigeria_morocco, 'Status'] = 'Proposed'

In [13]:
# Source: https://www.gem.wiki/Poland-Ukraine_Interconnector_Gas_Pipeline
# The country_ratios calculation splits this pipeline half-and-half between
# the two countries, which is wrong; set the per-country lengths by hand.
poland_ukraine_km = {'Poland': 1.5, 'Ukraine': 99.0}
is_poland_ukraine = (
    country_ratios_df['PipelineName'] == 'Poland-Ukraine Interconnector Gas Pipeline'
)
for country_name, km_in_country in poland_ukraine_km.items():
    row_mask = is_poland_ukraine & (country_ratios_df['Country'] == country_name)
    country_ratios_df.loc[row_mask, 'MergedKmByCountry'] = km_in_country

### correct Gas Interconnection Poland-Lithuania (our fractional calc doesn't get it right)

In [14]:
# Manual per-country lengths for the Poland-Lithuania interconnection
# (the fractional calculation does not get this one right).
poland_lithuania_km = {'Poland': 343.0, 'Lithuania': 165.0}
is_poland_lithuania = (
    country_ratios_df['PipelineName'] == 'Gas Interconnection Poland-Lithuania'
)
for country_name, km_in_country in poland_lithuania_km.items():
    row_mask = is_poland_lithuania & (country_ratios_df['Country'] == country_name)
    country_ratios_df.loc[row_mask, 'MergedKmByCountry'] = km_in_country


In [15]:
# Rows that feed the cost-per-km statistics: gas pipelines with a known
# cost per km, located in one of the selected (EU27) countries, and not
# listed as an outlier project.
is_costed_gas_in_region = (
    country_ratios_df['Fuel'].eq('Gas')
    & country_ratios_df['CostEuroPerKm'].notna()
    & country_ratios_df['Country'].isin(region_df_touse['Country'].tolist())
)
is_outlier_project = country_ratios_df['ProjectID'].isin(outliers_projectids)

country_ratios_cost_calc = country_ratios_df.loc[
    is_costed_gas_in_region & ~is_outlier_project
].copy()


### add Adriatica (costs are reported in an odd way)

In [16]:
# Adriatica's cost is reported for several phases combined (1.384 billion EUR
# over 425 km), so it is re-added here as one synthetic row labelled
# 'Adriatica', located in Italy.
#
# pandas.concat is used because DataFrame.append is deprecated and no longer
# exists in pandas 2.x. Any 'Adriatica' row left over from a previous run is
# dropped first, so re-running the cell does not duplicate the row.
adriatica_row = pandas.DataFrame(
    {
        'Country': 'Italy',
        'CostEuroPerKm': 1.384e9/425,
        'MergedKmByCountry': 425,
    },
    index=['Adriatica'],
)
country_ratios_cost_calc = pandas.concat(
    [country_ratios_cost_calc.drop(index='Adriatica', errors='ignore'), adriatica_row]
)

### get cost value per country for calculating country-specific pipeline costs

In [17]:
# One row per selected country; countries without any costed pipeline stay NaN.
eu_country_names = region_df_touse['Country'].tolist()
country_level_pipeline_costs_df = pandas.DataFrame(
    index=eu_country_names,
    columns=['EstCostInEachCountry', 'NumValues'],
    dtype=float,
)

cost_per_km_by_country = country_ratios_cost_calc.groupby('Country')['CostEuroPerKm']
# Mean cost per km in millions of Euro, and the number of values behind each mean.
country_level_pipeline_costs_df['EstCostInEachCountry'] = cost_per_km_by_country.mean() / 1.e6
country_level_pipeline_costs_df['NumValues'] = cost_per_km_by_country.count()
country_level_pipeline_costs_df

Unnamed: 0,EstCostInEachCountry,NumValues
Austria,4.749263,2.0
Belgium,,
Bulgaria,1.302499,9.0
Croatia,1.525,9.0
Cyprus,2.908187,3.0
Czech Republic,,
Denmark,7.754481,3.0
Estonia,,
Finland,9.917435,2.0
France,2.826456,2.0


In [18]:
# Column-wise means across countries (NaN rows are skipped).
country_level_pipeline_costs_df.mean(axis=0)

EstCostInEachCountry    3.42366
NumValues               5.00000
dtype: float64

### now use these country-specific costs as multipliers on within-country kilometers

In [24]:
# Mean of the per-country mean costs, in millions of Euro per km.
mean_cost_per_km = country_level_pipeline_costs_df['EstCostInEachCountry'].mean()

# Gas pipeline segments located in one of the selected (EU27) countries.
is_gas_in_region = (
    country_ratios_df['Country'].isin(region_df_touse['Country'].tolist())
    & (country_ratios_df['Fuel'] == 'Gas')
)
country_ratios_df_eu27 = country_ratios_df.loc[is_gas_in_region].copy()

# Default estimate: within-country kilometres times the overall mean cost.
# This is computed in one vectorized step instead of a row-by-row loop, which
# was slow and would mis-assign values if index labels repeated.
# (A per-country multiplier from country_level_pipeline_costs_df was tried
# and is not used.)
country_ratios_df_eu27['TotalEstCostEuroMillions'] = (
    country_ratios_df_eu27['MergedKmByCountry'].astype(float) * mean_cost_per_km
)

# Where a cost per km is known, use it instead of the estimate.
costs_exist_TF = country_ratios_df_eu27['CostEuroPerKm'].notna()
country_ratios_df_eu27.loc[costs_exist_TF, 'TotalEstCostEuroMillions'] = (
    country_ratios_df_eu27.loc[costs_exist_TF, 'CostEuroPerKm'] / 1e6
    * country_ratios_df_eu27.loc[costs_exist_TF, 'MergedKmByCountry']
)

# Trans-Adriatic "TAP Expansion": relabel the Italian row as 'Italy, Greece'
# and give it the whole project cost (ProjectID P3203, taken from the full
# country-ratio table). No scaling is applied to exclude Albania.
is_tap_expansion = (
    (country_ratios_df_eu27['PipelineName'] == 'Trans-Adriatic Gas Pipeline')
    & (country_ratios_df_eu27['SegmentName'] == 'TAP Expansion')
)
country_ratios_df_eu27.loc[
    is_tap_expansion & (country_ratios_df_eu27['Country'] == 'Italy'), 'Country'
] = 'Italy, Greece'

# .values[0] raises IndexError if P3203 is missing from the sheet.
tap_expansion_cost_millions = (
    country_ratios_df.loc[country_ratios_df['ProjectID'] == 'P3203', 'CostEuro'].values[0] / 1e6
)
country_ratios_df_eu27.loc[
    is_tap_expansion & (country_ratios_df_eu27['Country'] == 'Italy, Greece'),
    'TotalEstCostEuroMillions',
] = tap_expansion_cost_millions

# The separate Greek row of TAP Expansion is now covered by the combined row.
greece_idx = country_ratios_df_eu27.index[
    is_tap_expansion & (country_ratios_df_eu27['Country'] == 'Greece')
]
country_ratios_df_eu27 = country_ratios_df_eu27.drop(index=greece_idx)

# Finally, discard segments with zero kilometres in a country, except TAP.
country_ratios_df_eu27 = country_ratios_df_eu27[
    (country_ratios_df_eu27['PipelineName'] == 'Trans-Adriatic Gas Pipeline')
    | (country_ratios_df_eu27['MergedKmByCountry'] != 0)
]


In [25]:
# Display the overall mean pipeline cost per km used as the fallback multiplier.
mean_cost_per_km # in million Euro

3.4236597491262057

In [26]:
# Columns exported to the per-country kilometre/cost spreadsheet.
output_columns = ['Country',
                  'PipelineName',
                  'SegmentName',
                  'Status',
                  'CapacityBcm/y',
                  'MergedKmByCountry',
                  'TotalEstCostEuroMillions']

# Planned (proposed or under construction) segments only, ordered by status,
# then name, then descending cost.
country_ratios_df_eu27_excel = country_ratios_df_eu27[country_ratios_df_eu27['Status'].isin(['Proposed','Construction'])]
country_ratios_df_eu27_excel = country_ratios_df_eu27_excel.sort_values(['Status','PipelineName','TotalEstCostEuroMillions'], ascending=[True,True,False])
# Blank out missing values for the export. The result must be assigned:
# replace() returns a new frame and leaves the original untouched.
country_ratios_df_eu27_excel = country_ratios_df_eu27_excel.replace(numpy.nan, '')
country_ratios_df_eu27_excel = country_ratios_df_eu27_excel[output_columns]

# Uncomment to write the export:
#country_ratios_df_eu27_excel.to_excel('EGT-km-and-cost-per-country'+str(datetime.date.today())+'.xlsx', index=False)

## print export, import, and within-EU pipelines

planned pipelines to import gas into the EU

In [27]:
# Classify gas pipelines by whether their start and end countries lie inside
# the selected (EU) region: imports end inside, exports start inside, and
# "within" pipelines do both.
is_gas_pipe = pipes_df_orig['Fuel'] == 'Gas'
starts_in_region = pipes_df_orig['StartCountry'].isin(region_df_touse.Country)
ends_in_region = pipes_df_orig['EndCountry'].isin(region_df_touse.Country)

pipes_df_eu_imports = pipes_df_orig.loc[is_gas_pipe & ~starts_in_region & ends_in_region].copy()
pipes_df_eu_exports = pipes_df_orig.loc[is_gas_pipe & starts_in_region & ~ends_in_region].copy()
pipes_df_eu_within = pipes_df_orig.loc[is_gas_pipe & starts_in_region & ends_in_region].copy()

## creating Table 5 (appendix)

In [28]:
# Hand-curated list of planned (construction/proposed) pipelines importing
# gas into the EU. It was built from the output of:
#   pipes_df_eu_imports.loc[pipes_df_eu_imports.Status.isin(['Proposed','Construction'])].ProjectID
import_projectids = [
    'P0463', 'P0479', 'P0684', 'P0702',
    'P0732', 'P0827', 'P1321', 'P1324',
    # 'P1773' left out: this is the Siret-Khotyn Gas Pipeline, doesn't count
    'P2264', 'P3335',
    'P2727',  # Gheraesti-Siret Gas Pipeline (Ukraine to Romania, bidirectional), added manually
    # 'P3522' left out: UK-Ireland interconnector
    'P3203',  # Trans-Adriatic Gas Pipeline TAP Expansion
]

# Every other planned project in the EU27 frame (set-derived, so unordered).
is_planned_segment = country_ratios_df_eu27['Status'].isin(['Proposed', 'Construction'])
planned_projectids = set(country_ratios_df_eu27.loc[is_planned_segment, 'ProjectID'])
all_other_projectids = list(planned_projectids - set(import_projectids))

output_columns = [
    'Country',
    'PipelineName',
    'CapacityBcm/y',
    'MergedKmByCountry',
    'TotalEstCostEuroMillions',
]

# Table 5: import pipelines ordered by status, name, then descending cost,
# with missing values shown as blanks.
(
    country_ratios_df_eu27
    .loc[country_ratios_df_eu27['ProjectID'].isin(import_projectids)]
    .sort_values(['Status', 'PipelineName', 'TotalEstCostEuroMillions'],
                 ascending=[True, True, False])[output_columns]
    .replace(numpy.nan, '')
    .style.hide_index()
)

Country,PipelineName,CapacityBcm/y,MergedKmByCountry,TotalEstCostEuroMillions
Denmark,Baltic Pipe Project ‡,10.0,423.99,1453.680001
Poland,Baltic Pipe Project ‡,10.0,102.14,350.194286
Sweden,Baltic Pipe Project ‡,10.0,86.37,296.125714
Cyprus,Anamur to North Cyprus Gas Pipeline,,42.07,144.033366
Croatia,Bosnia and Herzegovina–Croatia South Interconnection Gas Pipeline,1.5,62.22,213.02011
Greece,East Med Gas Pipeline ‡,10.0,1292.7,4147.700535
Cyprus,East Med Gas Pipeline ‡,10.0,571.27,1832.951872
Romania,Gheraesti-Siret Gas Pipeline,,145.14,124.263698
Croatia,Ionian Adriatic Gas Pipeline,5.0,263.54,285.989705
Cyprus,Israel Cyprus Gas Pipeline,1.0,152.18,217.311302


## creating Table 7 (appendix)

### longer than 150 km

In [29]:
# Split the non-import planned projects by total pipeline length.
# A project is "over threshold" when the whole pipeline is longer than 150 km
# and it has a segment in the EU27 frame. (Earlier drafts used per-country
# cut-offs of 75 km and 100 km on MergedKmByCountry instead.)
is_over_150_km = country_ratios_df_eu27['MergedKmByPipeline'] > 150
is_other_project = country_ratios_df_eu27['ProjectID'].isin(all_other_projectids)

projectids_threshold = list(
    set(country_ratios_df_eu27.loc[is_over_150_km & is_other_project, 'ProjectID'])
)
projectids_under_threshold = list(set(all_other_projectids) - set(projectids_threshold))

len(projectids_threshold)

24

In [30]:
# Columns shown in Table 7 (SegmentName and Status are deliberately omitted).
output_columns = ['Country',
                  'PipelineName',
                  'CapacityBcm/y',
                  'MergedKmByPipeline',
                  'MergedKmByCountry',
                  'TotalEstCostEuroMillions']

# Table 7: projects longer than the threshold, ordered by status, name, then
# descending cost. NaNs are blanked on the displayed copy only. Replacing them
# in place on country_ratios_df_eu27 would turn its numeric columns into
# object columns containing '' and corrupt the sums computed in later cells.
(
    country_ratios_df_eu27
    .loc[country_ratios_df_eu27.ProjectID.isin(projectids_threshold)]
    .sort_values(['Status','PipelineName','TotalEstCostEuroMillions'], ascending=[True,True,False])[output_columns]
    .replace(numpy.nan, '')
    .style.hide_index()
)



Country,PipelineName,CapacityBcm/y,MergedKmByPipeline,MergedKmByCountry,TotalEstCostEuroMillions
Bulgaria,Bulgaria-Serbia Interconnector Gas Pipeline ‡,1.8,170.0,62.11,48.957294
Poland,Gas Interconnection Poland-Lithuania,2.4,508.0,343.0,382.161418
Lithuania,Gas Interconnection Poland-Lithuania,2.4,508.0,165.0,183.838583
Bulgaria,Gas Interconnector Greece-Bulgaria (IGB) ‡,3.0,184.0,157.15,204.978261
Greece,Gas Interconnector Greece-Bulgaria (IGB) ‡,3.0,184.0,26.85,35.021739
Poland,Gustorzyn-Wronów Gas Pipeline,,308.0,308.0,1054.487203
Italy,Methanization of Sardinia Project,,573.0,573.0,614.657896
Poland,Pogórska-Wola-Tworzen Gas Pipeline,,168.0,168.0,300.500001
Slovakia,Poland-Slovakia Gas Pipeline,5.7,165.0,108.19,177.038182
Poland,Poland-Slovakia Gas Pipeline,5.7,165.0,56.81,92.961818


### shorter than 150 km

In [31]:
# Segments of the planned projects that did not exceed the length threshold.
is_under_threshold = country_ratios_df_eu27['ProjectID'].isin(projectids_under_threshold)
below_threshold_df = country_ratios_df_eu27.loc[is_under_threshold]

# Total kilometres and estimated cost per status.
below_threshold_df.groupby('Status')[['MergedKmByCountry', 'TotalEstCostEuroMillions']].sum()



Unnamed: 0_level_0,MergedKmByCountry,TotalEstCostEuroMillions
Status,Unnamed: 1_level_1,Unnamed: 2_level_1
Construction,609.5,1308.200141
Proposed,1925.65,4727.202989


In [32]:
# Project IDs of the under-threshold segments (one entry per segment row).
below_threshold_df['ProjectID']

1594    P1342
1595    P1343
1599    P1346
1600    P1347
1601    P1348
1602    P1349
1749    P1487
1883    P1689
1884    P1690
1885    P1691
1901    P1705
1903    P1706
1905    P1708
1906    P1708
1910    P1710
1911    P1711
1912    P1712
1913    P1712
2245    P2083
2246    P2084
2250    P2088
2251    P2089
2252    P2090
2356    P2191
2438    P2265
2439    P2266
2444    P2270
2445    P2271
2446    P2272
2650    P2467
2909    P2721
2939    P2749
3299    P3104
3303    P3108
3304    P3109
3333    P3137
3334    P3138
3441    P3243
3442    P3244
3630    P3418
3631    P3419
3632    P3420
3708    P3494
3815    P3606
Name: ProjectID, dtype: object

In [33]:
# Total within-country kilometres of all planned (proposed/construction) segments.
is_planned = country_ratios_df_eu27['Status'].isin(['Proposed', 'Construction'])
country_ratios_df_eu27.loc[is_planned, 'MergedKmByCountry'].sum()

15731.67

# import terminals now

In [34]:
# Authorize again and switch `spreadsheet` to the terminals workbook.
credentials_directory = '/Users/baird/Dropbox/_google-api/'
client_secret_path = credentials_directory + 'client_secret.json'
gc = pygsheets.authorize(client_secret=client_secret_path)

# Terminals workbook (mar 22 2022 version for EGT).
# Alternative workbook key, currently unused: '1tcS6Wd-Wp-LTDpLzFgJY_RSNDnbyubW3J_9HKIAys4A'
spreadsheet = gc.open_by_key('1BHiidrJnzh96Em3KrqBpCoWaJ3GB5OeCY9dHVaEDEFs')

terms_df_orig = (
    spreadsheet
    .worksheet('title', 'Terminals')
    .get_as_df()
)

In [35]:
# '--' is the sheet's "no value" placeholder; convert it to NaN first.
terms_df_orig = terms_df_orig.replace('--', numpy.nan)

# Keep a terminal only if it is not an oil terminal, has a wiki page,
# and has a status filled in.
is_not_oil = terms_df_orig['Type1'] != 'Oil'
has_wiki_page = terms_df_orig['Wiki'] != ''
has_status = terms_df_orig['Status'] != ''
terms_df_orig = terms_df_orig.loc[is_not_oil & has_wiki_page & has_status]

In [36]:
# Tag PCI5 terminal names with a trailing double dagger.
# NOTE: re-running this cell appends another dagger each time.
is_pci5_terminal = terms_df_orig['PCI5'] == 'yes'
terms_df_orig.loc[is_pci5_terminal, 'Name'] += ' ' + double_dagger

In [37]:
# Check which terminal names received the dagger.
terms_df_orig.loc[terms_df_orig['PCI5'].eq('yes'), 'Name']

494        Cyprus LNG Terminal (Import) ‡
881    Polish Baltic Sea Coast Terminal ‡
Name: Name, dtype: object

In [38]:
# Terminals located in one of the selected (EU27) countries.
is_terminal_in_region = terms_df_orig['Country'].isin(region_df_touse['Country'])
terms_df_eu27 = terms_df_orig.loc[is_terminal_in_region].copy()

In [39]:
# Unit conversions used for terminals.
# Multiply a capacity in mtpa by this to convert it to bcm.
mtpa_multiplier = 1 / 0.7352941863
# Multiply USD by this to get Euro; should match the currency conversions in the Sheet.
euro_multiplier = 1 / 1.14

In [40]:
# Import terminals within the EU27: the basis for the cost statistics.
is_import_facility = terms_df_eu27['Facility'] == 'Import'
terms_cost_calc_eu27 = terms_df_eu27.loc[is_import_facility].copy()

In [41]:
# Total capacity (bcm/y) of operating EU27 import terminals.
is_operating = terms_cost_calc_eu27['Status'] == 'Operating'
terms_cost_calc_eu27.loc[is_operating, 'CapacityInBcm/y'].sum()

160.78

### outliers and special cases

In [42]:
# Exclude outlier terminals from the cost statistics (Ravenna = 068300).
outliers_comboids = ['T068300']
is_outlier_terminal = terms_cost_calc_eu27['ComboID'].isin(outliers_comboids)
terms_cost_calc_eu27 = terms_cost_calc_eu27.loc[~is_outlier_terminal]

### create onshore and floating dfs

In [43]:
# Split the import terminals into onshore (Floating left blank) and floating
# (Floating == 'yes'). Each part is an explicit copy so that columns can be
# added to it later without pandas raising SettingWithCopyWarning.
terms_cost_calc_eu27_onshore = terms_cost_calc_eu27.loc[terms_cost_calc_eu27['Floating']==''].copy()
terms_cost_calc_eu27_floating = terms_cost_calc_eu27.loc[terms_cost_calc_eu27['Floating']=='yes'].copy()

### how many go into calculation - floating

In [44]:
# How many floating terminals exist, and how many of them have a known cost.
print(terms_cost_calc_eu27_floating.shape)
print(terms_cost_calc_eu27_floating[terms_cost_calc_eu27_floating['CostUSDPerBcm/y'].notna()].shape)

# Convert USD per bcm/y into millions of Euro per bcm/y. assign() returns a
# new frame, so no column is written into a slice (this write used to raise
# SettingWithCopyWarning). Rows without a cost stay NaN.
terms_cost_calc_eu27_floating = terms_cost_calc_eu27_floating.assign(
    **{'CostMillionEuroPerBcm/y':
       terms_cost_calc_eu27_floating['CostUSDPerBcm/y'] * euro_multiplier / 1e6}
)

# For floating terminals only a missing cost per bcm/y disqualifies a row.
# None of them are expansions (FSRU/FLNG expansions are probably very rare),
# but the name filter is applied anyway.
terms_cost_calc_eu27_floating_noexpansions = terms_cost_calc_eu27_floating.loc[
    ~terms_cost_calc_eu27_floating['Name'].str.contains('Expansion')]

# Show the floating terminals that enter the cost calculation.
terms_cost_calc_eu27_floating_noexpansions.loc[
    terms_cost_calc_eu27_floating_noexpansions['CostUSDPerBcm/y'].notna(),
    ['Name', 'Country', 'Status', 'Wiki', 'CostMillionEuroPerBcm/y']
].style.hide_index()


(15, 65)
(10, 65)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Name,Country,Status,Wiki,CostMillionEuroPerBcm/y
Krk LNG Terminal,Croatia,Operating,https://www.gem.wiki/Hrvatska_LNG_Terminal,85.882361
Krk LNG Terminal Phase 2,Croatia,Proposed,https://www.gem.wiki/Hrvatska_LNG_Terminal,110.064349
Cyprus LNG Terminal (Import) ‡,Cyprus,Construction,https://www.gem.wiki/Cyprus_LNG_Terminal,382.352977
Wilhelmshaven LNG Terminal,Germany,Proposed,https://www.gem.wiki/Wilhelmshaven_LNG_Terminal,63.596491
Toscana LNG Terminal,Italy,Operating,https://www.gem.wiki/Toscana_LNG_Terminal,144.432787
Klaipeda LNG Terminal,Lithuania,Operating,https://www.gem.wiki/Klaipeda_LNG_Terminal,25.535937
Alexandroupolis LNG Terminal,Greece,Proposed,https://www.gem.wiki/Alexandroupolis_LNG_Terminal,47.540984
Cork FSRU Terminal,Ireland,Cancelled,https://www.gem.wiki/Cork_LNG_Terminal,31.25
Skulte LNG Terminal,Latvia,Proposed,https://www.gem.wiki/Skulte_LNG_Terminal,17.741935
Dioriga FSRU Terminal,Greece,Proposed,https://www.gem.wiki/Dioriga_FSRU_Terminal,115.384615


In [55]:
# Same floating terminals as above, this time showing capacity instead of the wiki link.
floating_has_cost = terms_cost_calc_eu27_floating_noexpansions['CostUSDPerBcm/y'].notna()
terms_cost_calc_eu27_floating_noexpansions.loc[
    floating_has_cost,
    ['Name', 'Country', 'Status', 'CapacityInBcm/y', 'CostMillionEuroPerBcm/y']
].style.hide_index()




Name,Country,Status,CapacityInBcm/y,CostMillionEuroPerBcm/y
Krk LNG Terminal,Croatia,Operating,2.72,85.882361
Krk LNG Terminal Phase 2,Croatia,Proposed,4.35,110.064349
Cyprus LNG Terminal (Import) ‡,Cyprus,Construction,0.82,382.352977
Wilhelmshaven LNG Terminal,Germany,Proposed,10.0,63.596491
Toscana LNG Terminal,Italy,Operating,3.81,144.432787
Klaipeda LNG Terminal,Lithuania,Operating,3.94,25.535937
Alexandroupolis LNG Terminal,Greece,Proposed,6.1,47.540984
Cork FSRU Terminal,Ireland,Cancelled,4.0,31.25
Skulte LNG Terminal,Latvia,Proposed,6.2,17.741935
Dioriga FSRU Terminal,Greece,Proposed,2.6,115.384615


### how many go into calculation - onshore

In [46]:
# How many onshore terminals exist, and how many have a known cost and are
# not expansion projects.
print(terms_cost_calc_eu27_onshore.shape)
print(terms_cost_calc_eu27_onshore.loc[(terms_cost_calc_eu27_onshore['CostUSDPerBcm/y'].notna()) &
                        (~terms_cost_calc_eu27_onshore['Name'].str.contains('Expansion'))].shape)

# Convert USD per bcm/y into millions of Euro per bcm/y. assign() returns a
# new frame, so no column is written into a slice (this write used to raise
# SettingWithCopyWarning).
terms_cost_calc_eu27_onshore = terms_cost_calc_eu27_onshore.assign(
    **{'CostMillionEuroPerBcm/y':
       terms_cost_calc_eu27_onshore['CostUSDPerBcm/y'] * euro_multiplier / 1e6}
)

# For onshore terminals, leave out anything missing a cost per bcm/y AND
# anything that is an expansion project.
terms_cost_calc_eu27_onshore_noexpansions = terms_cost_calc_eu27_onshore.loc[
    ~terms_cost_calc_eu27_onshore['Name'].str.contains('Expansion')]

# Show the onshore terminals that enter the cost calculation.
terms_cost_calc_eu27_onshore_noexpansions.loc[
    terms_cost_calc_eu27_onshore_noexpansions['CostUSDPerBcm/y'].notna(),
    ['Name', 'Country', 'Status', 'Wiki', 'CostMillionEuroPerBcm/y']
].style.hide_index()


(64, 65)
(11, 65)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Name,Country,Status,Wiki,CostMillionEuroPerBcm/y
Paldiski LNG Terminal,Estonia,Proposed,https://www.gem.wiki/Paldiski_LNG_Terminal,163.398708
Tallinn LNG Terminal,Estonia,Shelved,https://www.gem.wiki/Tallinn_LNG_Terminal,68.082795
Hamina LNG Terminal,Finland,Construction,https://www.gem.wiki/Hamina_LNG_Terminal,735.294186
Brunsbüttel LNG Terminal,Germany,Proposed,https://www.gem.wiki/Hamburg_LNG_Terminal,56.25
Shannon LNG Terminal Phase I,Ireland,Proposed,https://www.gem.wiki/Shannon_LNG_Terminal,232.142857
Porto Empedocle LNG Terminal,Italy,Shelved,https://www.gem.wiki/Porto_Empedocle_LNG_Terminal,79.65687
Gran Canaria LNG Terminal,Spain,Proposed,https://www.gem.wiki/Gran_Canaria_LNG_Terminal,199.632372
Tenerife LNG Terminal,Spain,Cancelled,https://www.gem.wiki/Tenerife_LNG_Terminal,220.588256
Stade LNG Terminal,Germany,Proposed,https://www.gem.wiki/Stade_LNG_Terminal,83.333333
HIGAS LNG Terminal,Italy,Operating,https://www.gem.wiki/HIGAS_LNG_Terminal,93.62746


In [52]:
# Same onshore terminals as above, with capacity added to the display.
onshore_has_cost = terms_cost_calc_eu27_onshore_noexpansions['CostUSDPerBcm/y'].notna()
terms_cost_calc_eu27_onshore_noexpansions.loc[
    onshore_has_cost,
    ['Name', 'Country', 'Status', 'CapacityInBcm/y', 'Wiki', 'CostMillionEuroPerBcm/y']
].style.hide_index()

Name,Country,Status,CapacityInBcm/y,Wiki,CostMillionEuroPerBcm/y
Paldiski LNG Terminal,Estonia,Proposed,2.45,https://www.gem.wiki/Paldiski_LNG_Terminal,163.398708
Tallinn LNG Terminal,Estonia,Shelved,3.67,https://www.gem.wiki/Tallinn_LNG_Terminal,68.082795
Hamina LNG Terminal,Finland,Construction,0.14,https://www.gem.wiki/Hamina_LNG_Terminal,735.294186
Brunsbüttel LNG Terminal,Germany,Proposed,8.0,https://www.gem.wiki/Hamburg_LNG_Terminal,56.25
Shannon LNG Terminal Phase I,Ireland,Proposed,2.8,https://www.gem.wiki/Shannon_LNG_Terminal,232.142857
Porto Empedocle LNG Terminal,Italy,Shelved,8.16,https://www.gem.wiki/Porto_Empedocle_LNG_Terminal,79.65687
Gran Canaria LNG Terminal,Spain,Proposed,1.36,https://www.gem.wiki/Gran_Canaria_LNG_Terminal,199.632372
Tenerife LNG Terminal,Spain,Cancelled,1.36,https://www.gem.wiki/Tenerife_LNG_Terminal,220.588256
Stade LNG Terminal,Germany,Proposed,12.0,https://www.gem.wiki/Stade_LNG_Terminal,83.333333
HIGAS LNG Terminal,Italy,Operating,0.41,https://www.gem.wiki/HIGAS_LNG_Terminal,93.62746


### t test to see if floating and onshore different

The p-value is about 0.22, so the difference between floating and onshore terminal costs is not statistically significant at the usual 0.05 level.

In [47]:
# Two-sample t-test on the known unit costs of onshore vs. floating terminals.
# NOTE(review): these samples come from the frames that still include
# expansion projects, whereas the means below use the *_noexpansions frames --
# confirm which sample is intended.
onshore_unit_costs = terms_cost_calc_eu27_onshore['CostMillionEuroPerBcm/y'].dropna().values
floating_unit_costs = terms_cost_calc_eu27_floating['CostMillionEuroPerBcm/y'].dropna().values
scipy.stats.ttest_ind(onshore_unit_costs, floating_unit_costs)

Ttest_indResult(statistic=1.258332420149705, pvalue=0.2203758872428529)

### mean floating and onshore costs

In [1515]:
# Mean floating-terminal cost in millions of Euro per bcm/y
# (the column is already in Euro, so euro_multiplier is not applied again).
terms_cost_calc_eu27_floating_noexpansions.loc[:, 'CostMillionEuroPerBcm/y'].mean()

102.37824360438597

In [1516]:
# Mean onshore-terminal cost in millions of Euro per bcm/y
# (the column is already in Euro, so euro_multiplier is not applied again).
terms_cost_calc_eu27_onshore_noexpansions.loc[:, 'CostMillionEuroPerBcm/y'].mean()

184.8769256012759

### calculate import terminal costs

In [1517]:
# EU27 import terminals (all statuses).
terms_df_eu27_import = terms_df_eu27.loc[terms_df_eu27['Facility'].eq('Import')]

In [1518]:
# EU27 export terminals (all statuses).
terms_df_eu27_export = terms_df_eu27.loc[terms_df_eu27['Facility'].eq('Export')]

In [1519]:
# Planned (proposed or under construction) EU27 import terminals. An explicit
# copy is taken so the new cost column is not written into a slice of
# terms_df_eu27_import (that write used to raise SettingWithCopyWarning).
terms_df_eu27_import_future = terms_df_eu27_import.loc[
    terms_df_eu27_import['Status'].isin(['Proposed','Construction'])].copy()

# Mean unit costs (million Euro per bcm/y) from the terminals with known costs.
floating_mean_cost = terms_cost_calc_eu27_floating_noexpansions['CostMillionEuroPerBcm/y'].mean()
onshore_mean_cost = terms_cost_calc_eu27_onshore_noexpansions['CostMillionEuroPerBcm/y'].mean()

# Estimate each terminal's total cost as capacity times the mean unit cost of
# its type. The column is initialised exactly once: resetting it to NaN
# between the floating and onshore steps erased every floating estimate.
is_floating_terminal = terms_df_eu27_import_future['Floating'] == 'yes'
terms_df_eu27_import_future['CostMillionEuroTotal'] = numpy.nan
terms_df_eu27_import_future.loc[is_floating_terminal, 'CostMillionEuroTotal'] = (
    terms_df_eu27_import_future.loc[is_floating_terminal, 'CapacityInBcm/y'] * floating_mean_cost
)
terms_df_eu27_import_future.loc[~is_floating_terminal, 'CostMillionEuroTotal'] = (
    terms_df_eu27_import_future.loc[~is_floating_terminal, 'CapacityInBcm/y'] * onshore_mean_cost
)

# Where an actual cost estimate exists, it replaces the capacity-based one
# (converted from USD to millions of Euro).
costs_exist_TF = terms_df_eu27_import_future['CostEstUSD'].notna()
terms_df_eu27_import_future.loc[costs_exist_TF, 'CostMillionEuroTotal'] = (
    terms_df_eu27_import_future.loc[costs_exist_TF, 'CostEstUSD'] * euro_multiplier / 1e6
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


### correct outliers/special cases

In [1520]:
# The 116 million reported for Zeebrugge covers BOTH expansion projects:
#   T042601  https://www.gem.wiki/Fluxys_Zeebrugge_LNG_Terminal  Zeebrugge LNG Terminal, 2024 Expansion
#   T042602  https://www.gem.wiki/Fluxys_Zeebrugge_LNG_Terminal  Zeebrugge LNG Terminal, 2026 Expansion
# so the cost of the listed ComboID (T042602) is set to zero.
outliers_comboids = ['T042602']
is_zeroed_terminal = terms_df_eu27_import_future['ComboID'].isin(outliers_comboids)
terms_df_eu27_import_future.loc[is_zeroed_terminal, 'CostMillionEuroTotal'] = 0.0


# creating table 6 (appendix)

In [1521]:
# List the columns available on the planned-import-terminals frame.
terms_df_eu27_import_future.columns

Index(['TerminalID', 'ProjectID', 'ComboID', 'Wiki', 'Name', 'Type1',
       'Facility', 'Country', 'Researcher', 'LastUpdated', 'Status',
       'OtherEnglishNames', 'Owner', 'Type2', 'ProposalYear',
       'ConstructionYear', 'StartYear1', 'StartYear2', 'StartYear3', 'Delayed',
       'DelayType', 'StartYearEarliest', 'ShelvedYear', 'CancelledYear',
       'StopYear', 'ShelvedCancelledStatusType', 'Capacity', 'CapacityUnits',
       'CapacityInMtpa', 'CapacityInBcm/y', 'Region', 'Location',
       'Prefecture/District', 'State/Province', 'Latitude', 'Longitude',
       'Accuracy', 'Source', 'PowerPlantsSupplied', 'CostEst', 'CostEstUnits',
       'CostEstYear', 'CostEstUSD', 'FID', 'FIDYear', 'WriteDown', 'ReExport',
       'EuropeTracker', 'PCINumber', 'PCI3', 'PCI4', 'PCI5', 'Floating',
       'Opposition', 'ESJNotes', 'Defeated', 'OtherLanguageTerminalName',
       'OtherLanguageWikiPage', 'H2Proposed', 'H2Notes', 'ResearcherNotes1',
       'ResearcherNotes2', 'ResearcherNotes3', 

In [1522]:
# Total planned LNG import capacity (bcm/y) across the EU27.
terms_df_eu27_import_future.loc[:, 'CapacityInBcm/y'].sum()

106.98

In [1523]:
# Table 6 columns ('Status' is used for ordering only and is not shown).
output_columns = ['Country', 'Name', 'CapacityInBcm/y', 'CostMillionEuroTotal']

# Planned import terminals ordered by status then name, blanks for missing values.
(
    terms_df_eu27_import_future
    .sort_values(['Status', 'Name'], ascending=[True, True])[output_columns]
    .replace(numpy.nan, '')
    .style.hide_index()
)

Country,Name,CapacityInBcm/y,CostMillionEuroTotal
Cyprus,Cyprus LNG Terminal (Import) ‡,0.82,312.0
Finland,Hamina LNG Terminal,0.14,100.0
Poland,Świnoujście Polskie LNG Terminal Expansion,2.5,427.0
Poland,Świnoujście Polskie LNG Terminal Expansion 2,0.8,147.90154
Greece,Alexandroupolis LNG Terminal,6.1,290.0
Germany,Brunsbüttel LNG Terminal,8.0,450.0
Romania,Constanta LNG Terminal,8.16,1508.595713
Malta,Delimara Onshore LNG Terminal,,
Greece,Dioriga FSRU Terminal,2.6,300.0
France,Fos Cavaou LNG Terminal Expansion 1,2.72,1571.0


In [1524]:
# Print the planned import terminal names alphabetically, one per line.
print(*sorted(terms_df_eu27_import_future.Name), sep='\n')

Alexandroupolis LNG Terminal
Brunsbüttel LNG Terminal
Constanta LNG Terminal
Cyprus LNG Terminal (Import) ‡
Delimara Onshore LNG Terminal
Dioriga FSRU Terminal
Fos Cavaou LNG Terminal Expansion 1
Fos Cavaou LNG Terminal Expansion 2
Frederikshavn LNG Terminal
Gate LNG Terminal Expansion
Gran Canaria LNG Terminal
Hamina LNG Terminal
Krk LNG Terminal Phase 2
Montoir LNG Terminal Expansion
Mugardos LNG Terminal Expansion
Paldiski LNG Terminal
Polish Baltic Sea Coast Terminal ‡
Predator FSRU Terminal
Puerto de la Luz LNG Terminal
Rauma LNG Terminal
Shannon LNG Terminal Phase I
Shannon LNG Terminal Phase II
Shannon LNG Terminal Phase III
Skulte LNG Terminal
Stade LNG Terminal
Wilhelmshaven LNG Terminal
Zeebrugge LNG Terminal, 2024 Expansion
Zeebrugge LNG Terminal, 2026 Expansion
Świnoujście Polskie LNG Terminal Expansion
Świnoujście Polskie LNG Terminal Expansion 2


# creating table 2 - future gas infra in EU countries

In [1525]:
# Planned LNG import capacity per country, in bcm/y
# (the 'CapacityInMtpa' column could be summed the same way).
capacity_by_country = terms_df_eu27_import_future.groupby('Country')['CapacityInBcm/y']
capacity_by_country.sum()

Country
Belgium         8.16
Croatia         4.35
Cyprus          0.82
Denmark         0.07
Estonia         2.45
Finland         0.14
France         10.61
Germany        30.00
Greece          8.70
Ireland        11.46
Latvia          6.20
Malta           0.00
Netherlands     1.50
Poland          9.40
Romania         8.16
Spain           4.96
Name: CapacityInBcm/y, dtype: float64

In [1526]:
# Per-country sum of 'TotalEstCostEuroMillions' for rows whose ProjectID is in
# either import_projectids or all_other_projectids.
(
    country_ratios_df_eu27
    .loc[country_ratios_df_eu27.ProjectID.isin(import_projectids + all_other_projectids)]
    .groupby('Country')['TotalEstCostEuroMillions']
    .sum()
)

Country
Austria           106.000000
Bulgaria         2574.120651
Croatia          1197.741850
Cyprus           2436.302240
Denmark          1453.680001
Greece           7477.912669
Hungary           724.650278
Ireland            89.015153
Italy            4709.050293
Italy, Greece    1035.000000
Latvia             26.000000
Lithuania         183.838583
Malta             181.858415
Poland           3918.526783
Portugal          730.081210
Romania          3821.412933
Slovakia          238.529904
Slovenia          516.880733
Spain            4529.363316
Sweden            296.125714
Name: TotalEstCostEuroMillions, dtype: float64

In [1527]:
# Table 2: per-country totals of pipeline length, pipeline cost, LNG import
# capacity, LNG terminal cost and combined (pipeline + terminal) cost.
table2_columns = ['PipelineLengthKm','PipelineCostMillionEuro',
                  'LNGImportCapacityBcm/y','LNGTerminalCostMillionEuro','TotalCostMillionEuro']
table2_df = pandas.DataFrame(columns=table2_columns, index=region_df_touse.Country.tolist())

# Pipeline rows restricted to the selected project IDs, grouped once and reused
# for both the length and the cost columns. Assigning a grouped Series aligns it
# on the country index, so countries without matching rows become NaN.
table2_pipelines_by_country = country_ratios_df_eu27.loc[
    country_ratios_df_eu27.ProjectID.isin(import_projectids+all_other_projectids)
].groupby('Country')

table2_df['PipelineLengthKm'] = table2_pipelines_by_country['MergedKmByCountry'].sum()
table2_df['PipelineCostMillionEuro'] = table2_pipelines_by_country['TotalEstCostEuroMillions'].sum()

# add half of Trans-Adriatic expansion to Italy, half to Greece
# NOTE(review): 1035 is hard-coded; it matches the cost grouped under the
# combined 'Italy, Greece' country label, which no single-country index entry
# picks up -- confirm it still matches the sheet before re-running.
table2_df.loc['Greece','PipelineCostMillionEuro'] += 1035/2
table2_df.loc['Italy','PipelineCostMillionEuro'] += 1035/2

table2_terminals_by_country = terms_df_eu27_import_future.groupby('Country')
table2_df['LNGImportCapacityBcm/y'] = table2_terminals_by_country['CapacityInBcm/y'].sum()
table2_df['LNGTerminalCostMillionEuro'] = table2_terminals_by_country['CostMillionEuroTotal'].sum()
table2_df['TotalCostMillionEuro'] = table2_df[
    ['PipelineCostMillionEuro','LNGTerminalCostMillionEuro']].sum(axis=1)

# (Re)build the 'Total' row: drop a pre-existing one if present so it is not
# counted in the column sums, then add the fresh totals as the last row.
# pandas.concat is used because DataFrame.append no longer exists in pandas 2.x.
table2_df = table2_df.drop(index='Total', errors='ignore')
total = table2_df.sum(axis=0)
total.name = 'Total'
table2_df = pandas.concat([table2_df, total.to_frame().T])

# Blank out missing and zero cells for display.
table2_df = table2_df.replace(numpy.nan, '').replace(0.0, '')


In [1528]:
# Display the table. The trailing comment is an optional cross-check that sums
# the LNG capacity column without its final row.
table2_df#['LNGImportCapacityBcm/y'][:-1].replace('',numpy.nan).sum()

Unnamed: 0,PipelineLengthKm,PipelineCostMillionEuro,LNGImportCapacityBcm/y,LNGTerminalCostMillionEuro,TotalCostMillionEuro
Austria,28.0,106.0,,,106.0
Belgium,,,8.16,116.0,116.0
Bulgaria,1766.16,2574.120651,,,2574.120651
Croatia,869.44,1197.74185,4.35,479.0,1676.74185
Cyprus,831.78,2436.30224,0.82,312.0,2748.30224
Czech Republic,,,,,
Denmark,423.99,1453.680001,0.07,12.941385,1466.621385
Estonia,,,2.45,400.0,400.0
Finland,,,0.14,100.0,100.0
France,,,10.61,3076.730475,3076.730475


# creating table 4 (nsew_eu)

In [1529]:
# Regional (western / eastern / southern / northern EU) totals of cost and
# capacity for import pipelines and LNG terminals, split by project status.
region_list = [western_eu,eastern_eu,southern_eu,northern_eu]
index_list = ['Western EU','Eastern EU','Southern EU','Northern EU']
# Order matters for the per-region frames below: Proposed rows come first.
status_list = ['Proposed','Construction']


def _select_region_status(region_countries, status):
    """Select the rows belonging to one region and one project status.

    Parameters
    ----------
    region_countries : list
        Country names making up the region.
    status : str
        Value matched against the ``Status`` column.

    Returns
    -------
    tuple of (DataFrame, DataFrame, DataFrame)
        ``(pipeline_rows, pipeline_capacity_rows, terminal_rows)`` where
        ``pipeline_rows`` comes from ``country_ratios_df_eu27`` (matched on
        ``Country``), ``pipeline_capacity_rows`` from ``pipes_df_eu_imports``
        (matched on ``EndCountry``) and ``terminal_rows`` from
        ``terms_df_eu27_import_future`` (matched on ``Country``). Both pipeline
        selections are further limited to ``import_projectids``.
    """
    pipeline_rows = country_ratios_df_eu27.loc[
        country_ratios_df_eu27.Status.isin([status]) &
        country_ratios_df_eu27.Country.isin(region_countries) &
        country_ratios_df_eu27.ProjectID.isin(import_projectids)]
    pipeline_capacity_rows = pipes_df_eu_imports.loc[
        pipes_df_eu_imports.ProjectID.isin(import_projectids) &
        pipes_df_eu_imports.EndCountry.isin(region_countries) &
        pipes_df_eu_imports.Status.isin([status])]
    terminal_rows = terms_df_eu27_import_future.loc[
        terms_df_eu27_import_future.Status.isin([status]) &
        terms_df_eu27_import_future.Country.isin(region_countries)]
    return pipeline_rows, pipeline_capacity_rows, terminal_rows


# Float zeros: the cells receive float sums, so an integer-typed table would
# have to be upcast on assignment.
table7_nsew = pandas.DataFrame(0.0, columns=['EstCostMillionEuroConstruction',
                                        'CapacityInBcm/yConstruction',
                                        'EstCostMillionEuroProposed',
                                        'CapacityInBcm/yProposed'], index=index_list)

region_terminal_frames = {}
region_pipeline_frames = {}

for region_label, region_countries in zip(index_list, region_list):
    terminal_parts = []
    pipeline_parts = []

    for status in status_list:
        which_pipelines_df, pipeline_capacity_rows, which_terminals_df = \
            _select_region_status(region_countries, status)

        pipeline_cost = which_pipelines_df['TotalEstCostEuroMillions'].sum()
        pipeline_capacity = pipeline_capacity_rows['CapacityBcm/y'].sum()
        terminal_cost = which_terminals_df['CostMillionEuroTotal'].sum()
        terminal_capacity = which_terminals_df['CapacityInBcm/y'].sum()

        table7_nsew.loc[region_label, 'EstCostMillionEuro'+status] = terminal_cost+pipeline_cost
        table7_nsew.loc[region_label, 'CapacityInBcm/y'+status] = terminal_capacity+pipeline_capacity

        terminal_parts.append(which_terminals_df)
        pipeline_parts.append(which_pipelines_df)

    # One concat per region instead of repeated DataFrame.append calls
    # (DataFrame.append no longer exists in pandas 2.x).
    region_terminal_frames[region_label] = pandas.concat(terminal_parts)
    region_pipeline_frames[region_label] = pandas.concat(pipeline_parts)

# Per-region project listings used by the cells that follow.
terms_w_eu = region_terminal_frames['Western EU']
terms_n_eu = region_terminal_frames['Northern EU']
terms_s_eu = region_terminal_frames['Southern EU']
terms_e_eu = region_terminal_frames['Eastern EU']

pipes_w_eu = region_pipeline_frames['Western EU']
pipes_n_eu = region_pipeline_frames['Northern EU']
pipes_s_eu = region_pipeline_frames['Southern EU']
pipes_e_eu = region_pipeline_frames['Eastern EU']

# add trans-adriatic
# NOTE(review): hard-coded cost; presumably needed because the project is listed
# under a combined country label that the per-region country match misses --
# confirm the amount and that 'Proposed' is still its status.
table7_nsew.loc['Southern EU','EstCostMillionEuroProposed'] += 1035.

In [1530]:
# Display the regional cost / capacity totals, one row per EU region.
table7_nsew

Unnamed: 0,EstCostMillionEuroConstruction,CapacityInBcm/yConstruction,EstCostMillionEuroProposed,CapacityInBcm/yProposed
Western EU,0.0,0.0,7204.346174,61.73
Eastern EU,925.095826,13.3,4794.529851,65.76
Southern EU,312.0,0.82,11369.154258,34.66
Northern EU,1849.805715,0.14,12.941385,0.07


## which go into Table 7

In [1531]:
# Terminals collected in terms_n_eu, sorted by status, then country, then cost.
# The sort flags must be real booleans: the strings 'False'/'True' are all
# truthy (so the first key was never sorted descending) and newer pandas
# versions reject non-boolean `ascending` values outright. All-ascending keeps
# the ordering this cell has been rendering (Construction before Proposed).
(
    terms_n_eu[['Country','Wiki','Name','Facility',
                'Status','CostMillionEuroTotal']]
    .sort_values(['Status','Country','CostMillionEuroTotal'], ascending=[True, True, True])
    .replace(numpy.nan,'')
    .style.hide_index()
)

Country,Wiki,Name,Facility,Status,CostMillionEuroTotal
Finland,https://www.gem.wiki/Hamina_LNG_Terminal,Hamina LNG Terminal,Import,Construction,100.0
Denmark,https://www.gem.wiki/Frederikshavn_LNG_Terminal,Frederikshavn LNG Terminal,Import,Proposed,12.941385
Finland,https://www.gem.wiki/Rauma_LNG_terminal,Rauma LNG Terminal,Import,Proposed,


In [1532]:
# Pipeline rows collected in pipes_w_eu, sorted by status, country and cost,
# with NaN cells blanked for display.
(
    pipes_w_eu
    .loc[:, ['Country', 'PipelineName', 'SegmentName', 'Status',
             'MergedKmByCountry', 'TotalEstCostEuroMillions']]
    .sort_values(['Status', 'Country', 'TotalEstCostEuroMillions'])
    .replace(numpy.nan, '')
    .style
    .hide_index()
)

Country,PipelineName,SegmentName,Status,MergedKmByCountry,TotalEstCostEuroMillions


# creating figure 1 data

sum of Table 1 columns...

In [1533]:
# Per-country sum of 'CostMillionEuroTotal' for terminals with Status 'Proposed'.
(
    terms_df_eu27_import_future
    .loc[terms_df_eu27_import_future.Status.isin(['Proposed'])]
    .groupby('Country')['CostMillionEuroTotal']
    .sum()
    .fillna(0)
)

Country
Belgium         116.000000
Croatia         479.000000
Denmark          12.941385
Estonia         400.000000
Finland           0.000000
France         3076.730475
Germany        2085.964912
Greece          590.000000
Ireland        1648.335398
Latvia          110.000000
Malta             0.000000
Netherlands     277.315388
Poland          620.000000
Romania        1508.595713
Spain           397.500000
Name: CostMillionEuroTotal, dtype: float64

In [1534]:
# Figure 1 data: per-country cost of pipelines plus LNG terminals, split into
# construction and proposed, with a combined total.
figure1_table_columns = ['TotalConstructionCostMillionEuro','TotalProposedCostMillionEuro','Total']
figure1_table_df = pandas.DataFrame(0, columns=figure1_table_columns, index=region_df_touse.Country)

# Status value -> column that receives the summed cost for that status.
figure1_status_columns = {
    'Proposed': 'TotalProposedCostMillionEuro',
    'Construction': 'TotalConstructionCostMillionEuro',
}

for figure1_status, figure1_column in figure1_status_columns.items():
    figure1_pipeline_cost = (
        country_ratios_df_eu27
        .loc[country_ratios_df_eu27.Status.isin([figure1_status])]
        .groupby(['Country'])['TotalEstCostEuroMillions']
        .sum()
    )
    figure1_terminal_cost = (
        terms_df_eu27_import_future
        .loc[terms_df_eu27_import_future.Status.isin([figure1_status])]
        .groupby(['Country'])['CostMillionEuroTotal']
        .sum()
    )
    # Pipeline cost first (aligned on the country index), then terminal cost
    # added on top; fill_value=0 lets a country with only one of the two count.
    figure1_table_df[figure1_column] = figure1_pipeline_cost
    figure1_table_df[figure1_column] = figure1_table_df[figure1_column].add(
        figure1_terminal_cost, fill_value=0)

# Split the hard-coded 1035 evenly between Italy and Greece (proposed cost).
for figure1_shared_country in ['Italy', 'Greece']:
    figure1_table_df.loc[figure1_shared_country, 'TotalProposedCostMillionEuro'] += 1035/2.

figure1_table_df['Total'] = figure1_table_df[
    ['TotalConstructionCostMillionEuro','TotalProposedCostMillionEuro']].sum(axis=1)

# Largest total first, ties broken alphabetically by country.
figure1_table_df = figure1_table_df.sort_values(['Total','Country'], ascending=[False, True])

# Blank out missing and zero cells for display.
figure1_table_df = figure1_table_df.replace(numpy.nan, '').replace(0, '')

In [1535]:
# Display the per-country cost table, sorted by total cost (largest first).
figure1_table_df

Unnamed: 0_level_0,TotalConstructionCostMillionEuro,TotalProposedCostMillionEuro,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Greece,35.021739,8550.390929,8585.412669
Romania,431.381128,4898.627517,5330.008646
Italy,794.657897,4431.892396,5226.550293
Poland,3436.125278,1677.303045,5113.428323
Spain,,4926.863316,4926.863316
France,,3076.730475,3076.730475
Cyprus,312.0,2436.30224,2748.30224
Bulgaria,269.835555,2304.285095,2574.120651
Germany,,2085.964912,2085.964912
Ireland,,1737.350552,1737.350552


# report numbers

In [1539]:
# key points
# summed 'MergedKmByCountry' over all rows with Status 'Construction'
country_ratios_df_eu27.loc[
    country_ratios_df_eu27.Status.isin(['Construction']), 'MergedKmByCountry'
].sum()

3190.1099999999997

In [1540]:
# summed 'TotalEstCostEuroMillions' over all rows with Status 'Construction'
country_ratios_df_eu27.loc[
    country_ratios_df_eu27.Status.isin(['Construction']), 'TotalEstCostEuroMillions'
].sum()

6502.8025380257795

In [1541]:
# Distinct ProjectIDs among rows with Status 'Construction', then the mean
# 'LengthMergedKm' and the row count of the matching pipes_df_orig entries.
# NOTE(review): the row count equals the number of distinct projects only if
# ProjectID is unique in pipes_df_orig -- confirm.
unique_const = list(set(
    country_ratios_df_eu27.loc[country_ratios_df_eu27.Status.isin(['Construction']), 'ProjectID']))
construction_project_lengths = pipes_df_orig.loc[
    pipes_df_orig.ProjectID.isin(unique_const), 'LengthMergedKm']
print(construction_project_lengths.mean())
print(construction_project_lengths.size)

206.125
16


In [1542]:
# summed 'MergedKmByCountry' over all rows with Status 'Proposed'
country_ratios_df_eu27.loc[
    country_ratios_df_eu27.Status.isin(['Proposed']), 'MergedKmByCountry'
].sum()

12541.560000000001

In [1543]:
# summed 'TotalEstCostEuroMillions' over all rows with Status 'Proposed'
country_ratios_df_eu27.loc[
    country_ratios_df_eu27.Status.isin(['Proposed']), 'TotalEstCostEuroMillions'
].sum()

29743.288189856125

In [1544]:
# Distinct ProjectIDs among rows with Status 'Proposed', then the mean
# 'LengthMergedKm' and the row count of the matching pipes_df_orig entries.
# NOTE(review): the row count equals the number of distinct projects only if
# ProjectID is unique in pipes_df_orig -- confirm.
unique_prop = list(set(
    country_ratios_df_eu27.loc[country_ratios_df_eu27.Status.isin(['Proposed']), 'ProjectID']))
proposed_project_lengths = pipes_df_orig.loc[
    pipes_df_orig.ProjectID.isin(unique_prop), 'LengthMergedKm']
print(proposed_project_lengths.mean())
print(proposed_project_lengths.size)

326.81209677419355
62
