In [1]:
import pandas
import numpy
import pygsheets
import datetime
import pytz
import re

# for lat/lon checking
import geopandas
import shapely

In [2]:
# Authenticate with the service-account credentials and open the current workbook.
gc = pygsheets.authorize(service_account_env_var='GDRIVE_API_CREDENTIALS')
spreadsheet = gc.open_by_key('1tcS6Wd-Wp-LTDpLzFgJY_RSNDnbyubW3J_9HKIAys4A')

# Terminals tab, read starting at cell A3.
terminals_sheet = spreadsheet.worksheet('title', 'Terminals')
terms_df_orig = terminals_sheet.get_as_df(start='A3')

# Drop any empty/incomplete rows (those without a wiki page).
has_wiki_page = terms_df_orig.Wiki != ''
terms_df_orig = terms_df_orig.loc[has_wiki_page]

# Owners tab, indexed by ComboID.
owners_df_orig = (
    spreadsheet.worksheet('title', 'Terminal operators/owners (1/3)')
    .get_as_df(start='A2')
    .set_index('ComboID')
)

# Owner-parent relationships tab, indexed by Owner; rows with a blank Owner are dropped.
owner_parent_df_orig = (
    spreadsheet.worksheet('title', 'Owner–parent relationships (2/3)')
    .get_as_df(start='A1')
    .set_index('Owner')
)
owner_parent_df_orig = owner_parent_df_orig.loc[owner_parent_df_orig.index != '']

# Parent metadata tab, indexed by Parent.
parent_metadata_df = (
    spreadsheet.worksheet('title', 'Parent metadata (3/3)')
    .get_as_df(start='A2')
    .set_index('Parent')
)

In [3]:
# Treat the '--' placeholder as a missing value throughout the terminals table.
terms_df_orig = terms_df_orig.replace('--', numpy.nan)

In [4]:
# Load the 'Country dictionary' tab, read starting at cell A2.
region_df_orig = spreadsheet.worksheet('title', 'Country dictionary').get_as_df(start='A2')
# Disabled alternative: keep only countries flagged 'Yes' in the AsiaGasTracker column.
#region_df_agt = region_df_orig.loc[region_df_orig['AsiaGasTracker']=='Yes']

# Use an independent copy of the full country dictionary (no regional filter).
region_df_touse = region_df_orig.copy()
#region_df_agt.copy()

## dataframes for owners and such

In [5]:
# Restrict owners and terminals to countries listed in the country dictionary.
countries_to_use = region_df_touse['Country'].tolist()

owner_in_scope = owners_df_orig['Country'].isin(countries_to_use)
owners_df_touse = owners_df_orig.loc[owner_in_scope]

terminal_in_scope = terms_df_orig['Country'].isin(countries_to_use)
terms_df_touse = terms_df_orig.loc[terminal_in_scope]

### check for unused parents in the owners/parents tab

In [6]:
# combine all Parent1, Parent2, ... columns, get unique values
# look at all Parent values in the next tab over, do a set on each and see diffs

## Build the Owner1..Owner10 / Parent1..Parent10 column names and their '%' counterparts.
slot_numbers = range(1, 10 + 1)

owner_col_names = [f'Owner{num}' for num in slot_numbers]
owner_pct_col_names = [f'Owner{num}%' for num in slot_numbers]

parent_col_names = [f'Parent{num}' for num in slot_numbers]
parent_pct_col_names = [f'Parent{num}%' for num in slot_numbers]


#parent_metadata_df

In [7]:
# Index labels of the parent metadata table, as a plain list.
parent_list = parent_metadata_df.index.tolist()

In [8]:
# Index labels of the owner-parent relationships table, as a plain list.
owner_list = owner_parent_df_orig.index.tolist()

In [9]:
# Flatten every Parent1..Parent10 cell of the owner-parent sheet into one
# de-duplicated collection, to compare against parent_list.
parent_names_from_owners = set(
    owner_parent_df_orig[parent_col_names].values.flatten().tolist()
)
# Drop the empty string left by unused parent slots. discard() is used instead of
# list.remove() so the cell still runs when no empty entry is present.
parent_names_from_owners.discard('')
parent_list_from_owners = list(parent_names_from_owners)

compare parent_list (from metadata) to parent_list_from_owners (from parent-owner relationships sheet)

In [10]:
# THIS IS THE LIST TO REMOVE from parent_metadata:
# names in parent_list (metadata tab) that never appear in parent_list_from_owners.
list(set(parent_list)-set(parent_list_from_owners))

[]

In [11]:
# THIS IS THE LIST TO ADD TO parent_metadata (should be red in the p-o relationships sheet):
# names in parent_list_from_owners that are missing from parent_list (metadata tab).
list(set(parent_list_from_owners)-set(parent_list))

[]

## how many new projects are there?

using ComboID to understand this

In [12]:
# Import the OLDER version of the database; use the previous version.
gc = pygsheets.authorize(service_account_env_var='GDRIVE_API_CREDENTIALS')
# Title of the sheet is:
# Copy of LNG Terminals - main -
# from August 19, 11:32 AM - July 2022 version of map/database - for May 2023 Asia Gas Tracker update
spreadsheet_previous = gc.open_by_key('1FVuw76YU-jmAzfSWJ2ixwBQB_wXEK9aGCa4Q6OCOUzY')

# Terminals tab of the previous version, read starting at cell A2.
terms_df_orig_previous = spreadsheet_previous.worksheet('title', 'Terminals').get_as_df(start='A2')

# Drop any empty/incomplete rows (those without a wiki page), then turn the
# '--' placeholder into a missing value.
terms_df_orig_previous = (
    terms_df_orig_previous
    .loc[terms_df_orig_previous.Wiki != '']
    .replace('--', numpy.nan)
)

## terminal IDs that have been deleted, added

In [13]:
previous_terminal_ids = terms_df_orig_previous.TerminalID.tolist()
current_terminal_ids = terms_df_orig.TerminalID.tolist()

# Build each set once and reuse it for both directions of the comparison.
previous_terminal_id_set = set(previous_terminal_ids)
current_terminal_id_set = set(current_terminal_ids)

# In the previous version only -> deleted.
print("TerminalIDs that have been deleted from previous to current version")
print(sorted(previous_terminal_id_set - current_terminal_id_set))

# In the current version only -> added.
print("TerminalIDs that have been added from previous to current version")
print(sorted(current_terminal_id_set - previous_terminal_id_set))

TerminalIDs that have been deleted from previous to current version
['T0425', 'T0508', 'T0582', 'T0620', 'T0634', 'T0696', 'T0719', 'T0806', 'T0861', 'T0864', 'T0867', 'T0868', 'T0872', 'T0908', 'T0919', 'T0920', 'T0954', 'T1062']
TerminalIDs that have been added from previous to current version
['T1090', 'T1091', 'T1092', 'T1094', 'T1095', 'T1096', 'T1097', 'T1098', 'T1099', 'T1100', 'T1101', 'T1102', 'T1103', 'T1104', 'T1105', 'T1106', 'T1107', 'T1108', 'T1109', 'T1110', 'T1111', 'T1112', 'T1113', 'T1114', 'T1115', 'T1116', 'T1117', 'T1118', 'T1119', 'T1120', 'T1121', 'T1122', 'T1123', 'T1124', 'T1125', 'T1126', 'T1127', 'T1128', 'T1129', 'T1130', 'T1131', 'T1132', 'T1133', 'T1134', 'T1135', 'T1136', 'T1138', 'T1139', 'T1140', 'T1141', 'T1142', 'T1143', 'T1144', 'T1145', 'T1146', 'T1147', 'T1148', 'T1149']


In [14]:
previous_combo_ids = terms_df_orig_previous.ComboID.tolist()
current_combo_ids = terms_df_orig.ComboID.tolist()

# Build each set once and reuse it for both directions of the comparison.
previous_combo_id_set = set(previous_combo_ids)
current_combo_id_set = set(current_combo_ids)

# In the previous version only -> deleted.
print("ComboIDs that have been deleted from previous to current version")
print(sorted(previous_combo_id_set - current_combo_id_set))

# In the current version only -> added.
print("ComboIDs that have been added from previous to current version")
print(sorted(current_combo_id_set - previous_combo_id_set))

ComboIDs that have been deleted from previous to current version
['T021903', 'T021904', 'T021905', 'T042500', 'T046402', 'T046403', 'T050800', 'T051602', 'T051603', 'T058200', 'T062001', 'T063400', 'T069600', 'T071901', 'T078902', 'T080601', 'T080904', 'T080905', 'T080906', 'T086100', 'T086400', 'T086700', 'T086800', 'T087200', 'T090800', 'T091901', 'T092001', 'T095400', 'T106200']
ComboIDs that have been added from previous to current version
['T021406', 'T021611', 'T021612', 'T022111', 'T024007', 'T030303', 'T030304', 'T030305', 'T030902', 'T031704', 'T031801', 'T034902', 'T038401', 'T039102', 'T039203', 'T039901', 'T040201', 'T040202', 'T040502', 'T041002', 'T041102', 'T041701', 'T042902', 'T043002', 'T043103', 'T044601', 'T045701', 'T047401', 'T048601', 'T048801', 'T050403', 'T055701', 'T056201', 'T056202', 'T070702', 'T070703', 'T071101', 'T071501', 'T073101', 'T079301', 'T079401', 'T079601', 'T085901', 'T085902', 'T095601', 'T106903', 'T107701', 'T107702', 'T109001', 'T109101', '

print all deleted ComboIDs

In [15]:
# One deleted ComboID per line, in sorted order.
deleted_combo_ids_sorted = sorted(set(previous_combo_ids) - set(current_combo_ids))
for combo_id in deleted_combo_ids_sorted:
    print(combo_id)

T021903
T021904
T021905
T042500
T046402
T046403
T050800
T051602
T051603
T058200
T062001
T063400
T069600
T071901
T078902
T080601
T080904
T080905
T080906
T086100
T086400
T086700
T086800
T087200
T090800
T091901
T092001
T095400
T106200


In [16]:
# Number of ComboIDs present in the previous version but not in the current one.
len(set(previous_combo_ids) - set(current_combo_ids))

29

print the list of TerminalIDs that have been ADDED

In [17]:
# for i in sorted(list(set(current_terminal_ids)-set(previous_terminal_ids))):
#     print(i)

In [18]:
# Added TerminalIDs (current minus previous), sorted and joined into one comma-separated string.
", ".join(sorted(list(set(current_terminal_ids)-set(previous_terminal_ids))))

'T1090, T1091, T1092, T1094, T1095, T1096, T1097, T1098, T1099, T1100, T1101, T1102, T1103, T1104, T1105, T1106, T1107, T1108, T1109, T1110, T1111, T1112, T1113, T1114, T1115, T1116, T1117, T1118, T1119, T1120, T1121, T1122, T1123, T1124, T1125, T1126, T1127, T1128, T1129, T1130, T1131, T1132, T1133, T1134, T1135, T1136, T1138, T1139, T1140, T1141, T1142, T1143, T1144, T1145, T1146, T1147, T1148, T1149'

In [19]:
# Number of TerminalIDs present in the current version but not in the previous one.
len(set(current_terminal_ids) - set(previous_terminal_ids))

58

In [20]:
# for i in sorted(list(set(current_combo_ids)-set(previous_combo_ids))):
#     print(i)

In [21]:
# Added ComboIDs (current minus previous), sorted and joined into one comma-separated string.
", ".join(sorted(list(set(current_combo_ids)-set(previous_combo_ids))))

'T021406, T021611, T021612, T022111, T024007, T030303, T030304, T030305, T030902, T031704, T031801, T034902, T038401, T039102, T039203, T039901, T040201, T040202, T040502, T041002, T041102, T041701, T042902, T043002, T043103, T044601, T045701, T047401, T048601, T048801, T050403, T055701, T056201, T056202, T070702, T070703, T071101, T071501, T073101, T079301, T079401, T079601, T085901, T085902, T095601, T106903, T107701, T107702, T109001, T109101, T109201, T109401, T109500, T109600, T109700, T109800, T109900, T110000, T110100, T110101, T110200, T110300, T110400, T110501, T110502, T110600, T110701, T110800, T110900, T111000, T111100, T111201, T111202, T111203, T111301, T111400, T111500, T111600, T111700, T111801, T111900, T112010, T112100, T112200, T112301, T112401, T112501, T112600, T112700, T112801, T112901, T112902, T113001, T113100, T113200, T113300, T113400, T113500, T113600, T113800, T113900, T114000, T114100, T114101, T114200, T114300, T114400, T114500, T114501, T114600, T114700, 

In [22]:
# Number of ComboIDs present in the current version but not in the previous one.
len(set(current_combo_ids) - set(previous_combo_ids))

113

In [23]:
# (rows, columns) of the current terminals table.
terms_df_orig.shape

(1275, 82)

In [24]:
# (rows, columns) of the previous-version terminals table.
terms_df_orig_previous.shape

(1191, 77)

In [25]:
# Net change in row count between the two versions. Computed from the frames
# rather than from numbers typed in by hand, so it stays correct when the
# sheets change and the notebook is re-run.
terms_df_orig.shape[0] - terms_df_orig_previous.shape[0]

84

In [26]:
# Net change in ComboIDs (added minus deleted), computed from the ID lists
# rather than from hand-typed counts. This should equal the row-count change
# between the two versions if every row has a distinct ComboID.
added_combo_id_count = len(set(current_combo_ids) - set(previous_combo_ids))
deleted_combo_id_count = len(set(previous_combo_ids) - set(current_combo_ids))
added_combo_id_count - deleted_combo_id_count

84

# check lat/lon locations

## import eez stuff

In [27]:
# Source: https://www.marineregions.org/downloads.php
# Section: "Marine and land zones: the union of world country boundaries and EEZ's"
#eez_file = '/Users/baird/Dropbox/_gis-data/eez/EEZ_land_union_v2_201410/EEZ_land_v2_201410.shp'
eez_file = '/Users/baird/Dropbox/_gis-data/eez/EEZ_land_union_v3_202003/EEZ_Land_v3_202030.shp'

# Read the shapefile and index it by its UNION column in one step.
eez_and_land_boundaries_gdf = geopandas.read_file(eez_file).set_index('UNION')
#eez_and_land_boundaries_gdf = eez_and_land_boundaries_gdf.set_index('SOVEREIGN1')
#eez_4087 = eez_and_land_boundaries.to_crs('epsg:4087')

In [28]:
# Preview the first rows of the EEZ/land table.
eez_and_land_boundaries_gdf.head()

Unnamed: 0_level_0,MRGID_EEZ,TERRITORY1,MRGID_TER1,ISO_TER1,UN_TER1,SOVEREIGN1,MRGID_SOV1,ISO_SOV1,UN_SOV1,TERRITORY2,...,UN_TER3,SOVEREIGN3,MRGID_SOV3,ISO_SOV3,UN_SOV3,POL_TYPE,Y_1,x_1,AREA_KM2,geometry
UNION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Estonia,5675.0,Estonia,2110,EST,233.0,Estonia,2110,EST,233,,...,,,0.0,,,Union EEZ and country,58.7153,24.40898,81842,"MULTIPOLYGON (((26.61348 57.50776, 26.60780 57..."
Mayotte,48944.0,Mayotte,8606,MYT,175.0,France,17,FRA,250,Mayotte,...,0.0,,0.0,,0.0,Overlapping claim,-13.21377,45.30528,67285,"POLYGON ((46.68598 -12.80939, 46.64361 -12.933..."
Overlapping claim Qatar / Saudi Arabia / United Arab Emirates,50170.0,Qatar,8468,QAT,634.0,Qatar,8468,QAT,634,Saudi Arabia,...,784.0,United Arab Emirates,2206.0,ARE,784.0,Overlapping claim,24.69243,51.59986,126,"POLYGON ((51.68368 24.68507, 51.59039 24.66509..."
Cameroon,8475.0,Cameroon,2170,CMR,120.0,Cameroon,2170,CMR,120,,...,,,0.0,,,Union EEZ and country,5.62533,12.63665,480130,"POLYGON ((16.05693 1.64932, 16.05571 1.65413, ..."
Finland,5676.0,Finland,2106,FIN,246.0,Finland,2106,FIN,246,,...,,,0.0,,,Union EEZ and country,63.97904,25.44114,420076,"MULTIPOLYGON (((19.11472 60.30806, 19.11433 60..."


special cases for Macao, Hong Kong

In [225]:
nat_earth_file = '/Users/baird/Dropbox/_gis-data/_natural_earth_data/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp'
nat_earth_gdf = geopandas.read_file(nat_earth_file)

# Pull out the shapely geometries (first matching row in each case).
is_china = eez_and_land_boundaries_gdf.index == 'China'
china_geom = eez_and_land_boundaries_gdf.loc[is_china, 'geometry'].iloc[0]
hk_geom = nat_earth_gdf.loc[nat_earth_gdf.ADMIN == 'Hong Kong S.A.R.', 'geometry'].iloc[0]
macao_geom = nat_earth_gdf.loc[nat_earth_gdf.ADMIN == 'Macao S.A.R', 'geometry'].iloc[0]

# Cut the Hong Kong overlap out of China first, then the Macao overlap out of the result.
china_new_geom = china_geom.difference(china_geom.intersection(hk_geom))
china_new_geom = china_new_geom.difference(china_new_geom.intersection(macao_geom))

replace original China in EEZ file with new geometry minus Macao, Hong Kong

In [226]:
# Build a one-row frame labelled 'Hong Kong' with the same columns as the EEZ table,
# all NaN, then set its geometry to the Natural Earth Hong Kong shape.
empty_row_hk = geopandas.GeoDataFrame([[numpy.nan]*eez_and_land_boundaries_gdf.columns.size],
                             columns=eez_and_land_boundaries_gdf.columns, index=['Hong Kong'])
empty_row_hk['geometry'] = hk_geom

# Same for Macao.
empty_row_macao = geopandas.GeoDataFrame([[numpy.nan]*eez_and_land_boundaries_gdf.columns.size],
                             columns=eez_and_land_boundaries_gdf.columns, index=['Macao'])
empty_row_macao['geometry'] = macao_geom

# Append the two new rows to the EEZ table.
# NOTE(review): re-running this cell appends the rows again, since it reassigns
# eez_and_land_boundaries_gdf from itself.
# NOTE(review): the warning lines in this cell's output come from the concat step;
# presumably the new rows carry no CRS -- confirm.
eez_and_land_boundaries_gdf = pandas.concat([eez_and_land_boundaries_gdf, empty_row_hk], axis=0)
eez_and_land_boundaries_gdf = pandas.concat([eez_and_land_boundaries_gdf, empty_row_macao], axis=0)
# Replace China's geometry with the version that excludes Hong Kong and Macao;
# the assignment goes through a one-row GeoDataFrame indexed 'China'.
eez_and_land_boundaries_gdf.loc[['China'],'geometry'] = geopandas.GeoDataFrame(index=['China'], geometry=[china_new_geom]).geometry

  return GeometryArray(data, crs=_get_common_crs(to_concat))
  return GeometryArray(data, crs=_get_common_crs(to_concat))


make sure all countries in terminals database are in eez index list

In [227]:
# Print every country named in the terminals table that has no matching label
# in the EEZ index.
unique_country_list = list(set(terms_df_orig.Country.tolist()))
eez_country_list = eez_and_land_boundaries_gdf.index.tolist()

# Build the lookup once instead of re-materialising the index on every iteration.
eez_country_set = set(eez_country_list)

# Sort (via str, so a missing value cannot break the comparison) so the printed
# order is stable between runs.
for country in sorted(unique_country_list, key=str):
    if country not in eez_country_set:
        print(country)

Timor-Leste
Mauritius
Türkiye
Côte d'Ivoire


change the index names to match these above

In [229]:
#sorted(eez_and_land_boundaries_gdf.index.tolist())
# Name differences between the terminals sheet and the EEZ index:
#   Timor-Leste is called East Timor in EEZ
#   Türkiye is called Turkey in EEZ
#   Mauritius is called Republic of Mauritius in EEZ
#   Côte d'Ivoire is called Ivory Coast in EEZ

# Maps EEZ index label -> country name used in the terminals sheet.
index_rename = {'East Timor':'Timor-Leste',
                'Turkey':'Türkiye',
                'Republic of Mauritius':'Mauritius',
                'Ivory Coast':"Côte d'Ivoire"}

# rename() returns a new frame; eez_and_land_boundaries_gdf keeps the EEZ labels.
eez_and_land_boundaries_gdf_corrected_index = eez_and_land_boundaries_gdf.rename(index=index_rename)

In [230]:
#eez_and_land_boundaries_gdf_corrected_index.index.tolist()

The cell below should print the ComboIDs of terminals that aren't located in their listed country

In [236]:
# Print the ComboID and Country of every terminal whose coordinates fall neither
# inside its listed country's EEZ/land geometry nor inside any geometry whose
# SOVEREIGN1 is that country.

# Pull out only terminals that don't have TBD or Unknown in their Latitude value.
terms_df_orig_with_latlon = terms_df_orig.loc[~terms_df_orig.Latitude.isin(['TBD','Unknown'])]

# The SOVEREIGN1 values don't change inside the loop, so collect them once.
sovereign_names = set(eez_and_land_boundaries_gdf_corrected_index.SOVEREIGN1.tolist())

for i,row in terms_df_orig_with_latlon.iterrows():
    terminal_loc = shapely.Point(row.Longitude,row.Latitude)
    terminal_country = row.Country
    eez_country_geometry = eez_and_land_boundaries_gdf_corrected_index.loc[terminal_country].geometry

    if terminal_loc.within(eez_country_geometry):
        continue  # correctly within the country's own geometry

    if terminal_country in sovereign_names:
        # Also accept a location inside any territory listing this country as SOVEREIGN1.
        sovereign_geometries = eez_and_land_boundaries_gdf_corrected_index.loc[
            eez_and_land_boundaries_gdf_corrected_index.SOVEREIGN1==terminal_country].geometry
        if sovereign_geometries.contains(terminal_loc).any():
            continue

    # Previously a sovereign country whose territories did not contain the point
    # fell through without printing; every unmatched terminal is now reported.
    print(row.ComboID, row.Country)

T038800 Hong Kong


NOTES:

It looks like the only terminal flagged as not being in the right place is the Hong Kong FSRU (T038800). We have its coordinates marked as "exact", so I'm going to assume it's actually OK.