# Writing to Clever Cloud PostGIS db

In [1]:
import psycopg2 as pg
from psycopg2 import sql
import pandas as pd
import os

In [2]:
# Connection settings come from the environment so that no credentials are
# stored in the notebook. A variable that is not set yields None.
db_host = os.getenv('DB_HOST_CLOUD')
db_port = os.getenv('DB_PORT_CLOUD')
db_user = os.getenv('DB_USER_CLOUD')
db_password = os.getenv('DB_PASSWORD_CLOUD')
db_name = os.getenv('DB_NAME_CLOUD')

# Connection URL assembled from the settings above.
db_url = 'postgresql://{}:{}@{}:{}/{}'.format(
    db_user, db_password, db_host, db_port, db_name
)

In [3]:
# #create table from dataframe - Clever Cloud (Execution time ~= 3 min)
# import pandas as pd
# # read the csv file into a pandas dataframe, replacing NaN with None
# df = pd.read_csv('final_output.csv', index_col=['cep_id'], dtype={'pa': 'Int64', 'eco': 'Int64', 'country': 'Int64'}).replace({pd.NA: None})
# from sqlalchemy import create_engine
# engine = create_engine(db_url)
# df.to_sql('cep_water', engine, if_exists='replace')

In [4]:
# Open one psycopg2 connection from the environment-provided settings.
# It is stored in `conn` and shared by the query cells that follow.
conn = pg.connect(host=db_host, port=db_port, database=db_name, user=db_user, password=db_password)

In [5]:
# Preview the first five rows of cep_water to confirm the table is reachable.
# `with conn` ends the transaction (commit on success, rollback on error), so a
# failed query does not leave the shared connection in an aborted transaction;
# with psycopg2 it does NOT close the connection. `with conn.cursor()` closes
# the cursor even when the query raises.
with conn:
    with conn.cursor() as cursor:
        cursor.execute('SELECT * FROM cep_water LIMIT 5')
        result = cursor.fetchall()
for row in result:
    print(row)

(1, 895.792133, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 171, 'Lithuania', 'LTU', 80412, 'Central European mixed forests', False, 0, None, False)
(2, 1366843086810.8289, 7194092084292.959, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 1, 'Antarctic', True, 0, None, False)
(4, 0.0, 1836981.041237, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 1, 'Antarctic', True, 555547601, 'South Georgia and South Sandwich Islands Marine Protected Area', True)
(6, 632877592038.22, 10387835827531.256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 2, 'Antarctic Polar Front', True, 0, None, False)
(7, 0.0, 11679257268.343046, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 2, 'Antarctic Polar Front', True, 345888, 'Terres Australes Françaises', True)


# Data sanity checks
- missing values
- duplicates
- data types
- data ranges
- `is_marine` flag (all permanent water bodies are marine)
### Case studies
[Pathway for Inland Waters, Nov 2022 (nature.org)](https://www.nature.org/content/dam/tnc/nature/en/documents/Pathway_for_Inland_Waters_Nov_2022.pdf)

In [6]:
# Check for rows with a missing country (encoded as country = 0).
# Any row printed here is a row without a country assignment; no output means
# none were found.
# `with conn` ends the transaction (rollback on error) without closing the
# psycopg2 connection; `with conn.cursor()` closes the cursor even on failure.
with conn:
    with conn.cursor() as cursor:
        cursor.execute('SELECT * FROM cep_water WHERE country = 0')
        result = cursor.fetchall()
for row in result:
    print(row)


In [7]:
# Check for duplicates: list every (cep_id, pa, eco) combination that occurs
# more than once, together with its row count. No output means no duplicates.
# `with conn` ends the transaction (rollback on error) without closing the
# psycopg2 connection; `with conn.cursor()` closes the cursor even on failure.
with conn:
    with conn.cursor() as cursor:
        cursor.execute('SELECT cep_id, pa, eco, COUNT(*) FROM cep_water GROUP BY cep_id, pa, eco HAVING COUNT(*) > 1')
        result = cursor.fetchall()
for row in result:
    print(row)

In [8]:
# check is_marine has large band 1 area (band_1_area > 1000)

(8, 0.0, 5916302048.817965, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 2, 'Antarctic Polar Front', True, 345888, 'Terres Australes Françaises', True)
(8, 0.0, 5916302048.817965, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1, 'Area Beyond National Jurisdiction', 'ABNJ', 2, 'Antarctic Polar Front', True, 555697868, 'French Austral Lands and Seas', True)


In [10]:
# Close the database connection used by the sanity checks above.
# NOTE(review): the cells under "Summaries" pass `conn` to
# get_summed_bands_by_col, so the connection has to be reopened before
# running them.
conn.close()

In [9]:
# Deliberately halt execution at this point.
# NOTE(review): presumably this keeps a "Run All" from continuing into the
# Summaries section below — confirm.
raise SystemExit

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Summaries

In [None]:
def get_summed_bands_by_col(conn, group_by_col="country_name", is_pa=False, is_marine=False, convert_to_meters=False):
    """Sum the eleven ``transition_*`` columns of ``cep_water`` per group.

    Rows are filtered on ``is_protected = is_pa`` and ``is_marine = is_marine``,
    grouped by ``group_by_col``, and ``transition_0`` .. ``transition_10`` are
    summed within each group.

    Parameters
    ----------
    conn : psycopg2 connection
        Connection to query. It is rolled back before the function returns,
        whether or not the query succeeded.
    group_by_col : str
        Name of the column to group by; it is quoted as an SQL identifier.
    is_pa : bool
        Value that ``is_protected`` must equal.
    is_marine : bool
        Value that ``is_marine`` must equal.
    convert_to_meters : bool
        When true, every summed value is divided by 1,000,000.
        NOTE(review): despite the flag name, this factor looks like a
        m^2 -> km^2 conversion; confirm the units of the stored areas.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``group_by_col`` with columns ``transition_0`` ..
        ``transition_10``. If building or running the query fails, the error
        is printed and ``None`` is returned.
    """
    band_cols = [f'transition_{i}' for i in range(11)]
    try:
        # One "SUM(col) as col" fragment per transition band.
        bands_sum = [f'SUM({col}) as "{col}"' for col in band_cols]
        query = sql.SQL(
            'SELECT {group_by_column}, {bands} FROM cep_water '
            'WHERE is_protected = {is_pa} AND is_marine = {is_marine} '
            'GROUP BY {group_by_column}'
        ).format(
            bands=sql.SQL(', ').join(map(sql.SQL, bands_sum)),
            is_pa=sql.Literal(is_pa),
            is_marine=sql.Literal(is_marine),
            group_by_column=sql.Identifier(group_by_col),
        )

        # The context manager closes the cursor even if execute/fetch raises.
        with conn.cursor() as cursor:
            cursor.execute(query)
            rows = cursor.fetchall()

        df = pd.DataFrame(rows, columns=[group_by_col] + band_cols).set_index(group_by_col)
        if convert_to_meters:
            df = df / 1_000_000
        return df
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("error: ", e)
        return None
    finally:
        # End the transaction opened by the SELECT so the shared connection is
        # not left inside an open (or aborted) transaction.
        conn.rollback()

In [None]:
# Per-country sums for non-marine rows, split by protection status.
# convert_to_meters=True divides every sum by 1,000,000.
df_protected = get_summed_bands_by_col(
    conn,
    group_by_col="country_name",
    is_pa=True,
    is_marine=False,
    convert_to_meters=True,
)
df_unprotected = get_summed_bands_by_col(
    conn,
    group_by_col="country_name",
    is_pa=False,
    is_marine=False,
    convert_to_meters=True,
)
# Per-`eco` sums; is_pa=False restricts this frame to unprotected rows.
df_terrestrial_eco = get_summed_bands_by_col(
    conn,
    group_by_col="eco",
    is_pa=False,
    is_marine=False,
    convert_to_meters=True,
)

In [None]:
# Summed transition bands per `eco` value for unprotected, non-marine rows
# (values divided by 1,000,000).
df_terrestrial_eco

Unnamed: 0_level_0,transition_0,transition_1,transition_2,transition_3,transition_4,transition_5,transition_6,transition_7,transition_8,transition_9,transition_10
eco,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
9998,1.632937e+04,798938.098790,7224.348308,31084.582217,470.584544,1060.467029,3797.589527,725.706714,1207.040411,331.338682,991.485527
9999,1.182636e+06,4810.382056,2114.682572,42.378275,65.780914,316.708090,11.091188,436.949940,61.031645,16.327289,43.274022
10101,1.969833e+03,16.566558,0.769555,0.016913,1.366848,1.554482,0.123772,0.232176,0.565811,0.002306,0.362095
10102,5.796820e+03,251.399760,8.482996,1.012729,9.582854,12.788575,11.809761,6.589304,6.375422,2.317255,37.306834
10103,2.063189e+03,84.463036,4.412260,0.117692,0.704615,2.974527,0.595379,0.227694,0.576150,0.274599,1.662997
...,...,...,...,...,...,...,...,...,...,...,...
81330,7.325625e+05,1231.018824,596.801376,163.570074,580.264028,1868.600340,1139.153184,44.418426,139.856067,219.953600,3779.707089
81331,8.176799e+04,0.000000,0.134979,0.000000,0.007863,3.198159,0.001448,0.000000,0.000000,0.000000,0.050380
81332,5.010596e+04,0.000000,0.006296,0.000000,0.031547,1.008727,0.010568,0.000000,0.000000,0.000000,2.094448
81333,4.324528e+04,46.469397,8.191076,0.257457,6.894867,22.154596,3.182218,9.422927,0.706352,0.839800,7.665432


In [None]:
# Summed transition bands per country for protected, non-marine rows
# (values divided by 1,000,000).
df_protected

Unnamed: 0_level_0,transition_0,transition_1,transition_2,transition_3,transition_4,transition_5,transition_6,transition_7,transition_8,transition_9,transition_10
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Afghanistan,2.310425e+04,143.424503,63.373226,13.420686,49.486765,155.167790,72.207952,5.306655,19.766028,20.811703,170.357614
Albania,5.189864e+03,660.539356,22.283697,7.219132,19.038880,27.731763,8.607468,2.080579,7.619051,1.627974,11.543785
Algeria,1.334345e+06,56.964036,12.561279,1.529621,537.157083,1493.158411,222.221807,4.094169,5.686030,8.596214,1090.141123
American Samoa,2.029997e+01,12.601353,0.408139,0.001492,0.101468,0.177578,0.020146,0.068642,0.035811,0.008954,0.028354
Andorra,2.271526e+02,0.243387,0.029635,0.005136,0.026222,0.109460,0.001711,0.009121,0.092945,0.001710,0.019962
...,...,...,...,...,...,...,...,...,...,...,...
Wallis and Futuna,2.996855e-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Western Sahara,3.090867e+00,0.000000,0.000000,0.000000,0.000000,0.015130,0.000000,0.000000,0.000000,0.000000,0.015850
Yemen,4.868245e+03,129.904302,2.435425,0.470187,4.506364,8.210006,3.325082,0.788339,2.869893,0.219647,5.475370
Zambia,3.361485e+05,4328.872633,119.927124,549.867988,972.540352,4872.798450,374.284818,99.712087,87.166460,50.232905,2654.023080


In [None]:
# Summed transition bands per country for unprotected, non-marine rows
# (values divided by 1,000,000).
df_unprotected

Unnamed: 0_level_0,transition_0,transition_1,transition_2,transition_3,transition_4,transition_5,transition_6,transition_7,transition_8,transition_9,transition_10
country_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Afghanistan,6.110042e+05,194.299419,174.080562,756.569597,243.086282,538.600089,654.626390,12.474034,136.500569,2005.066034,2363.970579
Albania,2.311214e+04,104.126763,45.352836,2.873812,16.989635,74.791099,8.920121,1.903561,8.417134,4.609989,29.551732
Algeria,1.052304e+06,160.332636,166.729757,14.034576,33.258439,184.815442,31.994200,15.227313,17.016089,82.283510,760.328626
American Samoa,1.064722e+02,66.737494,0.989355,0.017907,0.470759,0.703576,0.214885,0.208895,0.176817,0.017908,0.138031
Andorra,3.253703e+02,0.314543,0.033634,0.006267,0.030212,0.098074,0.014830,0.003990,0.108227,0.013674,0.011404
...,...,...,...,...,...,...,...,...,...,...,...
Western Sahara,2.680124e+05,4.408739,1.213588,0.433325,16.149493,67.782724,33.521239,0.679920,1.326193,0.958819,406.156065
Western Sahara|Morocco,3.035324e+01,630.429590,2.032276,0.295963,5.609147,3.085063,1.082357,1.611273,3.620547,0.019915,0.643403
Yemen,4.488722e+05,569.692014,58.727171,0.971422,81.269691,238.646192,6.386063,11.264473,21.609921,0.495109,32.858277
Zambia,4.272760e+05,8441.024638,147.717483,191.130195,612.921596,2576.103914,160.502019,51.182249,96.323131,68.957469,1033.741003


In [None]:
# Release the database connection once the summary frames have been built.
conn.close()