### Retrieve historical data covering the #EgayPH, #FalconPH, and #Habagat effects in Metro Manila

This Jupyter notebook gathers historical data for the days when #EgayPH, #FalconPH, and the Southwest Monsoon had a major impact on Metro Manila. It currently gathers the total rainfall per day; other data still to be gathered are maximum wind speed, maximum wind gust, maximum atmospheric pressure, maximum temperature, and maximum heat index.

##### Import necessary libraries

In [10]:
import pandas as pd
import psycopg2
import os
import datetime as dt
from sqlalchemy import create_engine
from dotenv import find_dotenv, load_dotenv

##### Formatting for DataFrame to show all columns

In [11]:
# Lift every pandas display limit (None = unlimited / auto-detect) so wide
# frames are rendered with all rows, all columns and untruncated cell text.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None
pd.options.display.max_colwidth = None

##### Get the necessary variables for the database connection

In [12]:
# Locate the .env file and load its entries into the process environment
# before any of the settings below are read.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Database connection settings; a variable that is not set comes back as None.
DB_HOST, DB_NAME, DB_USER, DB_PASS = map(
    os.environ.get,
    (
        "POSTGRES_HOST",
        "POSTGRES_DATABASE",
        "POSTGRES_USERNAME",
        "POSTGRES_PASSWORD",
    ),
)

##### USER DEFINED VARIABLES HERE!

In [13]:
# Observation window used by the query, kept as 'YYYY-MM-DD HH:MM:SS' strings.
# isoformat(sep=' ') yields exactly the text that str() gives for a datetime.
start_date = dt.datetime(2023, 7, 22).isoformat(sep=' ')
end_date = dt.datetime(2023, 8, 3).isoformat(sep=' ')

start_date

'2023-07-22 00:00:00'

##### Fetch data regarding the stated dates from the database containing historical data

In [14]:
# Connect to the database with the credentials read from the environment.
# `conn` and `cursor` are intentionally left open at the end of this cell:
# the following cell reads `cursor.description` to name the DataFrame columns.
connection_settings = {
    'host': DB_HOST,
    'database': DB_NAME,
    'user': DB_USER,
    'password': DB_PASS,
}
conn = psycopg2.connect(**connection_settings)
cursor = conn.cursor()

# Name of the table to query
table_name = 'measurements'

# Select every row whose local observation time lies between start_date and
# end_date, both bounds inclusive. The two dates are passed as bound
# parameters; only the table name is interpolated into the SQL text.
query = f"""
    SELECT *
    FROM {table_name}
    WHERE obs_time_local >= %s AND obs_time_local <= %s
    """
cursor.execute(query, (start_date, end_date))

rows = cursor.fetchall()

##### Store rows in a DataFrame

In [15]:
# Column names come from the cursor metadata: the first field of each
# description entry is the column name.
column_names = [entry[0] for entry in cursor.description]

# Build the frame in a single chain: drop the 'id' column, order the rows
# chronologically, then add 'obs_day' (the observation date as a midnight
# timestamp) as the key for the per-day grouping.
df = (
    pd.DataFrame(rows, columns=column_names)
    .drop(columns='id')
    .sort_values('obs_time_local')
    .assign(obs_day=lambda frame: pd.to_datetime(frame['obs_time_local'].dt.date))
)
df.tail(10)

Unnamed: 0,station_id,epoch,humidity_avg,humidity_high,humidity_low,obs_time_local,obs_time_utc,solar_radiation_high,uv_high,wind_direction_avg,dew_point_avg,dew_point_high,dew_point_low,heat_index_avg,heat_index_high,heat_index_low,precipitation_rate,precipitation_total,pressure_max,pressure_min,pressure_trend,qc_status,temperature_avg,temperature_high,temperature_low,wind_chill_avg,wind_chill_high,wind_chill_low,wind_gust_avg,wind_gust_high,wind_gust_low,wind_speed_avg,wind_speed_high,wind_speed_low,obs_day
20603,IPARAA10,1690991692,98.0,98.0,98.0,2023-08-02 23:54:52,2023-08-02 23:54:52+08:00,0.0,0.0,308.0,26.0,26.0,26.0,29.0,29.0,29.0,0.0,37.08,1011.85,1011.85,0.0,1,26.0,26.0,26.0,26.0,26.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-08-02
6829,IBULACAN2,1690991693,99.0,99.0,99.0,2023-08-02 23:54:53,2023-08-02 23:54:53+08:00,0.0,0.0,102.0,25.0,25.0,25.0,27.0,27.0,26.0,0.0,31.24,1006.77,1006.43,4.57,-1,25.0,25.0,25.0,25.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-08-02
14919,IMETROMA22,1690991694,99.0,99.0,99.0,2023-08-02 23:54:54,2023-08-02 23:54:54+08:00,0.0,0.0,21.0,25.0,25.0,25.0,27.0,27.0,27.0,0.0,10.92,1009.48,1009.48,0.0,1,26.0,26.0,26.0,26.0,26.0,26.0,8.0,9.0,6.0,6.0,9.0,3.0,2023-08-02
10319,IMAKAT1,1690991697,99.0,99.0,99.0,2023-08-02 23:54:57,2023-08-02 23:54:57+08:00,0.0,0.0,0.0,26.0,26.0,26.0,29.0,29.0,29.0,10.16,26.16,1010.33,1010.23,-1.24,1,26.0,26.0,26.0,26.0,26.0,26.0,1.0,14.0,0.0,0.0,0.0,0.0,2023-08-02
6830,IBULACAN2,1690991986,99.0,99.0,99.0,2023-08-02 23:59:46,2023-08-02 23:59:46+08:00,0.0,0.0,79.0,25.0,25.0,25.0,26.0,26.0,26.0,0.0,31.24,1006.77,1006.43,-5.08,-1,25.0,25.0,25.0,25.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-08-02
4990,IRIZBULA2,1690991990,94.0,94.0,94.0,2023-08-02 23:59:50,2023-08-02 23:59:50+08:00,0.0,0.0,194.0,24.0,24.0,24.0,26.0,26.0,26.0,1.19,86.79,1012.73,1012.33,3.81,1,25.0,25.0,25.0,25.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-08-02
17128,IMUNTI6,1690991992,97.0,97.0,97.0,2023-08-02 23:59:52,2023-08-02 23:59:52+08:00,0.0,0.0,348.0,26.0,26.0,26.0,30.0,30.0,30.0,0.0,7.87,1009.48,1009.48,0.0,1,27.0,27.0,26.0,27.0,27.0,26.0,4.0,6.0,0.0,4.0,5.0,0.0,2023-08-02
20604,IPARAA10,1690991992,98.0,98.0,98.0,2023-08-02 23:59:52,2023-08-02 23:59:52+08:00,0.0,0.0,308.0,26.0,26.0,26.0,29.0,29.0,29.0,0.0,37.08,1011.85,1011.85,0.0,1,26.0,26.0,26.0,26.0,26.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,2023-08-02
14920,IMETROMA22,1690991994,99.0,99.0,99.0,2023-08-02 23:59:54,2023-08-02 23:59:54+08:00,0.0,0.0,18.0,25.0,26.0,25.0,27.0,28.0,27.0,0.0,10.92,1009.48,1009.14,0.0,1,26.0,26.0,26.0,26.0,26.0,26.0,8.0,9.0,5.0,5.0,8.0,3.0,2023-08-02
10320,IMAKAT1,1690991998,99.0,100.0,99.0,2023-08-02 23:59:58,2023-08-02 23:59:58+08:00,0.0,0.0,0.0,26.0,26.0,26.0,30.0,30.0,29.0,10.41,26.42,1010.23,1010.09,-0.41,1,26.0,26.0,26.0,26.0,26.0,26.0,14.0,14.0,14.0,0.0,0.0,0.0,2023-08-02


In [16]:
# Summarize the frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21802 entries, 20605 to 10320
Data columns (total 35 columns):
 #   Column                Non-Null Count  Dtype                    
---  ------                --------------  -----                    
 0   station_id            21802 non-null  object                   
 1   epoch                 21802 non-null  int64                    
 2   humidity_avg          21792 non-null  float64                  
 3   humidity_high         21792 non-null  float64                  
 4   humidity_low          21792 non-null  float64                  
 5   obs_time_local        21802 non-null  datetime64[ns]           
 6   obs_time_utc          21802 non-null  datetime64[ns, UTC+08:00]
 7   solar_radiation_high  21792 non-null  float64                  
 8   uv_high               21792 non-null  float64                  
 9   wind_direction_avg    21792 non-null  float64                  
 10  dew_point_avg         21791 non-null  float64              

##### Get the total amount of rainfall per day

In [17]:
# One group per station per calendar day.
daily_groups = df.groupby(['station_id', 'obs_day'])

# For each group keep the last observation timestamp (in the frame's current
# row order) and the largest precipitation_total reading, then turn the group
# keys back into ordinary columns.
total_rainfall_per_day_df = daily_groups.agg(
    obs_time_local=('obs_time_local', 'last'),
    precipitation_total=('precipitation_total', 'max'),
).reset_index()

In [18]:
# Write the per-station daily rainfall totals to disk. The DataFrame index is
# written as the first (unnamed) CSV column, exactly as before.
total_rainfall_per_day_df.to_csv('total_daily_rainfall_07222023_08032023.csv')

# The preview must be the final expression: a notebook renders only the value
# of a cell's last expression, so calling head() before to_csv() showed nothing.
total_rainfall_per_day_df.head()