### PostgreSQL–Python Interfacing Setup

In [1]:
# Install SQLAlchemy, the open-source SQL toolkit and Object-Relational Mapping (ORM) library for Python.
%pip install sqlalchemy
# Install psycopg2, the PostgreSQL driver.
%pip install psycopg2

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Install sqlalchemy_utils, an add-on package for SQLAlchemy
# (the import cell below takes create_database from it).
%pip install sqlalchemy_utils

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import Libraries
import os
from getpass import getpass

import numpy as np
import pandas as pd
import sqlalchemy as db
from sqlalchemy import text
from sqlalchemy_utils import create_database, database_exists

### Create Database in PostgreSQL

To find your username and port number, open pgAdmin, right-click the PostgreSQL server, and select Properties.

In [4]:

# Connect as user `postgres` to the server on localhost:5432 and make sure the
# database `staging_database` exists. Adjust username/host/port/database to
# match your own server.
#
# The password is never written into the notebook: it is taken from the
# PGPASSWORD environment variable, falling back to an interactive prompt.
pg_password = os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: ')

# URL.create takes each component separately, so special characters in the
# password (e.g. '@' or '/') need no manual escaping.
db_url = db.engine.URL.create(
    drivername='postgresql',
    username='postgres',
    password=pg_password,
    host='localhost',
    port=5432,
    database='staging_database',
)

# Create SQLAlchemy engine
engine = db.create_engine(db_url)

# Create the database only when it is missing; create_database raises if it
# already exists, which would break re-running this cell.
if not database_exists(engine.url):
    create_database(engine.url)

# Release the engine's pooled connections. The engine object stays usable and
# opens fresh connections the next time it is used.
engine.dispose()

### Extract and Transform raw data in Pandas

In [7]:
# Load the 2023 and 2024 historical 24-hour forecast files and stack them (2023 rows first).
df_raw_fc2024 = pd.read_csv('Historical24hourWeatherForecast2024.csv', sep=',')
df_raw_fc2023 = pd.read_csv('Historical24hourWeatherForecast2023.csv', sep=',')
df_raw_fcs = pd.concat([df_raw_fc2023, df_raw_fc2024])

# Derive the calendar date on which each forecast period starts.
df_raw_fcs['time_period_start'] = pd.to_datetime(df_raw_fcs['time_period_start'])
df_raw_fcs['fordate'] = df_raw_fcs['time_period_start'].dt.date


def _mentions_rain(texts):
    """Return True if any text in the group contains 'rain' or 'showers' (case-insensitive).

    Missing values count as "no mention".
    """
    return texts.str.contains('rain|showers', case=False, na=False).any()


# Start from one row per distinct date (ascending), then attach one boolean
# column per `<name>_forecast_text` column, named `<name>`.
agg_forecasts = pd.DataFrame({'fordate': sorted(df_raw_fcs['fordate'].unique())})

forecast_text_cols = [c for c in df_raw_fcs.columns if c.endswith('_forecast_text')]

agg_results = []
for col in forecast_text_cols:
    target_name = col.replace('_forecast_text', '')
    per_date = df_raw_fcs.groupby('fordate')[col].apply(_mentions_rain)
    agg_col = per_date.reset_index(name=target_name)
    agg_results.append(agg_col)
    agg_forecasts = agg_forecasts.merge(agg_col, on='fordate', how='left')

agg_forecasts['fordate'] = pd.to_datetime(agg_forecasts['fordate'])

# Reshape wide -> long: one row per (date, region). Only the five regions
# listed in value_vars are kept; any other derived column is dropped by melt.
agg_forecasts = agg_forecasts.melt(
    id_vars='fordate',
    value_vars=['south', 'north', 'east', 'central', 'west'],
    var_name='region',
    value_name='rain_forecasted',
)

print(agg_forecasts)

        fordate region  rain_forecasted
0    2022-01-01  south            False
1    2023-01-01  south             True
2    2023-01-10  south             True
3    2023-01-11  south             True
4    2023-01-12  south            False
...         ...    ...              ...
3485 2024-12-27   west             True
3486 2024-12-28   west             True
3487 2024-12-29   west             True
3488 2024-12-30   west             True
3489 2024-12-31   west             True

[3490 rows x 3 columns]


### Create Tables in PostgreSQL

In [None]:
# Create new tables in PostgreSQL.

# SQL statements (e.g. "CREATE TABLE IF NOT EXISTS ...") to execute, in order.
# Defined as an empty tuple so this cell runs on a fresh kernel; with nothing
# listed, the loop below executes no statements.
commands = ()

# engine.begin() wraps the statements in a single transaction: it commits when
# the block finishes and rolls back if any statement raises.
with engine.begin() as conn:
    for command in commands:
        conn.execute(text(command))

In [8]:
# Write the long-format forecasts to the PostgreSQL table `agg_forecasts`
# (the DataFrame index is not written as a column).
# NOTE(review): if_exists='append' inserts into the table when it already
# exists, so re-running this cell adds the same rows again — confirm intended.
agg_forecasts.to_sql(
    name='agg_forecasts',
    con=engine,
    if_exists='append',
    index=False,
)

490