In [1]:
##############

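# RUN ONLY ONCE
# The insert cell below writes with if_exists='append', so re-running this
# notebook against an already-populated database may duplicate rows
# (unless the schema script recreates the tables -- verify schema.sql).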

##############

### Importing required modules

In [2]:
import sqlite3
import numpy as np 
import pandas as pd

### Database Creation

In [3]:
class Database:
    """Thin wrapper around one sqlite3 connection, used to run SQL script files.

    Typical use: ``get_connection()`` -> ``execute_query(path)`` ->
    ``close_connection()``.
    """

    def __init__(self, db_name):
        """Store the database target; no connection is opened here.

        Args:
            db_name: Value passed verbatim to ``sqlite3.connect``.
        """
        self.db_name = db_name
        # Defined up front so the other methods can detect "not connected"
        # instead of failing with an opaque AttributeError.
        self.connection = None
        # Spelling kept as-is: code outside the class reads this attribute
        # by this exact name.
        self.CONNECTION_EASTABLISHED = False

    def get_connection(self):
        """Open the sqlite3 connection and mark the wrapper as connected.

        Raises:
            sqlite3.Error: Propagated unchanged if the connection cannot be opened.
        """
        self.connection = sqlite3.connect(self.db_name)
        print('--- CONNECTION ESTABLISHED SUCCESSFULLY ---')
        self.CONNECTION_EASTABLISHED = True

    def execute_query(self, sql_script_file):
        """Run every statement in an SQL script file and commit.

        Args:
            sql_script_file: Path of a text file containing the SQL script.

        Raises:
            sqlite3.ProgrammingError: If called before ``get_connection``.
            sqlite3.Error: Propagated unchanged if the script fails.
            OSError: Propagated unchanged if the file cannot be read.
        """
        if self.connection is None:
            raise sqlite3.ProgrammingError(
                'No open connection: call get_connection() before execute_query().'
            )

        with open(sql_script_file, 'r') as sql_file:
            sql_script = sql_file.read()

        db_cur = self.connection.cursor()
        try:
            db_cur.executescript(sql_script)
            self.connection.commit()
        finally:
            # Release the cursor whether or not the script succeeded.
            db_cur.close()

    def close_connection(self):
        """Close the connection (if any) and reset the connected flag.

        Calling this when no connection is open is a no-op.

        Raises:
            sqlite3.Error: Propagated unchanged if closing fails.
        """
        if self.connection is None:
            return

        self.connection.close()
        self.connection = None
        # Keep the flag truthful so callers do not try to close twice.
        self.CONNECTION_EASTABLISHED = False
        print('--- CONNECTION CLOSED SUCCESSFULLY ---')
    
    

In [4]:
def create_db(db_name, schema_file):
    """Build a database by running a schema script against it.

    Any exception raised while connecting or executing the script is
    reported with ``print`` rather than propagated; the connection is
    closed afterwards whenever it was opened.

    Args:
        db_name: Database target handed to ``Database``.
        schema_file: Path of the SQL script to execute.
    """
    database = Database(db_name)

    try:
        database.get_connection()
        database.execute_query(schema_file)
    except Exception as err:
        # Report and carry on: failures here are deliberately not re-raised.
        print(f'Error: {err}')
    finally:
        # Only close when the connection was actually opened.
        if database.CONNECTION_EASTABLISHED:
            database.close_connection()

In [5]:
# Path of the SQLite database file (relative to the notebook's working directory).
DATABASE_NAME = '../datasets/climate.db'
# Path of the SQL script that is executed to set up the database.
SCHEMA_FILE = '../datasets/schema.sql'

In [6]:
# Run the schema script against the database file.
create_db(db_name=DATABASE_NAME, schema_file=SCHEMA_FILE)

--- CONNECTION ESTABLISHED SUCCESSFULLY ---
--- CONNECTION CLOSED SUCCESSFULLY ---


### Inserting data into tables (using pandas)

In [7]:
# load data into DataFrames
# (read before opening the connection, so a missing or unreadable parquet
# file cannot leave a database connection open)
states = pd.read_parquet('../datasets/final_datasets/states.parquet')
regions = pd.read_parquet('../datasets/final_datasets/regions.parquet')
weather_stations = pd.read_parquet('../datasets/final_datasets/weather_stations.parquet')
quality_metadata = pd.read_parquet('../datasets/final_datasets/quality_metadata.parquet')
bom_data_metadata = pd.read_parquet('../datasets/final_datasets/bom_data_metadata.parquet')
bom_data = pd.read_parquet('../datasets/final_datasets/BOM_DATA_FINAL.parquet')

# (table name, DataFrame) pairs, written in this order
tables_to_load = (
    ("states", states),
    ("regions", regions),
    ("weather_stations", weather_stations),
    ("quality_metadata", quality_metadata),
    ("bom_data_metadata", bom_data_metadata),
    ("bom_data", bom_data),
)

# obtain connection to climate.db
climate_db_con = sqlite3.connect(DATABASE_NAME)

try:
    # Write data into respective sqlite tables.
    # NOTE: if_exists='append' adds to whatever is already in the table, so
    # re-running this cell inserts the same rows again.
    for table_name, frame in tables_to_load:
        frame.to_sql(table_name, climate_db_con, index=False, if_exists='append')

    # commit changes
    climate_db_con.commit()
finally:
    # close connection, even when one of the writes above fails
    climate_db_con.close()

### Testing Database

In [8]:
# Re-open the database for the SELECT checks that follow.
climate_db_con = sqlite3.connect(DATABASE_NAME)
# Display every row of the states table.
pd.read_sql_query(sql='SELECT * FROM states', con=climate_db_con)

Unnamed: 0,state_id,state_name
0,1,A.A.T.
1,2,A.E.T.
2,3,N.S.W.
3,4,N.T.
4,5,QLD
5,6,S.A.
6,7,TAS
7,8,VIC
8,9,W.A.


In [9]:
# Display every row of the regions table.
pd.read_sql_query(sql='SELECT * FROM regions', con=climate_db_con)

Unnamed: 0,region_id,region_name,state_id
0,1,Aboriginal Shire of Kowanyama,5
1,2,Adelaide,6
2,3,Alice Springs,4
3,4,Augusta - Margaret River,9
4,5,Ballarat,8
...,...,...,...
114,115,Wellington,8
115,116,West Torrens,6
116,117,Western Arnhem,4
117,118,Whyalla,6


In [10]:
# Regions whose state is 'VIC', resolved by joining regions to states on state_id.
pd.read_sql_query(
    sql="""
select r.region_name, s.state_name 
from regions r join states s
on r.state_id = s.state_id
where s.state_name = 'VIC';
""",
    con=climate_db_con,
)

Unnamed: 0,region_name,state_name
0,Ballarat,VIC
1,Campaspe,VIC
2,Colac Otway,VIC
3,East Gippsland,VIC
4,Greater Bendigo,VIC
5,Greater Shepperon,VIC
6,Hindmarsh,VIC
7,Hobsons Bay,VIC
8,Indigo,VIC
9,Kingston,VIC


In [11]:
# Expand the JSON stored in personas.goals for persona_id = 1 into one row per
# element (json_each over a scalar sub-query).
# NOTE(review): the query reads the `goals` column but labels the result
# columns skill_number/skill -- confirm which naming is intended.
# NOTE(review): `personas` is not written by the pandas insert cell in this
# notebook; presumably it is created and populated elsewhere -- confirm.
pd.read_sql_query(
    sql="""
select key as skill_number, value as skill
from json_each((select goals from personas where persona_id = 1));
""",
    con=climate_db_con,
)

Unnamed: 0,skill_number,skill
0,0,Identify high-risk zones for natural disasters...
1,1,Make data-backed decisions to adjust policies ...
2,2,Monitor evolving climate-related threats in ne...


In [12]:
# Preview the first 10 rows of the bom_data table.
pd.read_sql_query(
    sql="""
select *
from bom_data
limit 10;
""",
    con=climate_db_con,
)

Unnamed: 0,Location,DMY,Precipitation,PrecipQual,RainDaysNum,RainDaysMeasure,Evaporation,EvapQual,EvapDaysNum,MaxTemp,...,Okta09,Okta09Qual,Okta12,Okta12Qual,Okta15,Okta15Qual,Okta18,Okta18Qual,Okta21,Okta21Qual
0,14015,1970-01-01,0.0,Y,,,10.2,Y,1,34.3,...,5,Y,4,Y,4,Y,8,Y,8,Y
1,14015,1970-01-02,0.0,Y,,,10.9,Y,1,33.4,...,8,Y,8,Y,8,Y,8,Y,8,Y
2,14015,1970-01-03,0.0,Y,,,8.4,Y,1,31.1,...,8,Y,8,Y,8,Y,8,Y,6,Y
3,14015,1970-01-04,0.5,Y,1.0,1.0,4.3,Y,1,33.3,...,8,Y,6,Y,5,Y,7,Y,5,Y
4,14015,1970-01-05,0.0,Y,,,7.4,Y,1,34.6,...,6,Y,7,Y,6,Y,4,Y,1,Y
5,14015,1970-01-06,47.5,Y,1.0,1.0,7.1,Y,1,34.1,...,2,Y,2,Y,3,Y,2,Y,0,Y
6,14015,1970-01-07,0.0,Y,,,11.4,Y,1,33.3,...,1,Y,2,Y,1,Y,1,Y,0,Y
7,14015,1970-01-08,0.0,Y,,,6.1,Y,1,33.4,...,1,Y,2,Y,1,Y,1,Y,0,Y
8,14015,1970-01-09,0.0,Y,,,11.2,Y,1,33.7,...,1,Y,1,Y,1,Y,3,Y,6,Y
9,14015,1970-01-10,0.5,Y,1.0,1.0,10.4,Y,1,35.0,...,7,Y,7,Y,6,Y,7,Y,4,Y


In [13]:
# Close the connection that was opened for the test queries above.
climate_db_con.close()