https://www.datacamp.com/community/tutorials/beginners-introduction-postgresql#comments
<br>
#### TO DO
1. Modularise code. 
2. Add append logic for tables. 
3. Connect to Google Cloud SQL etc. 

### Accessing SQL using magic Commands

In [1]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in the cells below.
%load_ext sql

In [4]:
%sql postgresql://postgres:Anand1996@localhost:5432/Covid19-India

In [5]:
# Drop the table and create it again 

%sql DROP table overall_stats

 * postgresql://postgres:***@localhost:5432/Covid19-India
Done.


[]

In [6]:
%%sql
-- Identifiers are double-quoted so PostgreSQL keeps their case; unquoted names are
-- folded to lower case and would not match the schema that create_table_overall_stats
-- builds later in this notebook.
-- IF NOT EXISTS makes the cell safe to re-run.
CREATE TABLE IF NOT EXISTS overall_stats(
"Date" DATE PRIMARY KEY,
"DailyConfirmed" INT NOT NULL,
"DailyDeceased" INT NOT NULL,
"DailyRecovered" INT NOT NULL,
"TotalConfirmed" INT NOT NULL,
"TotalDeceased" INT NOT NULL,
"TotalRecovered" INT NOT NULL
);

 * postgresql://postgres:***@localhost:5432/Covid19-India
Done.


[]

In [13]:
def query_table(limit_rows):
    """querying table using magic function
    can be embedded with python code and can also insert python vars etc. within {}
    """
    
    %sql SELECT * FROM overall_stats LIMIT {limit_rows}

In [14]:
# Query up to 10 rows from overall_stats.
query_table(10)

 * postgresql://postgres:***@localhost:5432/Covid19-India
0 rows affected.


### Accessing your database through your Python code 
https://docs.sqlalchemy.org/en/13/ <br>
SQLAlchemy provides a more suitable engine to interface with your RDBMS.
Supported Dialects :
PostgreSQL | MySQL | SQLite | Oracle | Microsoft SQL Server

In [None]:
#### Preparing DataFrames: the idea is to run the ingestion script with all of the
# fetching functions called with save=True.

In [1]:
import os

import pandas as pd
from psycopg2 import ProgrammingError, errors, IntegrityError
from sqlalchemy import create_engine
from sqlalchemy.exc import IntegrityError as SAIntegrityError

from Covid19_india_org_api import make_dataframe, get_test_dataframe, make_state_dataframe

#### Creating Tables

In [2]:
def create_table_overall_stats(engine):
    """Create the ``overall_stats`` table if it does not already exist.

    The schema is rigid and hard-coded (this can cause problems if the source
    data changes shape): a ``"Date"`` primary key plus six NOT NULL integer
    columns named ``Daily*`` / ``Total*``. Column names are double-quoted so
    PostgreSQL preserves their case.

    Args:
        engine: object connected to the database that exposes ``execute(sql)``.
    """
    # IF NOT EXISTS keeps the call idempotent, so re-running the notebook does not
    # fail with "relation already exists".
    engine.execute(""" CREATE TABLE IF NOT EXISTS overall_stats(
                "Date" DATE PRIMARY KEY,
                "DailyConfirmed" INT NOT NULL,
                "DailyDeceased" INT NOT NULL,
                "DailyRecovered" INT NOT NULL,
                "TotalConfirmed" INT NOT NULL,
                "TotalDeceased" INT NOT NULL,
                "TotalRecovered" INT NOT NULL
                )""")

In [3]:
def create_table_testing_stats(engine):
    """Create the ``testing_stats`` table if it does not already exist.

    The table has a ``"Date"`` primary key and a NOT NULL integer
    ``"TestingSamples"`` column. ``"Date"`` is also a foreign key into
    ``overall_stats("Date")``, so ``overall_stats`` must be created first.

    Args:
        engine: object connected to the database that exposes ``execute(sql)``.
    """
    # IF NOT EXISTS keeps the call idempotent across notebook re-runs.
    engine.execute(""" CREATE TABLE IF NOT EXISTS testing_stats(
                "Date" DATE PRIMARY KEY,
                "TestingSamples" INT NOT NULL,
                FOREIGN KEY("Date")
                    REFERENCES overall_stats("Date")
                )""")

In [4]:
# Region prefixes of the states_info columns, in table column order.
_STATES_INFO_REGIONS = (
    "Total",
    "AndamanAndNicobarIslands",
    "AndhraPradesh",
    "ArunachalPradesh",
    "Assam",
    "Bihar",
    "Chandigarh",
    "Chhattisgarh",
    "DadraAndNagarHaveliAndDamanAndDiu",
    "Dd",
    "Delhi",
    "Goa",
    "Gujarat",
    "Haryana",
    "HimachalPradesh",
    "JammuAndKashmir",
    "Jharkhand",
    "Karnataka",
    "Kerala",
    "Ladakh",
    "Lakshadweep",
    "MadhyaPradesh",
    "Maharashtra",
    "Manipur",
    "Meghalaya",
    "Mizoram",
    "Nagaland",
    "Odisha",
    "Puducherry",
    "Punjab",
    "Rajasthan",
    "Sikkim",
    "TamilNadu",
    "Telangana",
    "Tripura",
    "UttarPradesh",
    "Uttarakhand",
    "WestBengal",
    "StateUnassigned",
)

# Metric suffixes; every region contributes one INTEGER column per metric.
_STATES_INFO_METRICS = ("Confirmed", "Deceased", "Recovered")


def _states_info_ddl():
    """Build the CREATE TABLE statement for states_info from the region/metric lists."""
    columns = ['"Date" DATE PRIMARY KEY']
    columns.extend(
        f'"{region}.{metric}" INTEGER'
        for region in _STATES_INFO_REGIONS
        for metric in _STATES_INFO_METRICS
    )
    columns.append('FOREIGN KEY("Date") REFERENCES overall_stats("Date")')
    body = ",\n    ".join(columns)
    # IF NOT EXISTS keeps table creation idempotent across notebook re-runs.
    return f'CREATE TABLE IF NOT EXISTS "states_info" (\n    {body}\n)'


def create_table_state_info(engine):
    """Create the ``states_info`` table if it does not already exist.

    The table has a ``"Date"`` primary key, which is also a foreign key into
    ``overall_stats("Date")`` (so ``overall_stats`` must exist first), followed
    by one nullable INTEGER column per region and metric, named
    ``"<Region>.<Metric>"`` (for example ``"Kerala.Confirmed"``).

    Args:
        engine: object connected to the database that exposes ``execute(sql)``.
    """
    engine.execute(_states_info_ddl())

### Data Ingestion Function

In [5]:
# Known issue: appending the full DataFrame fails with duplicate-key errors, which should not happen with append but does.
# Workaround: count the records already in the table and store only the rows after that offset. This can be problematic.
# The table cannot simply be replaced because of the foreign key.

def add_data_table(engine, tablename, df):
    """Append the rows of ``df`` that are not yet stored in ``tablename``.

    The table's current row count is used as an offset: the first
    ``num_records`` rows of ``df`` are assumed to be in the table already and
    only the rows after them are appended. This relies on ``df`` holding the
    full history in the same order in which it was stored.

    Args:
        engine: SQLAlchemy engine connected to the database.
        tablename: name of an existing table. It is interpolated into the SQL
            text, so it must be a trusted identifier, never user input.
        df: DataFrame holding the complete data for the table.

    Returns:
        The number of rows appended, or 0 when the database rejected the insert
        with an integrity error (the error is printed, not raised).

    Raises:
        Any other database error, for example when the table does not exist.
    """
    # COUNT(*) lets the database do the counting instead of transferring every row.
    num_records = engine.execute(f"""SELECT COUNT(*) FROM {tablename}""").scalar()
    print(f'{num_records} Records in {tablename}')

    new_rows = df[num_records:]
    try:
        new_rows.to_sql(tablename, engine, if_exists='append')
    except SAIntegrityError as e:
        # SQLAlchemy wraps the driver's exception in its own class, so catching
        # psycopg2's IntegrityError here would never match.
        print(e)
        print('Update Master Table first')
        return 0

    print(f'Added {len(new_rows)} Records to table')
    return len(new_rows)

#### Main Function - Data Ingestion

In [6]:
# Create the engine used to execute SQL statements.
# The connection URL is read from the COVID19_DB_URL environment variable so that the
# password is never stored in the notebook. The fallback URL carries no password; in that
# case the PostgreSQL client library is expected to pick the password up from PGPASSWORD
# or ~/.pgpass.
engine = create_engine(
    os.environ.get('COVID19_DB_URL', 'postgresql://postgres@localhost:5432/Covid19-India')
)

In [13]:
# Creating Tables
# overall_stats goes first: testing_stats and states_info both declare a foreign key on its "Date" column.
create_table_overall_stats(engine)
create_table_testing_stats(engine)
create_table_state_info(engine)

In [7]:
# Ingesting overall stats data
# make_dataframe is imported from Covid19_india_org_api.
# NOTE(review): save=True presumably also persists the fetched data to disk — confirm in that module.
data = make_dataframe(save= True)

In [8]:
# Append the rows of `data` that are not yet stored in overall_stats.
add_data_table(engine, 'overall_stats', data)

177 Records in overall_stats
Added 1 Records to table


In [9]:
# Ingesting Testing Data

# The test frame has duplicate index entries for a single date, which would violate the
# unique constraint of the primary key; keep only the last row for each duplicated index value.
test = get_test_dataframe(save=True)
test = test.loc[~test.index.duplicated(keep='last')]
# NOTE(review): test[:-1] leaves the final row out of the upload. Presumably the latest date
# is not yet present in overall_stats (foreign key) or is incomplete — TODO confirm.
add_data_table(engine, 'testing_stats', test[:-1])

122 Records in testing_stats
Added 0 Records to table


In [10]:
# Ingest the per-state data into states_info.
state = make_state_dataframe(save=True)

# Flatten the two-level column index into "<Level0 title-cased>.<Level1>" labels,
# then strip the spaces so the labels match the states_info column names.
level_region = state.columns.get_level_values(0).str.title()
level_metric = state.columns.get_level_values(1)
cols = level_region + '.' + level_metric
state.columns = cols.str.replace(' ', '')

add_data_table(engine, 'states_info', state)

133 Records in states_info
Added 1 Records to table


### Dumping PostgreSQL DB
Backed up using GUI for now. <br>
https://www.postgresqltutorial.com/postgresql-backup-database/

In [11]:
import subprocess

In [12]:
# Dump the database to a plain-text SQL file with pg_dump.
# --no-password makes pg_dump fail rather than prompt, so credentials have to come from
# PGPASSWORD or ~/.pgpass.
# check=True raises CalledProcessError when pg_dump exits with a non-zero status, so a
# failed backup cannot go unnoticed.
BACKUP_FILE = '/Users/apple/Desktop/DS/Covid19-Kaggle_and_End-End_project/Data/Cleaned/Covid19-India_backup.sql'

subprocess.run(['pg_dump', '--host=localhost', '--dbname=Covid19-India',
                '--username=postgres', '--no-password', '--format=p',
                f'--file={BACKUP_FILE}'],
               check=True)

CompletedProcess(args=['pg_dump', '--host=localhost', '--dbname=Covid19-India', '--username=postgres', '--no-password', '--format=p', '--file=/Users/apple/Desktop/DS/Covid19-Kaggle_and_End-End_project/Data/Cleaned/Covid19-India_backup.sql'], returncode=0)