# CPSC 368 SQL Sample

https://www.students.cs.ubc.ca/~cs-368/resources/connecting-to-db.html

To upload tables: 

- Import "knm_datasetup.sql" to SSH server: `scp "<PATH_TO_START>/knm_datasetup.sql" <CWL>@remote.students.cs.ubc.ca:/<PATH_TO_END>/`
- SSH connect to UBC department servers: `ssh <CWL>@remote.students.cs.ubc.ca`
- Move to location of "knm_datasetup.sql": `cd <PATH_TO_SQL>`
- Enter MySQL within SSH: `rlwrap sqlplus ora_<CWL>@stu`
    - Password: `<studentnum>`
- In MySQL, run `start knm_datasetup.sql;` to run all SQL imports

To run SQL in notebook (only tested within Jupyter Lab):

- Open terminal
- Run `ssh -l <CWL> -L 127.0.0.1:1522:dbhost.students.cs.ubc.ca:1522 remote.students.cs.ubc.ca` to access SSH server

In [1]:
import oracledb
import pandas as pd

In [2]:
# INPUT USER INFO HERE
CWL = ""
studentnum = ""

In [3]:
# Simple test run
dsn = oracledb.makedsn("localhost", 1522, service_name="stu")
connection = oracledb.connect(user=f"ora_{CWL}", password=f"a{studentnum}", dsn=dsn)
cur = connection.cursor()

try:
    # SQL code
    for row in cur.execute(
        """
        SELECT * FROM KFF2019_adult
        """
        ):
        print(row)
except Exception as e:
    print(f"Error executing SQL query: {e}")
finally: 
    cur.close()
    connection.close()

('United States', 0.612, 0.077, 0.145, 0.02, 0.016, 0.129, 1.0)
('Alabama', 0.598, 0.075, 0.116, 0.036, 0.025, 0.149, 1.0)
('Alaska', 0.582, 0.047, 0.159, 0.011, 0.047, 0.153, 1.0)
('Arizona', 0.569, 0.068, 0.171, 0.019, 0.019, 0.154, 1.0)
('Arkansas', 0.545, 0.073, 0.195, 0.036, 0.018, 0.132, 1.0)
('California', 0.58, 0.086, 0.202, 0.013, 0.01, 0.11, 1.0)
('Colorado', 0.642, 0.088, 0.127, 0.013, 0.026, 0.105, 1.0)
('Connecticut', 0.645, 0.066, 0.182, 0.016, 0.008, 0.083, 1.0)
('Delaware', 0.639, 0.058, 0.164, 0.019, 0.022, 0.097, 1.0)
('District of Columbia', 0.655, 0.069, 0.21, 0.009, 0.012, 0.045, 1.0)
('Florida', 0.527, 0.131, 0.101, 0.023, 0.022, 0.195, 1.0)
('Georgia', 0.602, 0.074, 0.088, 0.023, 0.024, 0.189, 1.0)
('Hawaii', 0.697, 0.052, 0.141, 0.016, 0.037, 0.058, 1.0)
('Idaho', 0.603, 0.113, 0.083, 0.023, 0.018, 0.16, 1.0)
('Illinois', 0.66, 0.07, 0.14, 0.02, 0.008, 0.103, 1.0)
('Indiana', 0.652, 0.059, 0.136, 0.023, 0.012, 0.117, 1.0)
('Iowa', 0.679, 0.069, 0.155, 0.015, 0.0

In [4]:
# KFF2019_NEW
dsn = oracledb.makedsn("localhost", 1522, service_name="stu")
connection = oracledb.connect(user=f"ora_{CWL}", password=f"a{studentnum}", dsn=dsn)
cur = connection.cursor()

try:
    # Remove KFF2019_NEW view only if it exists
    cur.execute("SELECT COUNT(*) FROM ALL_VIEWS WHERE VIEW_NAME = 'KFF2019_NEW'")
    view_exists = cur.fetchone()[0]
    if view_exists:
        cur.execute("DROP VIEW KFF2019_NEW CASCADE CONSTRAINTS")
    
    # Create KFF2019_NEW view
    cur.execute(
        """
        CREATE VIEW KFF2019_NEW AS 
        SELECT kffa1."Location" AS Location, kffa1."Uninsured" AS All_Uninsured, kfff1."Uninsured" AS Female_Uninsured, kffm1."Uninsured" AS Male_Uninsured
        FROM KFF2019_adult kffa1
        INNER JOIN KFF2019_female kfff1 ON kffa1."Location" = kfff1."Location"
        INNER JOIN KFF2019_male kffm1 ON kffa1."Location" = kffm1."Location"
        WHERE kffa1."Location" != 'United States'
        """
    )
    cur.execute(
        """
        SELECT * 
        FROM KFF2019_NEW
        """
    )
    
    # Fetch SELECT results 
    rows = cur.fetchall()
    
    # Get column names from cursor
    columns = [desc[0] for desc in cur.description]
    
    # Create DataFrame
    df = pd.DataFrame(rows, columns=columns)
    
    print("Completed dataframe.")
    
except Exception as e:
    print(f"Error executing SQL query: {e}")

finally:
    # Close cursor
    cur.close()
    connection.close()

Completed dataframe.


In [5]:
display(df.head())

Unnamed: 0,LOCATION,ALL_UNINSURED,FEMALE_UNINSURED,MALE_UNINSURED
0,Alabama,0.149,0.133,0.167
1,Alaska,0.153,0.119,0.187
2,Arizona,0.154,0.138,0.17
3,Arkansas,0.132,0.113,0.151
4,California,0.11,0.095,0.125


In [6]:
# USCDI_FINAL
dsn = oracledb.makedsn("localhost", 1522, service_name="stu")
connection = oracledb.connect(user=f"ora_{CWL}", password=f"a{studentnum}", dsn=dsn)
cur = connection.cursor()

try:
    # Remove USCDI_FINAL view only if it exists
    cur.execute("SELECT COUNT(*) FROM ALL_VIEWS WHERE VIEW_NAME = 'USCDI_FINAL'")
    view_exists = cur.fetchone()[0]
    if view_exists:
        cur.execute("DROP VIEW USCDI_FINAL CASCADE CONSTRAINTS")
    
    # Create USCDI_FINAL view
    cur.execute(
        """
        CREATE VIEW USCDI_FINAL AS 
        SELECT USCDI_MID."YearStart" AS YearStart, 
            USCDI_MID."YearEnd" AS YearEnd, 
            USCDI_MID."LocationDesc" AS LocationDesc, 
            USCDI_MID."Topic" AS Topic, 
            USCDI_MID."Question" AS Question, 
            USCDI_MID."DataValueUnit" AS DataValueUnit, 
            USCDI_MID."DataValueType" AS DataValueType, 
            USCDI_MID."DataValue" AS DataValue, 
            USCDI_MID."StratificationCategory1" AS StratificationCategory1, 
            USCDI_MID."Stratification1" AS Stratification1, 
            USCDI_MID."Has2019" AS Has2019, 
            USCDI_MID."Range" AS Range, 
            (USCDI_MID."DataValue" / USCDI_MID."Range") AS AvgDataValue
        FROM (
            SELECT cdif1."YearStart", cdif1."YearEnd", cdif1."LocationDesc", cdif1."Topic", cdif1."Question", 
                   cdif1."DataValueUnit", cdif1."DataValueType", cdif1."DataValue", 
                   cdif1."StratificationCategory1", cdif1."Stratification1", 
                   CAST(
                       CASE 
                           WHEN ((cdif1."YearStart" <= 2019) AND (cdif1."YearEnd" >= 2019)) THEN 1
                           ELSE 0
                       END AS NUMBER(1, 0)
                   ) AS "Has2019", 
                   CAST(
                       (cdif1."YearEnd" - cdif1."YearStart" + 1) AS NUMBER(2, 0)
                   ) AS "Range"
            FROM USCDI_filter cdif1
            WHERE cdif1."LocationDesc" != 'United States'
        ) USCDI_MID
        """
    )
    cur.execute(
        """
        SELECT * 
        FROM USCDI
        """
    )
    
    # Fetch SELECT results 
    rows2 = cur.fetchall()
    
    # Get column names from cursor
    columns2 = [desc[0] for desc in cur.description]
    
    # Create DataFrame
    df2 = pd.DataFrame(rows2, columns=columns2)
    
    print("Completed dataframe.")
    
except Exception as e:
    print(f"Error executing SQL query: {e}")

finally:
    # Close cursor and connection
    cur.close()
    connection.close()

Completed dataframe.


In [7]:
display(df2.head())

Unnamed: 0,YearStart,YearEnd,LocationDesc,Topic,Question,DataValueUnit,DataValueType,DataValue,StratificationCategory1,Stratification1,Has2019,Range,AvgDataValue
0,2015,2019,Wyoming,Cancer,"Prostate cancer mortality among all males, und...","per 100,000",Age-adjusted Rate,17.2,Sex,Male,1,5,3.44
1,2016,2020,Hawaii,Cancer,"Cervical cancer mortality among all females, u...","per 100,000",Age-adjusted Rate,,Sex,Male,1,5,
2,2016,2020,New Mexico,Cancer,"Breast cancer mortality among all females, und...","per 100,000",Crude Rate,26.4,Overall,Overall,1,5,5.28
3,2016,2020,Arkansas,Cancer,"Prostate cancer mortality among all males, und...","per 100,000",Crude Rate,20.2,Sex,Male,1,5,4.04
4,2016,2020,Indiana,Cancer,Invasive cancer (all sites combined) mortality...,"per 100,000",Age-adjusted Rate,201.3,Sex,Male,1,5,40.26
