In [42]:
#!/usr/bin/env python
 
'''
UTAS_DB_tools.py
A. J. McCulloch, February 2020
'''

####################################################################################################
# Import modules
####################################################################################################

import pandas as pd # Required for dataframe manipulation
import cx_Oracle # Required for connection to database
from getpass import getpass # Required for password input

####################################################################################################
# Define classes
####################################################################################################
class DW_connect:
    """
    Connection to the data warehouse (Oracle) made through cx_Oracle.

    Creating an instance prompts for the user's password and opens the
    connection; call disconnect() to close it again.

    Attributes set by __init__:
        username  -- username passed by the caller
        password  -- password read with getpass()
        dsn_tns   -- DSN built by cx_Oracle.makedsn
        conn      -- the open cx_Oracle connection
        connected -- True once the connection has been opened
    """

    # Class-level default for the connection flag. __init__/disconnect assign
    # self.connected, which sets the flag on the instance only.
    connected = False # Initialise the connection attribute flag

    # Establish connection to the data warehouse
    def __init__(self, username):
        """
        Prompt for a password and connect to the data warehouse as `username`.

        Raises cx_Oracle.Error (after printing a message) if the connection
        cannot be established.
        """
        if not self.connected: # Don't connect if already connected
            self.username = username # Username to connect to data warehouse
            # NOTE(review): the plaintext password stays on the instance for as
            # long as the object lives; kept because callers may read it.
            self.password = getpass() # Get password associated with user above

            # Make the required Data Source Name (DSN) Transparent Network Substrate (TNS)
            # Info comes from an email from Nathalie (FW: DB Client Installs for Data Warehouse Access)
            # Originating from Andrew with a document Oracle DB Drivers and Install
            self.dsn_tns = cx_Oracle.makedsn('exa1-scan.its.utas.edu.au', '1521', service_name=r'edwprod_maa')

            # Connect to the database. A failed connection raises here, so this
            # is the call that has to be guarded (not the print below).
            try:
                self.conn = cx_Oracle.connect(user = self.username, password = self.password, dsn = self.dsn_tns)
            except cx_Oracle.Error:
                print("Could not connect to database")
                raise # Let the caller see the underlying Oracle error

            # Connection has been made: report it and update connection flag
            print("Connected to database, Oracle version {}".format(self.conn.version))
            self.connected = True
        else:
            print("Connection already established")

    # Disconnect from the data warehouse
    def disconnect(self):
        """
        Close the connection and reset the connection flag.

        Does nothing (apart from printing a message) when there is no open
        connection, so it is safe to call more than once.
        """
        conn = getattr(self, 'conn', None)
        if conn is None or not self.connected:
            print("No active connection to close")
            return
        conn.close()
        print("Connection to database ended")
        self.connected = False

# Class required to store data warehouse tables
class tablist(object):
    """Empty container; attributes are attached to its instances at runtime
    (init_tables stores one attribute per schema on the global `tables`)."""
####################################################################################################
# Define functions
####################################################################################################
# A function to run SQL query over a particular connection
def runSQL(query, conn = None):
    """
    Run an SQL query over a connection and return the result as a DataFrame.

    Parameters:
        query -- SQL text to execute
        conn  -- connection to run the query over; when omitted, the global
                 EDW.conn is used

    Returns:
        pandas.DataFrame produced by pandas.read_sql_query

    Raises:
        NameError if no connection is given and no global EDW exists
    """
    # Resolve the default at call time. A default of `EDW.conn` in the signature
    # is evaluated once, when the function is defined: that fails if EDW does not
    # exist yet and keeps pointing at the old connection after a reconnect.
    if conn is None:
        try:
            conn = EDW.conn
        except NameError:
            raise NameError("No connection given and no global EDW connection exists; "
                            "create one first, e.g. EDW = DW_connect(username)")
    # Use pandas to execute SQL
    return pd.read_sql_query(query, con = conn)
     
# Return currently accessible tables
def get_tables(schema='owner'):
    """
    List the tables in all_tables together with one extra column.

    Parameters:
        schema -- name of the all_tables column that is selected next to
                  table_name and used as the first sort key (default 'owner')

    Returns:
        the result of runSQL for the generated query
    """
    # The column name is substituted twice: in the select list and in the ordering
    template = """
        SELECT
            table_name, {0}
        FROM
            all_tables
        ORDER BY
            {0}, table_name
        """
    sql = template.format(schema)
    # Execute over the default connection
    return runSQL(sql)

# Function to return all available tables
def init_tables():
    """
    Populate the global `tables` object with one attribute per schema.

    The table listing from get_tables() is split on its OWNER column; each
    subset is stored on `tables` under the owner's name. The names of the
    schemas found are printed.
    """
    listing = get_tables() # All accessible tables
    schemas = listing.OWNER.unique() # Distinct owners, in order of appearance
    for owner in schemas:
        in_schema = listing['OWNER'] == owner
        # Index is reset so rows of each subset are numbered from 0
        setattr(tables, owner, listing[in_schema].reset_index(drop=True))
    print('Available schema are ' + ', '.join(map('{}'.format, schemas)))

# Function to return an entire table
def get_all(table, row):
    """
    Return every row of one table from a schema listing.

    Parameters:
        table -- schema listing with OWNER and TABLE_NAME columns, e.g. one of
                 the attributes stored on `tables` by init_tables()
        row   -- index into the listing selecting the table to fetch

    Returns:
        the result of runSQL for 'SELECT * FROM <owner>.<table_name>'
    """
    # Read owner and name from the listing that was passed in, rather than
    # from tables.STUDENTS, so listings of other schemas work as well
    owner = table.OWNER[row]
    name = table.TABLE_NAME[row]
    q = 'SELECT * FROM {}.{}'.format(owner, name)
    return runSQL(q)

# Connecting to the data warehouse
## Example code
### Initialise connection

In [2]:
####################################################################################################
####################################################################################################
# Code starts here
####################################################################################################
####################################################################################################

# Connect to the data warehouse
# DW_connect prompts for the password interactively (getpass) and opens the connection.
# The object is kept in the global EDW; runSQL uses EDW.conn as its default
# connection, so EDW must exist before any query is run.
EDW = DW_connect('ajm32')

 ···············


Connected to database version 12.1.0.2.0


### Find available tables

In [43]:
# `tables` has to be a global with exactly this name: init_tables() attaches
# one attribute per schema to it.
tables = tablist() # Create tablist object
init_tables() # Query the accessible tables and print the schema found

Available schema are CTXSYS, DATA_MART, STUDENTS, SYS, SYSTEM, XDB


### Return a table

In [62]:
# Fetch the whole table listed in row 0 of the STUDENTS schema listing
df = get_all(tables.STUDENTS, 0)
# Show only the first five rows rather than the full table
df.head(5)

Unnamed: 0,ENROL_YEAR,AS_AT_DATE,ISO_WEEK_NO,STUDENT_ID,D_PERSON_KEY,COURSE_CODE,COURSE_VERSION_NO,COURSE_ATTEMPT_NO,COURSE_NAME,COURSE_LOAD,...,PLACEHOLDER_4_CHAR_COURSE,NTAP_FLAG,SEM_ORIG_LOC_SAME_FLAG,QUOTA_COURSE_IND,QUOTA_COURSE_FLAG,NON_CORE_FLAG,NON_CORE_YEAR_FLAG,EFTSL,PRIMARY_COURSE_ENROLMENTS,COURSE_ENROLMENTS
0,2020,20200216,7,425208,96761,M4Z,1,1,Bachelor of Health and Human Services (Leaders...,1.0,...,M4Z,N,Y,Non-Quota,N,N,N,0.25,1,1
1,2020,20200216,7,426568,765663,H4F,2,1,Bachelor of Nursing with Professional Honours ...,2.0,...,H4F,Y,Y,Non-Quota,N,Y,Y,0.5,1,1
2,2020,20200216,7,21335,592565,H3D,2,1,Bachelor of Nursing,3.0,...,H3D,N,N,Quota,Y,N,N,1.0,1,1
3,2020,20200216,7,12059,656126,H6F,2,1,Graduate Diploma of Nursing (Specialisation),2.0,...,H6F,N,N,Non-Quota,N,N,N,0.25,1,1
4,2020,20200216,7,10871,661558,S8O,1,1,Master of Science (Biological Sciences),0.0,...,S8O,N,Y,Non-Quota,N,N,N,0.5,1,1


### Disconnect from the database server

In [63]:
# Disconnect from the data warehouse
# Closes EDW.conn and resets EDW.connected to False
EDW.disconnect()

Connection to database ended


# Manipulating the data