In [8]:
"""This script will be used to open a connection to our database engine. We will create a function
that will return both the cursor (a cursor is an object that allows us to execute our sql queries) and connection objects.
Our second function will return the result of an SQL query as a dataframe.
"""
#import libraries

import pyodbc #import pyodbc since we are currently using sql server as our database engine
import pandas as pd # dataframe library
import numpy as np #mathematical library
import datetime #date library


In [9]:
#Create a function that returns the connection and cursor objects. As usual, we use try/except to capture any exceptions.
#This also helps us avoid locking our database if an exception occurs.

def connect_sql_server(
    driver="ODBC Driver 17 for SQL Server",
    server_name="USCLDBITVMP01",
    database_name="BI_Tableau",
    user_name="usertableau",
    password="usertableau$",
):
    """Open a pyodbc connection to SQL Server and return it with a cursor.

    Called without arguments, the function connects with the same driver,
    server, database and credentials it has always used.

    Args:
        driver: Name of the ODBC driver placed in the DRIVER={...} clause.
        server_name: Server name placed in the SERVER= clause.
        database_name: Database placed in the DATABASE= clause.
        user_name: Login placed in the UID= clause.
        password: Password placed in the PWD= clause.

    Returns:
        A two-item list ``[connection, cursor]`` on success, or ``None`` when
        any exception is raised while connecting (the exception text is
        printed). Callers must check for ``None`` before indexing the result.
    """
    # NOTE(review): the default credentials are hard-coded in source. Consider
    # passing them in from an environment variable or a secrets store.

    connection = None

    try:
        # Three braces around the driver: the f-string collapses "{{" / "}}"
        # into literal braces, and the inner pair interpolates the variable,
        # producing DRIVER={<driver name>}.
        # NOTE(review): the keyword documented for ODBC is usually spelled
        # "Trusted_Connection"; "Trust_connection" is kept unchanged here
        # because UID/PWD are also supplied - confirm the intended
        # authentication mode before altering it.
        connection_string = f"""DRIVER={{{driver}}};
                              SERVER={server_name};
                              DATABASE={database_name};
                              Trust_connection = yes;
                              UID={user_name};
                              PWD={password}"""

        # Open the connection, then create the cursor used to run queries.
        connection = pyodbc.connect(connection_string)
        cursor_sql = connection.cursor()

        return [connection, cursor_sql]

    except Exception as e:
        print(str(e))

        # If the connection opened but the cursor could not be created,
        # release the connection instead of leaking it.
        if connection is not None:
            try:
                connection.close()
            except Exception as close_error:
                print(str(close_error))

        return None
        
#Function to return sql query as a dataframe. The function receives the query statement as well as the parameters required.

def querySQL(query, parameters):
    """Run a SQL statement and return its rows as a pandas DataFrame.

    Args:
        query: The SQL statement to execute.
        parameters: The parameters passed to ``cursor.execute`` together
            with the statement.

    Returns:
        A ``pandas.DataFrame`` whose columns are named after the first item of
        each entry in ``cursor.description``. A query that returns no rows
        yields an empty DataFrame with those columns. If anything fails, the
        exception message is returned as a ``str`` instead, so callers should
        check the type of the result.

    Notes:
        Rows are handed to pandas as plain tuples, so each column keeps the
        Python types produced by the driver instead of being coerced to a
        single dtype through an intermediate numpy array.
    """
    connection = None
    cursor = None

    try:
        # Open ONE connection and use the cursor that belongs to it.
        handles = connect_sql_server()
        if handles is None:
            # connect_sql_server() already printed the underlying error.
            return "Could not open a connection to the database"
        connection, cursor = handles

        cursor.execute(query, parameters)

        # Only the column name (first field of each description entry) is
        # needed; this avoids hard-coding column names per query.
        names = [column[0] for column in cursor.description]

        # Collect the rows of the first result set and of every following
        # result set, stopping at the first empty one or when none are left.
        records = []
        rows = cursor.fetchall()
        while rows:
            records.extend(tuple(row) for row in rows)
            rows = cursor.fetchall() if cursor.nextset() else None

        return pd.DataFrame(records, columns=names)

    except Exception as e:
        # Errors are reported to the caller as text; cleanup is in finally.
        return str(e)

    finally:
        # Always release the cursor and the connection, on success or error.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if connection is not None:
                connection.close()
                
