In [2]:
import requests
import warnings, sys
import urllib
import itertools
from requests.exceptions import ConnectionError, HTTPError, Timeout, TooManyRedirects
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import traceback
import logging
import logging.handlers
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pyodbc
import sqlalchemy as sa
from sqlalchemy import create_engine, event
import json
import time as ti
import datetime
from datetime import datetime
import re
import os
import asyncio
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets
import ctypes
import threading

#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
global stage, conn
conn = False
con = None
try:
    
    global ImportActive, StageActive, ProdActive
    logging.basicConfig(filename = 'CommonConfigManualLog.log',
                        filemode='a',
                        format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S',
                        level=logging.DEBUG)
    logging.info(f'\nLOG START: {datetime.now()}\n')

    def open_connection():
        global server, database, driver, dformat, connection, con
        with open(r"config\config.json", 'r') as fh:
            config = json.load(fh)
        server = config['server']
        database = config['database']
        driver = config['driver']
        dformat = config['dformat']
        connection = f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes'
        con = pyodbc.connect(connection)
        return con
    
    def connection_test():
        cur = con.cursor()
        cur.execute("SELECT @@version")
        row = cur.fetchone()
        print("Connection established to: ",row[0])
        cur.close()
        con.commit()
        logging.info(f"\nConnection established to: {row[0]}\n")
        return 
        
    def close_connection():
        con.close()
        conn = False
        return
    
    def checktables(con, tbl):
        if stage == False:
            cur = con.cursor()
            query = "SELECT * FROM information_schema.Tables WHERE table_schema = 'GCV_PRD' and table_name = '%s'" % tbl
            cur.execute(query)
            output = cur.fetchall()
            if output == []:
                output = f'\n[GCV_PRD].{tbl} does not exist in the database and will need to be created\n'
                print(output, end = "\r")
                logging.info(output, end = "\r")
                cur.close()
                return False
            else:
                print(output, end = "\r")
                cur.close()
            return True
        else:
            cur = con.cursor()
            query = "SELECT * FROM information_schema.Tables WHERE table_schema = 'GCV_STG' and table_name = '%s'" % tbl
            cur.execute(query)
            output = cur.fetchall()
            if output == []:
                output = f'\n[GCV_STG].{tbl} does not exist in the database and will need to be created\n'
                print(output, end = "\r")
                logging.info(output, end = "\r")
                cur.close()
                return False
            else:
                print(output, end = "\r")
                cur.close()
            return True

    
    def checkbackups(con, tbl):
        if stage == False:
            cur = con.cursor()
            query = "SELECT * FROM information_schema.Tables WHERE table_schema = 'GCV_PRD' and table_name = '%s_backup'" % tbl
            cur.execute(query)
            output = cur.fetchall()
            if output == []:
                output = f'\n[GCV_PRD].{tbl}_backup does not exist in the database and will need to be created once parent table has data\n'
                print(output, end = "\r")
                logging.info(output)
                cur.close()
                return False
            else:
                print(f'\nCurrent backup for [GCV_PRD].{tbl} exists.', end = "\r")
                logging.info(f'\nCurrent backup for [GCV_PRD].{tbl} exists.\n')
                cur.close()
                return True
        else:
            cur = con.cursor()
            query = "SELECT * FROM information_schema.Tables WHERE table_schema = 'GCV_STG' and table_name = '%s_backup'" % tbl
            cur.execute(query)
            output = cur.fetchall()
            if output == []:
                output = f'\n[GCV_STG].{tbl}_backup does not exist in the database and will need to be created once parent table has data\n'
                print(output, end = "\r")
                logging.info(output)
                cur.close()
                return False
            else:
                print(f'\nCurrent backup for [GCV_STG].{tbl}', end = "\r")
                logging.info(f'\nCurrent backup for [GCV_STG].{tbl}\n')
                cur.close()
                return True
    
    def tablecontent(con, tbl):
        if stage == False:
            cur = con.cursor()
            query = "SELECT COUNT(*) FROM [GCV_PRD].[%s]" % tbl
            cur.execute(query)
            output = cur.fetchone()
            #print(output)
            if output[0] == 0:
                print(f'\n{output[0]} Existing records', end = "\r")
                logging.info(f'\n{output[0]} Existing records\n')
                cur.close()
                return False
            else:
                print(f'\n{output[0]} Existing records', end = "\r")
                logging.info(f'\n{output[0]} Existing records\n')
                cur.close()
                return True
        else:
            cur = con.cursor()
            query = "SELECT COUNT(*) FROM [GCV_STG].[%s]" % tbl
            cur.execute(query)
            output = cur.fetchone()
            #print(output)
            if output[0] == 0:
                print(f'\n{output[0]} Existing records', end = "\r")
                logging.info(f'\n{output[0]} Existing records\n')
                cur.close()
                return False
            else:
                print(f'\n{output[0]} Existing records', end = "\r")
                logging.info(f'\n{output[0]} Existing records\n')
                cur.close()
                return True
        
    def backup_data(con, tbl):
        if stage == False:
            if is_identity(con, tbl) == True:
                try:
                    bstart_time = datetime.now()
                    cur = con.cursor()
                    query = f"SET IDENTITY_INSERT [GCV_PRD].[%s_backup] ON; INSERT INTO [GCV_PRD].[%s_backup] ({fields}) SELECT {fields} FROM [GCV_PRD].[%s]; SET IDENTITY_INSERT [GCV_PRD].[%s_backup] OFF;" % (tbl, tbl, tbl, tbl)
                    print(f'\n{query}\n')
                    cur.execute(query)
                    con.commit()
                    cur.close()
                    bend_time = datetime.now()
                    print('\nBackup Duration: {}'.format(bend_time - bstart_time))
                    duration = bend_time - bstart_time
                    logging.info(f'\nBackup Duration: {duration}')
                    print("")
                    logging.info('\n')   
                except(Exception, pyodbc.DatabaseError) as e:
                    print("")
                    logging.info('\n')
                    print(e)
                    logging.exception("message")
                    cur.close()
                    con.rollback()
            else:
                try:
                    bstart_time = datetime.now()
                    cur = con.cursor()
                    query = "INSERT INTO [GCV_PRD].[%s_backup] SELECT * FROM [GCV_PRD].[%s]" % (tbl, tbl)
                    cur.execute(query)
                    con.commit()
                    cur.close()
                    bend_time = datetime.now()
                    print('\nBackup Duration: {}'.format(bend_time - bstart_time))
                    duration = bend_time - bstart_time
                    logging.info(f'\nBackup Duration: {duration}')
                    print("")
                    logging.info('\n')   
                except(Exception, pyodbc.DatabaseError) as e:
                    print("")
                    logging.info('\n')
                    print(e)
                    logging.exception("message")
                    cur.close()
                    con.rollback()
            return
        else:
            if is_identity(con, tbl) == True:
                try:
                    bstart_time = datetime.now()
                    cur = con.cursor()
                    query = f"SET IDENTITY_INSERT [GCV_STG].[%s_backup] ON; INSERT INTO [GCV_STG].[%s_backup] ({fields}) SELECT {fields} FROM [GCV_STG].[%s]; SET IDENTITY_INSERT [GCV_STG].[%s_backup] OFF;" % (tbl, tbl, tbl, tbl)
                    print(f'\n{query}\n')
                    cur.execute(query)
                    con.commit()
                    cur.close()
                    bend_time = datetime.now()
                    print('\nBackup Duration: {}'.format(bend_time - bstart_time))
                    duration = bend_time - bstart_time
                    logging.info(f'\nBackup Duration: {duration}')
                    print("")
                    logging.info('\n')   
                except(Exception, pyodbc.DatabaseError) as e:
                    print("")
                    logging.info('\n')
                    print(e)
                    logging.exception("message")
                    cur.close()
                    con.rollback()
            else:
                try:
                    bstart_time = datetime.now()
                    cur = con.cursor()
                    query = "INSERT INTO [GCV_STG].[%s_backup] SELECT * FROM [GCV_STG].[%s]" % (tbl, tbl)
                    cur.execute(query)
                    con.commit()
                    cur.close()
                    bend_time = datetime.now()
                    print('\nBackup Duration: {}'.format(bend_time - bstart_time))
                    duration = bend_time - bstart_time
                    logging.info(f'\nBackup Duration: {duration}')
                    print("")
                    logging.info('\n')   
                except(Exception, pyodbc.DatabaseError) as e:
                    print("")
                    logging.info('\n')
                    print(e)
                    logging.exception("message")
                    cur.close()
                    con.rollback()
            return
    
    def get_fields(con, tbl):
        global fields
        if stage == False:
            schema = 'GCV_PRD'
            try:
                cur = con.cursor()
                query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'" % (schema, tbl)
                print(f'\n{query}\n')
                cur.execute(query)
                columns = cur.fetchall()
                columns = columns[0:]
                columns = list(zip(*columns))[0]
                fields = ", ".join(map(str, columns))
                print(f'\n{fields}\n')
                con.commit()
                cur.close()
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
            return fields
        else:
            schema = 'GCV_STG'
            try:
                cur = con.cursor()
                query = f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'" % (schema, tbl)
                print(f'\n{query}\n')
                cur.execute(query)
                columns = cur.fetchall()
                columns = columns[0:]
                columns = list(zip(*columns))[0]
                fields = ", ".join(map(str, columns))
                print(f'\n{fields}\n')
                con.commit()
                cur.close()
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
            return fields
        
    def is_identity(con, tbl):
        if stage == False:
            try:
                cur = con.cursor()
                query = f"SELECT is_identity FROM sys.identity_columns WHERE object_id in ( SELECT id FROM sysobjects WHERE NAME = '%s')" % (tbl)
                print(f'\n{query}\n')
                cur.execute(query)
                output = cur.fetchone()
                if output is None:
                    print(f'\n{output}\n')
                    con.commit()
                    cur.close()
                    return False
                else:
                    output = output[0]
                    print(f'\n{output}\n')
                    con.commit()
                    cur.close()
                    return True
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
            return 
        else:
            try:
                cur = con.cursor()
                query = "SELECT is_identity FROM sys.identity_columns WHERE object_id in ( SELECT id FROM sysobjects WHERE NAME = '%s')" % (tbl)
                print(f'\n{query}\n')
                cur.execute(query)
                output = cur.fetchone()
                if output is None:
                    print(f'\n{output}\n')
                    con.commit()
                    cur.close()
                    return False
                else:
                    output = output[0]
                    print(f'\n{output}\n')
                    con.commit()
                    cur.close()
                    return True
                con.commit()
                cur.close()
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
            return
        
    def create_backuptable(con, tbl):
        if stage == False:
            try:
                bstart_time = datetime.now()
                cur = con.cursor()
                query = "SELECT * INTO [GCV_PRD].[%s_backup] FROM [GCV_PRD].[%s]" % (tbl, tbl)
                cur.execute(query)
                con.commit()
                cur.close()
                bend_time = datetime.now()
                logging.info(f'\n[GCV_PRD].{tbl} backup table has been created.\n')
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
                return False
            return True
        else:
            try:
                bstart_time = datetime.now()
                cur = con.cursor()
                query = "SELECT * INTO [GCV_STG].[%s_backup] FROM [GCV_STG].[%s]" % (tbl, tbl)
                cur.execute(query)
                con.commit()
                cur.close()
                bend_time = datetime.now()
                logging.info(f'\n[GCV_STG].{tbl} backup table has been created.\n')
                print("")
                logging.info('\n')   
            except(Exception, pyodbc.DatabaseError) as e:
                print("")
                logging.info('\n')
                print(e)
                logging.exception("message")
                cur.close()
                con.rollback()
                return False
            return True
        
    def backupcheck(con, tbl):
        global rowcount
        if stage == False:
            cur = con.cursor()
            query = "SELECT COUNT(*) FROM [GCV_PRD].[%s] UNION ALL SELECT COUNT(*) FROM [GCV_PRD].[%s_backup]" % (tbl, tbl)
            cur.execute(query)
            output = cur.fetchone()
            rowcount = []
            while output is not None:
                rowcount.append(output[0])
                output = cur.fetchone()
            print('\n# of records in each table: ', rowcount[0], recnum, end = "\r")
            logging.info(f'\n# of records in each table: {rowcount[0]}, {recnum}')
            con.commit()
            cur.close()
            if recnum == rowcount[0]:
                truncate_backup(con, tbl)
                get_fields(con, tbl)
                backup_data(con, tbl)
            elif recnum < rowcount[0]:
                truncate_backup(con, tbl)
                get_fields(con, tbl)
                backup_data(con, tbl)
            elif rowcount[0] == 0:
                pass
            return rowcount
        else:
            cur = con.cursor()
            query = "SELECT COUNT(*) FROM [GCV_STG].[%s] UNION ALL SELECT COUNT(*) FROM [GCV_STG].[%s_backup]" % (tbl, tbl)
            cur.execute(query)
            output = cur.fetchone()
            rowcount = []
            while output is not None:
                rowcount.append(output[0])
                output = cur.fetchone()
            print('\n# of records in each table: ', rowcount[0], recnum, end = "\r")
            logging.info(f'\n# of records in each table: {rowcount[0]}, {recnum}')
            con.commit()
            cur.close()
            if recnum == rowcount[0]:
                truncate_backup(con, tbl)
                get_fields(con, tbl)
                backup_data(con, tbl)
            elif recnum < rowcount[0]:
                truncate_backup(con, tbl)
                get_fields(con, tbl)
                backup_data(con, tbl)
            elif rowcount[0] == 0:
                pass
            return rowcount
    
    def truncate_table(con, tbl):
        if stage == False:
            try:
                cur = con.cursor()
                query = "TRUNCATE TABLE [GCV_PRD].[%s]" % tbl
                cur.execute(query)
                print(f'\n[GCV_PRD].{tbl} has been succesfully truncated to import new data.')
                logging.info(f'\n[GCV_PRD].{tbl} has been succesfully truncated to import new data.\n')
                con.commit()
                cur.close()
            except Exception as err:
                logging.exception("message")
                cur.close()
                con.rollback()
                #raise err
        else:
            try:
                cur = con.cursor()
                query = "TRUNCATE TABLE [GCV_STG].[%s]" % tbl
                cur.execute(query)
                print(f'\n[GCV_STG].{tbl} has been succesfully truncated to import new data.')
                logging.info(f'\n[GCV_STG].{tbl} has been succesfully truncated to import new data.\n')
                con.commit()
                cur.close()
            except Exception as err:
                logging.exception("message")
                cur.close()
                con.rollback()
                #raise err
            
    def truncate_backup(con, tbl):
        if stage == False:
            try:
                cur = con.cursor()
                query = "TRUNCATE TABLE [GCV_PRD].[%s_backup]" % tbl
                cur.execute(query)
                print(f'\n[GCV_PRD].{tbl}_backup has been succesfully truncated to create the next backup.')
                logging.info(f'\n[GCV_PRD].{tbl}_backup has been succesfully truncated to create the next backup.\n')
                con.commit()
                cur.close()
            except Exception as err:
                logging.exception("message")
                cur.close()
                con.rollback()
                #raise err
        else:
            try:
                cur = con.cursor()
                query = "TRUNCATE TABLE [GCV_STG].[%s_backup]" % tbl
                cur.execute(query)
                print(f'\n[GCV_STG].{tbl}_backup has been succesfully truncated to create the next backup.')
                logging.info(f'\n[GCV_STG].{tbl}_backup has been succesfully truncated to create the next backup.\n')
                con.commit()
                cur.close()
            except Exception as err:
                logging.exception("message")
                cur.close()
                con.rollback()
                #raise err

    def importdata(con, tbl):
        if stage == False:
            cur = con.cursor()
            engine = sa.create_engine(f'mssql+pyodbc://{server}/{database}?driver={dformat}', fast_executemany = True)
            #pd.io.sql._is_sqlalchemy_connectable(engine)
            df.to_sql(f'{tbl}', engine, index = False, if_exists = 'append', schema = 'GCV_PRD')
            cur.close()
            con.commit()
            return
        else:
            cur = con.cursor()
            engine = sa.create_engine(f'mssql+pyodbc://{server}/{database}?driver={dformat}', fast_executemany = True)
            #pd.io.sql._is_sqlalchemy_connectable(engine)
            df.to_sql(f'{tbl}', engine, index = False, if_exists = 'append', schema = 'GCV_STG')
            cur.close()
            con.commit()
            return
            
    def getData(CV):
        # convert to config file/table
        if stage == False:
            url = f'https://api.pwcinternal.com:7443/GlobalCVService/GlobalCVService.svc/cv/{CV}'
            with open(r'config\apiconnect.json') as f:
                headers = json.load(f)
        else:
            url = f'https://api-staging.pwcinternal.com:7443/GlobalCVService/GlobalCVService.svc/cv/{CV}'
            with open(r'config\apiconnectb.json') as f:
                headers = json.load(f)
                
        retry_strategy = Retry(total = 10, status_forcelist=[429, 413, 503], method_whitelist=["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"])
        adapter = HTTPAdapter(max_retries=retry_strategy)
        http = requests.Session()
        
        http.mount("https://", adapter)
        r = http.get(url, headers=headers, timeout = 100)
        rjson = r.json()
        keylist = ('URI','Categories','RelatedTerms')
        
        if 'ErrorMessage' in rjson:
            if rjson['ErrorMessage'] == 'CV does not exist in CVMaster List':
                print('CV Currently Missing from CVMaster List')
                return
        else:
            for key in keylist:
                rjson = [{k: v for k, v in d.items() if k != key} for d in rjson]
        
            global df, recnum
            df = pd.DataFrame(rjson)
            datelist = ('CreatedDate','ModifiedDate','EffectiveDate', 'HierarchyNodeEffectiveDate', 'HierarchyNodeExpirationDate','RelModifiedDate','ExpiryDate','Effective_Date','Expiration_Date','Created_Datetime','Last_Modified_Datetime')
            date_format = "%Y%m%d%H%M%S"
            
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', FutureWarning)
                for date in enumerate(datelist): 
                    if date[1] in df:
                        
                        if df[date[1]].iteritems() != 'None':
                            if df[date[1]].str.contains('Z').items():
                                df[date[1]] = df[date[1]].str.replace("\.[0-9]*Z", "").str.replace("Z", "")
                        if df[date[1]].iteritems() != 'None':   
                            if df[date[1]].str.contains('-').items():
                                df[date[1]] = df[date[1]].str.replace("\W+", "")
                                
                        df[date[1]] = df[date[1]].mask(df[date[1]].str.len() > 14, df[date[1]].str[:-3])
                        df[date[1]] = pd.to_datetime(df[date[1]], format=date_format, errors = 'coerce')
                        
                    else:
                        continue
                    
            recnum = len(df.index)
            
            #display(df)
            #df.to_excel('output1.xlsx')
            
            if tablecontent(con, f'{CV}') == False:
                importdata(con, f'{CV}')
                print(f'\n{CV} has been updated with {recnum} records')
                logging.info(f'\n{CV} has been updated with {recnum} records\n')
            else:
                truncate_table(con, f'{CV}')
                importdata(con, f'{CV}')
                print(f'\n{CV} has been updated with {recnum} records')
                logging.info(f'\n{CV} has been updated with {recnum} records\n')
                
            #colnames = list(df)
            
            #display(colnames) 
             #[['Effective_Date','Expiration_Date','Created_Datetime','Last_Modified_Datetime']])
            
            #--fetch column names for table creation and datatypes
            #for colname, dt in itertools.product([df.columns],[df.dtypes]):
                #print(dt)
            #print('\n')
        
            return df
    
    
    #Fetches list of names for both prod and stage import
    with open(r'C:\Users\gmoye001\config\CVsStage.csv', 'r') as cv_config:
        CVsStage = cv_config.read().split(',')
    with open(r'C:\Users\gmoye001\config\CVsProd.csv', 'r') as cv_config:
        CVsProd = cv_config.read().split(',')

    #Used for testing a group of CV's
    CV = ['LEL-PwCLegalEntity-en','NS-PwCNetworkNode-en',
           'NS-PwCNetworkNode-en-Territory',
           'ORD-CostCenter']
    #Used for testing a single CV
    CVx = ['ORD-CostCenter']
    
    CVd = ['ORD-CostCenter']
    
    #Upon Initial execution, this is the first process that takes place to test connectivity between you and the server
    st = datetime.now()
    open_connection()
    connection_test()
    close_connection()
    
    #Staging process
    for urls in enumerate(CVsStage):
        global stage, conn
        stage = True
        conn = True
        open_connection()
        #getData(urls[1])
        #close_connection()
        if checktables(con, urls[1]) == False:
            print("Moving to next table.\n", end = "\r")
        else:
            if checkbackups(con, urls[1]) == False:
                create_backuptable(con, urls[1])
                createdbackup = True
            else:
                createdbackup = False
            try:
                gstart_time = datetime.now()
                getData(urls[1])
                if createdbackup == False:
                    backupcheck(con, urls[1])
                else:
                    pass
                close_connection()
                stage = False
                conn = False
                gend_time = datetime.now()
                print('\nDuration: {}'.format(gend_time - gstart_time))
                logging.info('\nDuration: {}'.format(gend_time - gstart_time))
                print("")
            except (Exception, pyodbc.DatabaseError) as e:
                print("")
                print(e)
                logging.exception('\n')
                logging.exception("message")
        for i in range(10, -1, -1):
                print(f"{i} seconds until next table is imported ", end = "\r")
                ti.sleep(1)
    
    
    #Production process
    for urls in enumerate(CVsProd):
        open_connection()
        stage = False
        conn = True
        #getData(urls[1])
        #close_connection()
        if checktables(con, urls[1]) == False:
            print("Moving to next table.\n", end = "\r")
        else:
            if checkbackups(con, urls[1]) == False:
                create_backuptable(con, urls[1])
                createdbackup = True
            else:
                createdbackup = False
            try:
                gstart_time = datetime.now()
                getData(urls[1])
                if createdbackup == False:
                    backupcheck(con, urls[1])
                else:
                    pass
                close_connection()
                conn = False
                gend_time = datetime.now()
                print('\nDuration: {}'.format(gend_time - gstart_time))
                logging.info('\nDuration: {}'.format(gend_time - gstart_time))
                print("")
            except (Exception, pyodbc.DatabaseError) as e:
                print("")
                print(e)
                logging.exception('\n')
                logging.exception("message")
        for i in range(10, -1, -1):
                print(f"{i} seconds until next table is imported ", end = "\r")
                ti.sleep(1)
            
    et = datetime.now()
    print('Total Execution Duration: {}'.format(et - st),'\n-Import Completed-')
    tt = et - st
    logging.info(f'\nTotal Execution Duration: {tt}\n')
    logging.info('\n-Import Completed-\n')
    
#exceptions 
except (Exception, pyodbc.DatabaseError) as error:
        print(error)
        logging.exception("message")
        pass
    
except requests.exceptions.HTTPError as errh:
    print("Http Error:",  errh)
    conn = False
    logging.exception("message")
    
except requests.exceptions.ConnectionError as errc:
    print("Error Connecting:", errc)
    conn = False
    logging.exception("message")
    
except requests.exceptions.Timeout as errt:
    print("Timeout Error:", errt)
    conn = False
    logging.exception("message")
    
except requests.exceptions.RequestException as erru:
    print("Unidentified Request Exception:", erru)
    conn = False
    logging.exception("message")
        
finally:
    logging.info(f'\nLOG END: {datetime.now()}\n')
    if conn == True:
        close_connection()
        cur.close()

Connection established to:  Microsoft SQL Server 2016 (SP2) (KB4052908) - 13.0.5026.0 (X64) 
	Mar 18 2018 09:11:49 
	Copyright (c) Microsoft Corporation
	Enterprise Edition (64-bit) on Windows Server 2016 Datacenter 10.0 <X64> (Build 14393: ) (Hypervisor)

[('GCV_API', 'GCV_STG', 'Onboarding_ORD-CostCentreLegalEntity-en-GlobalHierarchy_Denormalised', 'BASE TABLE')]
Current backup for [GCV_STG].Onboarding_ORD-CostCentreLegalEntity-en-GlobalHierarchy_Denormalised
6581 Existing records
[GCV_STG].Onboarding_ORD-CostCentreLegalEntity-en-GlobalHierarchy_Denormalised has been succesfully truncated to import new data.

Onboarding_ORD-CostCentreLegalEntity-en-GlobalHierarchy_Denormalised has been updated with 6581 records

# of records in each table:  6581 6581
[GCV_STG].Onboarding_ORD-CostCentreLegalEntity-en-GlobalHierarchy_Denormalised_backup has been succesfully truncated to create the next backup.

SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = 'GCV_STG' AND TABLE_


None


Backup Duration: 0:00:00.209413


Duration: 0:00:23.290083

[('GCV_API', 'GCV_PRD', 'NS-PwCNetworkNode-en-Territory', 'BASE TABLE')]
Current backup for [GCV_PRD].NS-PwCNetworkNode-en-Territory exists.
112 Existing records
[GCV_PRD].NS-PwCNetworkNode-en-Territory has been succesfully truncated to import new data.

NS-PwCNetworkNode-en-Territory has been updated with 112 records

# of records in each table:  112 112
[GCV_PRD].NS-PwCNetworkNode-en-Territory_backup has been succesfully truncated to create the next backup.

SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = 'GCV_PRD' AND TABLE_NAME = 'NS-PwCNetworkNode-en-Territory'


Id, Name, VocabName, ActiveStatus, ApprovalStatus, CreatedDate, ModifiedDate, NodeName, PRID, PartyID, NetworkNodeID, LocalAccountingCurrency, RegionalReportingCurrency, Definition, EffectiveDate, LastEditedReason, Systemofrecord, SourceMDMGERCode, SourceMDMSortPrefix, NodeTypeId, NodeType, Abbreviation, SCMember, GGCEPrefix, NLTMe

[('GCV_API', 'GCV_PRD', 'CES-Country-en', 'BASE TABLE')]
Current backup for [GCV_PRD].CES-Country-en exists.
299 Existing records
[GCV_PRD].CES-Country-en has been succesfully truncated to import new data.

CES-Country-en has been updated with 299 records

# of records in each table:  299 299
[GCV_PRD].CES-Country-en_backup has been succesfully truncated to create the next backup.

SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = 'GCV_PRD' AND TABLE_NAME = 'CES-Country-en'


Id, Name, VocabName, ActiveStatus, CreatedDate, ModifiedDate, COUNTRY.KEY, WB.CODE, DB.NAME, WB.LONG.NAME, PRIMARY.CURRENCY, WB.SHORT.NAME, ISO.CODE, ISO.LONG.NAME, ISO.SHORT.NAME, TERRITORY.KEY, WB.COMMENTS, ALT.ISO.LONG.NAME, importTime



SELECT is_identity FROM sys.identity_columns WHERE object_id in ( SELECT id FROM sysobjects WHERE NAME = 'CES-Country-en')


None


Backup Duration: 0:00:00.219696


Duration: 0:00:15.802165

Total Execution Duration: 0:07:42.651949 
-Import Completed-


In [6]:
#Pseudocode for automated field creation/deletion

import requests
import urllib
import itertools
from requests.exceptions import ConnectionError, HTTPError, Timeout, TooManyRedirects
import traceback
import logging
import logging.handlers
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pyodbc
import sqlalchemy as sa
from sqlalchemy import create_engine, event
import json
import time as ti
import datetime
from datetime import datetime
import re

global df, df2
con = None
try:
    
    def open_connection():
        global server, database, driver, connection, con
        with open(r'C:\Users\gmoye001\config\config.json', 'r') as fh:
            config = json.load(fh)
        server = config['server']
        database = config['database']
        driver = config['driver']
        connection = f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes'
        con = pyodbc.connect(connection)
        return con
    
    def connection_test():
        cur = con.cursor()
        cur.execute("SELECT @@version")
        row = cur.fetchone()
        print("Connection established to: ",row[0])
        cur.close()
        con.commit()
        return 
        
    def close_connection():
        con.close()
        return
    
    def fetch_dbtbl_column_names(con, tbl):
        if stage == False:
            cur = con.cursor()
            query = "SELECT * FROM [GCV_PRD].[%s]" % tbl
            cur.execute(query)
            dbcols = [column[0] for column in cur.description]
            cur.close()
            return dbcols
        else:
            cur = con.cursor()
            query = "SELECT * FROM [GCV_STG].[%s]" % tbl
            cur.execute(query)
            dbcols = [column[0] for column in cur.description]
            cur.close()
            return dbcols
        
    #def create_field():
    #    global df, df2
    #    for field in enumerate(apifields):
    #        if field[1] not in dbtblfields:
    #            print('creating db table field')
    #            
    #            if df[f'{field[1]}'].dtype == 'int64':
    #                datatype = 'BigInt'
    #            if df[f'{field[1]}'].dtype == 'object':
    #                print(max(df.loc[:,field[1]].apply(len)))
    #                if max(df.loc[:,field[1]].apply(len)) < 255:
    #                    datatype = 'nvarchar(510)'
    #                elif max(df.loc[:, field[1]].apply(len)) > 255:
    #                    datatype = 'varchar(MAX)'
    #            if 'Date' in field[1]:
    #                datatype = 'datetime'  
    #            else:
    #                datatype = df[f'{field[1]}'].dtype
    #            print('datatype is', df[f'{field[1]}'].dtype, ', Post Analysis datatype is', datatype,', fieldname is', field[1])
    #            df2 = pd.concat([df2, df[f'{field[1]}']], axis =1)
    #    return print('Field Creation Complete')

    #    
    #def delete_field():
    #    global df, df2
    #    for field in enumerate(dbtblfields):
    #        if field[1] not in apifields:
    #            print('deleting db table field')
    #            print('datatype is', df2[f'{field[1]}'].dtype, ', fieldname is', field[1])
    #            del df2[f'{field[1]}']
    #    return print('Field Deletion Complete')
    #
    #print(len(apifields), 'fields in the api table')
    #print(len(dbtblfields[:-1]), 'fields in the database table')
    #
    #if len(apifields) > len(dbtblfields[:-1]):
    #    create_field()
    #    df2.insert(len(df2)+1, 'importTime', df2.pop('importTime'))
    #    display(df, df2)
    #if len(apifields) < len(dbtblfields[:-1]):
    #    delete_field()
    #    display(df, df2)
    
    with open(r'C:\Users\gmoye001\config\CVsStage.csv', 'r') as cv_config:
        CVsStage = cv_config.read().split(',')
    with open(r'C:\Users\gmoye001\config\CVsProd.csv', 'r') as cv_config:
        CVsProd = cv_config.read().split(',')

    def getData(CV):
        
        if stage == False:
            url = f'https://api.pwcinternal.com:7443/GlobalCVService/GlobalCVService.svc/cv/{CV}'
            with open(r'C:\Users\gmoye001\config\apiconnect.json') as f:
                headers = json.load(f)
        else:
            url = f'https://api-staging.pwcinternal.com:7443/GlobalCVService/GlobalCVService.svc/cv/{CV}'
            with open(r'C:\Users\gmoye001\config\apiconnectb.json') as f:
                headers = json.load(f)
    
        
        r = requests.get(url, headers=headers)
        rjson = r.json()
        keylist = ('URI','Categories','RelatedTerms')
        
        for key in keylist:
            rjson = [{k: v for k, v in d.items() if k != key} for d in rjson]
        
        global df
        df = pd.DataFrame(rjson)
        
        apifields = list(df.columns)
        #fetch_dbtbl_column_names(con, f'{CV}')
        
        print(apifields, '\n')
        dbcols = fetch_dbtbl_column_names(con, f'{CV}')
        print(dbcols[:-1], '\n')
        
    open_connection()
    connection_test()
    close_connection()
    
    for urls in enumerate(CVsStage):
        global stage, conn
        open_connection()
        stage = True
        conn = True
        getData(urls[1])
        close_connection()
        conn = False
        for i in range(2, -1, -1):
                print(f"{i} seconds until next table is imported ", end = "\r")
                ti.sleep(1)

    for urls in enumerate(CVsProd):
        open_connection()
        stage = False
        conn = True
        getData(urls[1])
        close_connection()
        conn = False
        for i in range(2, -1, -1):
                print(f"{i} seconds until next table is imported ", end = "\r")
                ti.sleep(1)
    


#def create_field(con, tbl):
#    for fields in enumerate(apifields):
#        if stage == False:
#            if fields[1] not in dbtblfields
#                cur = con.cursor()
#                query = f"ALTER TABLE [GCV_PRD].[%s] ADD {fields[1]} ;" % tbl
#                cur.execute(query)
#                output = cur.fetchall()
#                cur.close()
#                print(output, end = "\r")
#                logging.info(output, end = "\r")
#
#        else:
#            if fields[1] not in dbtblfields
#                cur = con.cursor()
#                query = f"ALTER TABLE [GCV_STG].[%s] ADD {fields[1]} ;" % tbl
#                cur.execute(query)
#                output = cur.fetchall()
#                cur.close()
#                print(output, end = "\r")
#                logging.info(output, end = "\r")
#        return
#    
#def delete_field(con, tbl):
#    for fields in enumerate(apifields):
#        if stage == False:
#            if fields[1] not in apifields
#                cur = con.cursor()
#                query = f"ALTER TABLE [GCV_PRD].[%s] DROP COLUMN {fields[1]};" % tbl
#                cur.execute(query)
#                output = cur.fetchall()
#                cur.close()
#                print(output, end = "\r")
#                logging.info(output, end = "\r")
#        else:
#            if fields[1] not in dbtblfields
#                cur = con.cursor()
#                query = f"ALTER TABLE [GCV_STG].[%s] DROP COLUMN {fields[1]};" % tbl
#                cur.execute(query)
#                output = cur.fetchall()
#                cur.close()
#                print(output, end = "\r")
#                logging.info(output, end = "\r")
#        return

#exceptions 
except (Exception, pyodbc.DatabaseError) as error:
        print(error)
        pass
    
except requests.exceptions.HTTPError as errh:
    print("Http Error:",  errh)
    con = False
    
except requests.exceptions.ConnectionError as errc:
    print("Error Connecting:", errc)
    con = False
    
except requests.exceptions.Timeout as errt:
    print("Timeout Error:", errt)
    con = False
    
except requests.exceptions.RequestException as erru:
    print("Unidentified Request Exception:", erru)
    con = False
        
finally:
    if conn == True:
        close_connection()
    if cur

Connection established to:  Microsoft SQL Server 2016 (SP2) (KB4052908) - 13.0.5026.0 (X64) 
	Mar 18 2018 09:11:49 
	Copyright (c) Microsoft Corporation
	Enterprise Edition (64-bit) on Windows Server 2016 Datacenter 10.0 <X64> (Build 14393: ) (Hypervisor)

['CostCenterPwCNetworkNodeId', 'CostCenterPwCNetworkDescriptor', 'CostCenterId', 'LocalCostCenterCode', 'CostCenterName', 'UniversalCostCenterCode', 'LegalEntityPartyId', 'LegalEntityName', 'ModifiedDate'] 

['CostCenterPwCNetworkNodeId', 'CostCenterPwCNetworkDescriptor', 'CostCenterId', 'LocalCostCenterCode', 'CostCenterName', 'UniversalCostCenterCode', 'LegalEntityPartyId', 'LegalEntityName', 'ModifiedDate'] 

['Local_Cost_Center_Code', 'Cost_Center_Name', 'Cost_Center_SK', 'Universal_Cost_Center_Code', 'Cost_Center_NK', 'Cost_Type_Descriptor', 'Cost_Type_UID', 'PwC_Network_Descriptor', 'PwC_Network_UID', 'OS_Global_Sub_LoS_Descriptor', 'Global_LoS_UID', 'OS_Function_Descriptor', 'Function_UID', 'Employing_Flag', 'Cost_Center_Statu

KeyboardInterrupt: 

In [5]:
import requests
import urllib
import itertools
from requests.exceptions import ConnectionError, HTTPError, Timeout, TooManyRedirects
import traceback
import logging
import logging.handlers
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pyodbc
import sqlalchemy as sa
from sqlalchemy import create_engine, event
import json
import time as ti
import datetime
from datetime import datetime
import re

    #Opens connection to the SQL Server via the stored config file in the config folder
def open_connection():
    global server, database, driver, connection, con
    with open(r'C:\Users\gmoye001\config\config.json', 'r') as fh:
        config = json.load(fh)
    server = config['server']
    database = config['database']
    driver = config['driver']
    connection = f'DRIVER={driver};SERVER={server};DATABASE={database};Trusted_Connection=yes'
    con = pyodbc.connect(connection)
    return con

#Tests the connction to the sql server by executing a query that returns the information of the server
def connection_test():
    cur = con.cursor()
    cur.execute("SELECT @@version")
    row = cur.fetchone()
    print("Connection established to: ",row[0])
    cur.close()
    con.commit()
    logging.info(f"\nConnection established to: {row[0]}\n")
    return 

#simply closes the connection to the server    
def close_connection():
    con.close()
    return

In [6]:
open_connection()

<pyodbc.Connection at 0x1b77f90c2a0>

In [7]:
connection_test()

Connection established to:  Microsoft SQL Server 2016 (SP2) (KB4052908) - 13.0.5026.0 (X64) 
	Mar 18 2018 09:11:49 
	Copyright (c) Microsoft Corporation
	Enterprise Edition (64-bit) on Windows Server 2016 Datacenter 10.0 <X64> (Build 14393: ) (Hypervisor)



In [8]:
close_connection()