## DATA UPLOAD

In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
from pathlib import Path
from glob import glob
import pyodbc
from datetime import datetime as dt
from typing import Union, List

In [None]:
def _read_single_file(input_file: Path,
                      delimiter: str,
                      encoding: str,
                      header: int,
                      engine: str) -> DataFrame:
    """Read one delimited text file as all-string columns and tag its rows."""
    df = pd.read_csv(input_file,
                     sep=delimiter,
                     encoding=encoding,
                     header=header,
                     dtype=str,  # keep every value as text; no type inference
                     index_col=None,
                     engine=engine)
    # Row number within the source file, starting at 0.
    df['DTT_ID'] = np.arange(len(df))
    # Path of the file the rows were read from.
    df['DTT_FILENAME'] = str(input_file)
    return df


def read_data(input_path: Union[Path, str],
              file_extension: str = 'csv',
              delimiter: str = ',',
              encoding: str = 'utf-8',  # change to 'windows-1252' if 'utf-8' returns error
              header: int = 0,
              engine: str = 'c',  # when list out errors, use 'python', otherwise use 'c'
              ) -> Union[DataFrame, List[DataFrame]]:
    """Load one delimited file, or every matching file in a directory.

    Each resulting DataFrame has all columns read as strings plus two extra
    columns: ``DTT_ID`` (0-based row number within the file) and
    ``DTT_FILENAME`` (path of the source file).

    Args:
        input_path: A file to read, or a directory to scan (non-recursively)
            for ``*.<file_extension>`` files.
        file_extension: Extension used to select files when ``input_path``
            is a directory; ignored for a single file.
        delimiter: Field separator passed to ``pandas.read_csv``.
        encoding: Text encoding passed to ``pandas.read_csv``.
        header: Row number to use as the column names.
        engine: Parser engine passed to ``pandas.read_csv``.

    Returns:
        A single DataFrame when ``input_path`` is a file; a list of
        DataFrames (one per file, ordered by path) when it is a directory.
        The list is empty if no file matches.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    input_path = Path(input_path)

    if not input_path.exists():
        raise FileNotFoundError(f'File at path {input_path} does not exist')

    if input_path.is_dir():
        # Sort so that the position of each file in the result is reproducible.
        return [
            _read_single_file(input_file, delimiter, encoding, header, engine)
            for input_file in sorted(input_path.glob(f'*.{file_extension}'))
        ]

    return _read_single_file(input_path, delimiter, encoding, header, engine)
    

    

In [None]:
# Load the input data. The path is left blank here (placeholder - fill in);
# note that Path('') refers to the current directory, in which case
# read_data returns a list of DataFrames rather than a single one.
test=read_data(Path(r''))
# Show whether we got a single DataFrame or a list of them.
type(test)

In [None]:
# Source-file path recorded on the first row of the first loaded DataFrame
# (assumes `test` is the list returned for a directory input).
test[0].loc[0,'DTT_FILENAME']

In [None]:
# Stack all loaded DataFrames into one, renumbering the index from 0.
df_headers=pd.concat(test, ignore_index=True)
# (rows, columns) of the combined frame.
df_headers.shape

In [None]:
# Show the last two rows of the combined frame.
df_headers.tail(2)

In [None]:
# Replace missing values (NaN) with empty strings before uploading.
# Without this step the insert failed with SQL Server error 42000 / 8023:
#   "The incoming tabular data stream (TDS) remote procedure call (RPC)
#    protocol stream is incorrect. Parameter 18 (""): The supplied value is
#    not a valid instance of data type float. Check the source data for
#    invalid values."
df_headers=df_headers.fillna(value='')

In [None]:
class DataFrameUploader():
    """Upload a pandas DataFrame into a freshly created SQL Server table.

    The connection is opened in ``__init__`` via ``pyodbc.connect`` with
    ``trusted_connection='yes'`` and ``autocommit=True``, so every statement
    issued by :meth:`upload` is committed as soon as it runs.
    """

    def __init__(self, driver: str = None, server: str = None, database: str = None):
        """Open the ODBC connection.

        Args:
            driver: ODBC driver name, e.g. ``'{SQL Server}'``.
            server: Server to connect to.
            database: Database to connect to.
        """
        self.connection = pyodbc.connect(
            driver=driver,
            server=server,
            database=database,
            trusted_connection='yes',
            autocommit=True, # if this is omitted, will need to commit changes manually per query/command - use False to manage transactions
        )

    @staticmethod
    def _quote_identifier(name) -> str:
        """Return ``name`` wrapped in [brackets], with any ']' doubled."""
        return '[' + str(name).replace(']', ']]') + ']'

    def close(self):
        """Close the underlying database connection."""
        self.connection.close()

    def upload(self, dataframe: pd.DataFrame = None, initials: str = None):
        """Drop and recreate the target table, then bulk-insert ``dataframe``.

        The table is named ``RAW_xxx_<YYYYMMDD>_<initials>`` using today's
        date. One ``NVARCHAR(MAX)`` column is created per DataFrame column,
        and the rows are inserted positionally with ``executemany``.

        Args:
            dataframe: Data to upload; its column names become the table's
                column names.
            initials: Suffix appended to the table name.

        Raises:
            ValueError: If ``dataframe`` is None.
        """
        if dataframe is None:
            raise ValueError('dataframe must be provided')

        date = dt.today().strftime('%Y%m%d')
        table = f'RAW_xxx_{date}_{initials}'

        # Bracket-quote identifiers so names containing spaces, brackets or
        # other special characters cannot break (or alter) the statements.
        quoted_table = self._quote_identifier(table)
        # The same name embedded in a string literal needs quotes doubled.
        table_literal = quoted_table.replace("'", "''")

        statement = ','.join(
            f'{self._quote_identifier(value)} NVARCHAR(MAX)'
            for value in dataframe.columns
        )

        create_table = f'''IF OBJECT_ID('{table_literal}') IS NOT NULL 
        BEGIN
            DROP TABLE {quoted_table}
        END
        CREATE TABLE {quoted_table} (
        
            {statement})'''

        value_placeholders = ','.join('?' for _ in dataframe.columns)
        query = f'INSERT INTO {quoted_table} VALUES ({value_placeholders})'
        output = dataframe.to_numpy().tolist()

        cursor = self.connection.cursor()
        try:
            print(create_table)
            cursor.execute(create_table)

            # executemany rejects an empty parameter list, so an empty frame
            # just leaves the new table empty.
            if not output:
                return

            cursor.fast_executemany = True
            # Print timestamps around the insert to show how long it took.
            print(dt.now())
            cursor.executemany(query, output)
            print(dt.now())
        finally:
            # Release the cursor even when a statement fails.
            cursor.close()

In [None]:
# Connect with the generic 'SQL Server' ODBC driver (server/database left
# blank here - fill in) and upload the first loaded DataFrame.
test_loader=DataFrameUploader(driver='{SQL Server}',server='',database='')
# The target table name ends with the given initials.
test_loader.upload(dataframe=test[0],initials='EL')

## View existing table from SQL DB via Python

In [None]:
# Open a trusted connection (server/database left blank here - fill in).
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=;'
                      'Database=;'
                      'Trusted_Connection=yes;')

# Read the first 100 rows into a DataFrame, and always release the
# connection afterwards, even if the query fails.
try:
    sql_query = pd.read_sql_query('SELECT TOP 100 * FROM .dbo.',conn)
finally:
    conn.close()

# Display the result as the cell output.
sql_query

## Upload data to SQL Server

In [None]:
import pyodbc

# Sample rows for the insert example; the values are positional and are bound,
# in order, to the columns named in the INSERT statement.
price_data = [
    [2, 3, 1, 2.4, 100, '1/2/2019'],
    [3, 3, 5, 9.4, 300, '2/1/2020'],
    [4, 2, 1, 2.4, 200, '3/1/2021'],
]

# Print the name of each ODBC driver pyodbc reports, one per line.
for driver in pyodbc.drivers():
    print(driver)

In [None]:
# define the server name and the database name
server = ''
database = ''

# define our connection string
cnxn = pyodbc.connect('DRIVER={SQL Server Native Client RDA 11.0};\
                       SERVER=' + server + ';\
                       DATABASE=' + database + ';\
                       Trusted_connection=yes;')

# create the connection cursor
cursor = cnxn.cursor()

# define our insert query
insert_query = '''INSERT INTO test (close_price, high, low, open_price, volume, day_vale)
                  VALUES(?,?,?,?,?,?);'''


# loop through each row in data
for row in price_data:
    
    # define the values to insert
    values=(row[0],row[1],row[2],row[3],row[4],row[5])
    
    # insert the data into the database
    cursor.execute(insert_query, values)
    
# commit the inserts
cnxn.commit()

# grab all the rows in our database table
cursor.execute('SELECT * FROM test')

# loop through the results
for row in cursor:
    print(row)
    
# close the connection and remove the cursor
cursor.close()
cnxn.close()

In [None]:
## second example


# Create the table
create_table_query="""
-- create the table if it does not exist
IF OBJECT_ID ('test2') IS NULL

CREATE TABLE dbo.test2
(
  [CLIENT Varchar(6)] NVARCHAR(MAX),
  [COMPANY_CODE Varchar(8)] NVARCHAR(MAX),
  [DOCUMENT_NUMBER Varchar(20)] NVARCHAR(MAX),
  [FISCAL_YEAR Varchar(8)] NVARCHAR(MAX),
  [DOCUMENT_TYPE Varchar(4)] NVARCHAR(MAX),
  [DOCUMENT_DATE Varchar(16)] NVARCHAR(MAX),
  [POSTING_DATE Varchar(16)] NVARCHAR(MAX),
  [POSTING_PERIOD Varchar(4)] NVARCHAR(MAX),
  [USER_ID Varchar(24)] NVARCHAR(MAX),
  [CURRENCY Varchar(10)] NVARCHAR(MAX),
  [EXCHANGE_RATE Varchar(28)] NVARCHAR(MAX),
  [ENTRY_DATE Varchar(16)] NVARCHAR(MAX),
  [TRANSACTION_CODE Varchar(40)] NVARCHAR(MAX),
  [DOCUMENT_STATUS Varchar(40)] NVARCHAR(MAX),
  [DESCRIPTION Varchar(50)] NVARCHAR(MAX),
  [DTT_FILENAME] NVARCHAR(MAX)
)
"""

# Define the components of connection string
DRIVER = '{SQL Server}'
SERVER_NAME = ''
DATABASE_NAME = ''

# Define the connection string
CONNECTION_STRING = """
Driver={driver};
Server={server};
Database={database};
Trusted_Connection=yes;
""".format(
    driver=DRIVER,
    server=SERVER_NAME,
    database=DATABASE_NAME
)

# Define the Insert query
sql_insert = """
INSERT INTO dbo.test2
(
  [CLIENT Varchar(6)],
  [COMPANY_CODE Varchar(8)],
  [DOCUMENT_NUMBER Varchar(20)],
  [FISCAL_YEAR Varchar(8)],
  [DOCUMENT_TYPE Varchar(4)],
  [DOCUMENT_DATE Varchar(16)],
  [POSTING_DATE Varchar(16)],
  [POSTING_PERIOD Varchar(4)],
  [USER_ID Varchar(24)],
  [CURRENCY Varchar(10)],
  [EXCHANGE_RATE Varchar(28)],
  [ENTRY_DATE Varchar(16)],
  [TRANSACTION_CODE Varchar(40)],
  [DOCUMENT_STATUS Varchar(40)],
  [DESCRIPTION Varchar(50)],
  [DTT_FILENAME]
)
VALUES
(
    ?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?
)
"""

# df_headers is expected to be loaded already by an earlier cell.
# Print the head and the column types (a bare expression in the middle of a
# cell displays nothing, so print explicitly).
print(df_headers.head(4))
print(df_headers.dtypes)

# Convert the dataframe into a recordset
# NOTE(review): each row must have exactly 16 values to match the placeholders
# above; read_data also adds a DTT_ID column - confirm it is dropped first.
lt_headers = df_headers.values.tolist()

print(lt_headers[:5]) # as seen, it's a list of list

# Connect to the database
connection_object: pyodbc.Connection = pyodbc.connect(CONNECTION_STRING)
try:
    # Create a cursor object
    cursor_object: pyodbc.Cursor = connection_object.cursor()
    try:
        # Create the table in the database
        cursor_object.execute(create_table_query)

        # Commit the table we just created
        cursor_object.commit()

        # executemany rejects an empty parameter list, so skip the insert
        # when there is nothing to load
        if lt_headers:
            # Turn on fast load
            cursor_object.fast_executemany = True

            # Execute the Insert query
            cursor_object.executemany(sql_insert, lt_headers)

            # Commit the Inserts
            cursor_object.commit()
    finally:
        # Release the cursor even if a statement above failed
        cursor_object.close()
finally:
    # Release the connection even if a statement above failed
    connection_object.close()

In [None]:
## third example


# Create the table
# NOTE(review): the IF OBJECT_ID guard skips the CREATE when a table named
# test2 already exists. The previous example creates test2 with a different
# column set, in which case the INSERT below would fail - confirm the table
# name or drop the old table first.
create_table_query="""
-- create the table if it does not exist
IF OBJECT_ID ('test2') IS NULL

CREATE TABLE dbo.test2
(
    [Amount] NVARCHAR(30),
    [Date effective] NVARCHAR(30),
    [Date posted] NVARCHAR(30),
    [Document Type] NVARCHAR(30),
    [Foreign currency amount] NVARCHAR(30),
    [GL account] VARCHAR(10),
    [Journal description] NVARCHAR(100),
    [Journal ID] NVARCHAR(10),
    [Journal line number] NVARCHAR(30),
    [Posting user] NVARCHAR(30),
    [DTT_FILENAME] NVARCHAR(200)
)
"""

# Define the components of connection string
DRIVER = '{ODBC Driver 17 for SQL Server}'
SERVER_NAME = ''
DATABASE_NAME = ''

# Define the connection string
CONNECTION_STRING = """
Driver={driver};
Server={server};
Database={database};
Trusted_Connection=yes;
""".format(
    driver=DRIVER,
    server=SERVER_NAME,
    database=DATABASE_NAME
)

# Define the Insert query
sql_insert = """
INSERT INTO dbo.test2
(
    [Amount],
    [Date effective],
    [Date posted],
    [Document Type],
    [Foreign currency amount],
    [GL account],
    [Journal description],
    [Journal ID],
    [Journal line number],
    [Posting user],
    [DTT_FILENAME]
)
VALUES
(
    ?,?,?,?,?,?,?,?,?,?,?
)
"""

# df_headers is expected to be loaded already by an earlier cell.
# Print the head and the column types (a bare expression in the middle of a
# cell displays nothing, so print explicitly).
print(df_headers.head(4))
print(df_headers.dtypes)

# Convert the dataframe into a recordset (a list of lists)
# NOTE(review): each row must have exactly 11 values to match the placeholders
# above; read_data also adds a DTT_ID column - confirm it is dropped first.
lt_headers = df_headers.values.tolist()

# Connect to the database
connection_object: pyodbc.Connection = pyodbc.connect(CONNECTION_STRING)
try:
    # Create a cursor object
    cursor_object: pyodbc.Cursor = connection_object.cursor()
    try:
        # Create the table in the database
        cursor_object.execute(create_table_query)

        # Commit the table we just created
        cursor_object.commit()

        # executemany rejects an empty parameter list, so skip the insert
        # when there is nothing to load
        if lt_headers:
            # Fast load is left switched off for this example
            cursor_object.fast_executemany = False

            # Execute the Insert query
            cursor_object.executemany(sql_insert, lt_headers)

            # Commit the Inserts
            cursor_object.commit()
    finally:
        # Release the cursor even if a statement above failed
        cursor_object.close()
finally:
    # Release the connection even if a statement above failed
    connection_object.close()

In [None]:
# Show the first two rows of the combined frame.
df_headers.head(2)