In [25]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [26]:
# Pick the CSV file to load from the data folder.
# os.listdir() returns entries in arbitrary order and also lists non-CSV entries
# (hidden files, checkpoint folders), so keep only *.csv names and sort them to
# make the choice deterministic across runs and machines.
csv_files = sorted(
    name for name in os.listdir(path='data') if name.lower().endswith('.csv')
)
if not csv_files:
    raise FileNotFoundError("No .csv file found in the 'data' folder")

# First CSV in sorted order
filename = csv_files[0]

print(filename)

240819.csv


In [27]:
# Load the selected CSV into a dataframe.
# index_col=False stops pandas from using the first column as the index.
csv_path = f'data/{filename}'
df_financial_data = pd.read_csv(csv_path, index_col=False)

# Preview the first rows
print(df_financial_data.head())

  Account Type    Account Number Transaction Date  Cheque Number  \
0         Visa  4514093528573336         8/4/2024            NaN   
1         Visa  4514093528573336         8/4/2024            NaN   
2         Visa  4514093528573336         8/5/2024            NaN   
3         Visa  4514093528573336         8/7/2024            NaN   
4         Visa  4514093528573336         8/8/2024            NaN   

                          Description 1  Description 2   CAD$  USD$  
0                GAME SHACK BRAMPTON ON            NaN -86.69   NaN  
1  COCO FRESH TEA & JUICE BRBRAMPTON ON            NaN  -7.12   NaN  
2   GO TRANSIT - COMPLIANCE OTORONTO ON            NaN -37.00   NaN  
3          TIM HORTONS #9337 TORONTO ON            NaN  -8.10   NaN  
4               Subway 14721 Toronto ON            NaN -14.67   NaN  


In [28]:
# Rename columns to the upper-snake-case names used for the rest of the notebook
cols = ['ACCOUNT_TYPE','ACCOUNT_NUMBER','TRANSACTION_DATE','CHEQUE_NUMBER','DESCRIPTION_ONE','DESCRIPTION_TWO','CAD','USD']
df_financial_data.columns = cols

# Trim all strings first, so the exact-match filters below are not defeated by
# leading/trailing whitespace in the source file
df_financial_data = df_financial_data.map(lambda x: x.strip() if isinstance(x, str) else x)

# Make all blank values into NULL
df_financial_data = df_financial_data.replace(r'^\s*$', None, regex=True)

# Remove savings account transactions
is_savings = df_financial_data['ACCOUNT_TYPE'] == 'Savings'

# Remove credit card payments
is_card_payment = df_financial_data['DESCRIPTION_ONE'] == 'PAYMENT - THANK YOU / PAIEMENT - MERCI'

# Keep only rows matching neither filter; building a new frame (instead of
# dropping in place) keeps this cell safe to re-run
df_financial_data = df_financial_data[~(is_savings | is_card_payment)]

print(df_financial_data.head())

  ACCOUNT_TYPE    ACCOUNT_NUMBER TRANSACTION_DATE  CHEQUE_NUMBER  \
0         Visa  4514093528573336         8/4/2024            NaN   
1         Visa  4514093528573336         8/4/2024            NaN   
2         Visa  4514093528573336         8/5/2024            NaN   
3         Visa  4514093528573336         8/7/2024            NaN   
4         Visa  4514093528573336         8/8/2024            NaN   

                        DESCRIPTION_ONE  DESCRIPTION_TWO    CAD  USD  
0                GAME SHACK BRAMPTON ON              NaN -86.69  NaN  
1  COCO FRESH TEA & JUICE BRBRAMPTON ON              NaN  -7.12  NaN  
2   GO TRANSIT - COMPLIANCE OTORONTO ON              NaN -37.00  NaN  
3          TIM HORTONS #9337 TORONTO ON              NaN  -8.10  NaN  
4               Subway 14721 Toronto ON              NaN -14.67  NaN  


In [29]:
# Load the SQL Server settings from the local secrets file into the environment
load_dotenv('secrets.env')

# SQL Connection
uid = 'sa'
pwd = os.getenv('SQL_SERVER_PWD')
server = os.getenv('DATABASE_URL')
database = 'FINANCIAL_TRANSACTIONS'
driver = 'ODBC+Driver+17+for+SQL+Server'

# Fail fast: without this check a missing variable would be formatted into the
# connection URL as the literal text "None" and only fail later at connect time
missing = [name for name, value in (('SQL_SERVER_PWD', pwd), ('DATABASE_URL', server)) if not value]
if missing:
    raise RuntimeError(f"Missing required setting(s) in secrets.env / environment: {', '.join(missing)}")

# Percent-encode the password so characters such as '@', ':' or '/' cannot be
# mistaken for URL delimiters when the connection string is parsed.
# NOTE(review): assumes SQL_SERVER_PWD is stored as the raw password, not pre-encoded.
SQL_conn_str = f"mssql+pyodbc://{uid}:{quote(pwd, safe='')}@{server}/{database}?driver={driver}"
sql_conn = create_engine(SQL_conn_str)

In [30]:
# Optional: empty the target table before loading (disabled; uncomment the lines below to run)
# empty_query = \
# text("""
# TRUNCATE TABLE [dbo].[RBC];
# """)

# cursor = sql_conn.connect()
# cursor.execute(empty_query)
# cursor.commit()
# cursor.close()

In [31]:
# Export
# Make sure the validation folder exists; to_excel does not create missing directories
os.makedirs('Data Validation', exist_ok=True)
df_financial_data.to_excel('Data Validation/transaction_data.xlsx', index=False)

# Append the cleaned rows to dbo.RBC.
# NOTE: if_exists='append' is not idempotent; re-running this cell inserts the same rows again.
df_financial_data.to_sql('RBC', sql_conn, schema='dbo', if_exists='append', index=False)

15

In [32]:
# Close SQL connection: dispose of the engine created by create_engine above,
# releasing the connections held in its pool
sql_conn.dispose()