# Loading files to the Azure Database

In [11]:
# SQAlchemy
import pandas as pd
from sqlalchemy import create_engine, types
from sqlalchemy import text # to be able to pass string
from sqlalchemy.exc import SQLAlchemyError
from dotenv import dotenv_values # to load the data from .env file


pd.options.display.max_rows = 6000
pd.options.display.max_columns = 6000

#load the data from .env file
config = dotenv_values()

In [12]:
# define variables for the login
pg_user = config['AZURE_USER'] 
pg_host = config['AZURE_HOST']
pg_port = config['AZURE_PORT']
pg_db = config['AZURE_DB']
pg_schema = config['AZURE_SCHEMA']
pg_pass = config['AZURE_PASS']

build the URL

In [13]:
url = f'postgresql://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/{pg_db}'

create the engine

In [14]:

engine = create_engine(url, echo=False)
engine.url

postgresql://AdminCapstone:***@postcapstoneendurance.postgres.database.azure.com:5432/postgres

testing the connection

In [15]:
def test_db_connection(engine):
    try:
        connection = engine.connect() # including 'connection' as variable to close the connection
        print("Connection successful!")
        connection.close() # closing the connection
        return True
    except SQLAlchemyError as e:
        print(f"Connection failed: {e}")
        return False

# Test the connection
test_db_connection(engine)

Connection successful!


True

Build the search path

In [16]:
my_schema = pg_schema 

with engine.begin() as conn: 
    result = conn.execute(text(f'SET search_path TO {my_schema};'))

load DataFrames to Database

In [17]:
# reding the different dataframes into this notebook to then upload them to the database
df_activs_plus = pd.read_csv('./data/df_activs_plus.csv')
df_phys = pd.read_csv('./data/df_phys.csv')
df_runs_plus = pd.read_csv('./data/df_runs_plus.csv')
df_months = pd.read_csv('./data/df_months.csv')
df_activs_change= pd.read_csv('./data/df_activs_change.csv')



In [18]:
df_activs_change["activity id"] = df_activs_change["activity id"].astype(object)

In [19]:
# uploading the dataframes to the database
df_activs_plus.to_sql('df_activs_plus', con=engine, schema='public', if_exists='replace', index=False)
df_phys.to_sql('df_phys', con=engine, schema='public', if_exists='replace', index=False)
df_runs_plus.to_sql('df_runs_plus', con=engine, schema='public', if_exists='replace', index=False)
df_months.to_sql('df_months', con=engine, schema='public', if_exists='replace', index=False)
df_activs_change.to_sql('df_activs_change', con=engine, schema='public', if_exists='replace', index=False)

35

problems during upload of the second and bigger dataset.
After some trys here the sessionmaker as a solution to rollback 

In [20]:
''' 
from sqlalchemy.orm import sessionmaker

# Session für Transaktionskontrolle erstellen
Session = sessionmaker(bind=engine)
session = Session()

# Falls eine vorherige Transaktion fehlgeschlagen ist, zurückrollen
session.rollback()
session.close()

# Danach die Verbindung sauber beenden
engine.dispose()
engine = create_engine(url, echo=False)

# to make sure there are no problems due to an open connection, reset engine before uploading df_stats
# isolate to autocommit = make sure that the connection is closing

engine.dispose()
engine = create_engine(url, isolation_level="AUTOCOMMIT", echo=False)

'''


' \nfrom sqlalchemy.orm import sessionmaker\n\n# Session für Transaktionskontrolle erstellen\nSession = sessionmaker(bind=engine)\nsession = Session()\n\n# Falls eine vorherige Transaktion fehlgeschlagen ist, zurückrollen\nsession.rollback()\nsession.close()\n\n# Danach die Verbindung sauber beenden\nengine.dispose()\nengine = create_engine(url, echo=False)\n\n# to make sure there are no problems due to an open connection, reset engine before uploading df_stats\n# isolate to autocommit = make sure that the connection is closing\n\nengine.dispose()\nengine = create_engine(url, isolation_level="AUTOCOMMIT", echo=False)\n\n'