In [None]:
import pyodbc
import pandas as pd
import shutil
import time
from datetime import datetime
# Timestamp taken once at start-up; its ddmmYYYY rendering is used further
# down to name the working copy of the database.
current_date = datetime.now()
formatted_date = f'{current_date:%d%m%Y}'


def copy_database(original_path, temp_path):
    """Copy the file at ``original_path`` to ``temp_path``.

    Delegates to ``shutil.copyfile``; nothing is returned.
    """
    shutil.copyfile(src=original_path, dst=temp_path)

def export_access_to_dataframes(database_path):
    """Load the tables of an Access database into pandas DataFrames.

    Every table the driver reports with ``tableType='TABLE'`` is read with
    ``SELECT *``. Each resulting DataFrame is stored as a global variable of
    this module named ``tbl_<table name>`` and is also collected in the
    returned dictionary.

    Args:
        database_path: Path to the ``.mdb`` / ``.accdb`` file.

    Returns:
        A dict mapping each table name to its DataFrame (the same objects
        that are stored in the ``tbl_*`` globals).
    """
    # Connection string for Access database
    conn_str = (
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=' + database_path + ';'
    )

    frames = {}
    conn = pyodbc.connect(conn_str)
    try:
        cursor = conn.cursor()

        # Materialise the table list first: the cursor is not reused while
        # the individual tables are being read.
        tables = [row.table_name for row in cursor.tables(tableType='TABLE')]

        for table in tables:
            frame = pd.read_sql(f'SELECT * FROM [{table}]', conn)
            frames[table] = frame
            globals()[f'tbl_{table}'] = frame  # Create a global variable with the table name
    finally:
        # Release the connection even when listing or reading a table fails.
        conn.close()

    return frames

def update_access_table(database_path, table_name, df):
    """Replace the contents of an Access table with the rows of ``df``.

    All existing rows of ``table_name`` are deleted and every row of ``df``
    is inserted. Delete and inserts are committed together, so if an insert
    fails the transaction is rolled back instead of leaving the table empty.

    Args:
        database_path: Path to the ``.mdb`` / ``.accdb`` file.
        table_name: Name of the table to overwrite.
        df: DataFrame whose column labels are used as the column names of
            the INSERT statement.

    Raises:
        Any error raised by pyodbc is re-raised after the rollback.
    """
    # Connection string for Access database
    conn_str = (
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=' + database_path + ';'
    )

    # The statement is the same for every row, so build it once. Column
    # names are bracket-quoted like the table name so that names containing
    # spaces or reserved words are accepted.
    columns = ', '.join(f'[{column}]' for column in df.columns)
    placeholders = ', '.join('?' for _ in df.columns)
    insert_sql = f'INSERT INTO [{table_name}] ({columns}) VALUES ({placeholders})'

    # pandas marks missing values as NaN/NaT; convert them to None so they
    # are bound as SQL NULL. The cast to object is needed for None to
    # survive in otherwise numeric columns.
    rows = list(
        df.astype(object).where(df.notna(), None).itertuples(index=False, name=None)
    )

    conn = pyodbc.connect(conn_str)
    try:
        cursor = conn.cursor()

        # Clear the existing table (not committed yet)
        cursor.execute(f'DELETE FROM [{table_name}]')

        # Insert the updated data
        if rows:
            cursor.executemany(insert_sql, rows)

        # Single commit: the table is either fully replaced or left untouched
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

# Paths
# The working copy carries today's date (ddmmYYYY) in its file name.
original_database_path = 'E:\\digidure\\CLERUS_v1_06082024.accdb'
temp_database_path = f'E:\\digidure\\CLERUS_v1_DRC_{formatted_date}.accdb'

# Copy the database so that all changes below are made to the copy only
copy_database(original_database_path, temp_database_path)

# Export data from the copied database (creates the tbl_* variables)
export_access_to_dataframes(temp_database_path)

# Processing the data
# NOTE: df is the same object as tbl_12_clerus_role, so the in-place sort and
# the role_end_year fill below modify that table as well.
df = tbl_12_clerus_role
df.sort_values(by=['clerus_id', 'role_start_year'], inplace=True)

# Step 1: a role without an end year ends when the same person's next role
# (in start-year order) begins.
next_role_start = df.groupby('clerus_id')['role_start_year'].shift(-1)
df['role_end_year'] = next_role_start.where(df['role_end_year'].isna(), df['role_end_year'])

# Step 2: roles that still have no end year but do have a start year end in
# the person's death year.
tbl_01_clerus_bio_death = tbl_01_clerus_bio[['clerus_id', 'death_year']]
df_joined = pd.merge(df, tbl_01_clerus_bio_death, on='clerus_id', how='left')
open_ended = df_joined['role_end_year'].isna() & df_joined['role_start_year'].notna()
df_joined['role_end_year'] = df_joined['role_end_year'].mask(open_ended, df_joined['death_year'])
df_joined.drop(columns=['death_year'], inplace=True)

# Ensure that the numeric columns are integers; missing values are replaced
# by 0 before the conversion.
for column in (
    'role_start_year',
    'role_end_year',
    'role_place_id',
    'role_start_date_exact',
    'role_classis_code',
    'role_end_date_exact',
    'role_residence_place_id',
):
    df_joined[column] = df_joined[column].fillna(0).astype(int)

In [None]:
# pandas display settings: lift the limits on shown columns, rows and cell
# width so the data can be inspected in full while processing it.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [None]:
# Write the processed roles back to table 12_clerus_role of the copied database
update_access_table(
    database_path=temp_database_path,
    table_name='12_clerus_role',
    df=df_joined,
)