In [447]:
import pyodbc as dbc
import pandas as pd
import sqlalchemy as sa
import os

DB1_NAME = 'opsmdd_CRDH_DEV.mdb'
DB2_NAME = 'opsmdd_9_04_24_Load.mdb'

In [448]:
def get_driver() -> str:
    driver_names = [x for x in dbc.drivers() if "*.mdb" in x]
    if driver_names:
        return driver_names[0]

In [449]:
# Function to connect to an Access Database (.mdb)
def create_eng(db_path:str) -> sa.Engine:
    conn_str = r'DRIVER={}; DBQ={};'.format(get_driver(), db_path)
    connection_url = sa.engine.URL.create(
        "access+pyodbc",
        query={"odbc_connect": conn_str}
    )
    return sa.create_engine(connection_url)

In [450]:
# Function to read a table into a pandas DataFrame
def read_table(connection:dbc.Connection, table_name:list) -> pd.DataFrame:
    query = f"SELECT * FROM [{table_name}]"
    return pd.read_sql(query, connection)

In [451]:
# Get the current working directory
current_dir = os.getcwd()
# Build full paths for the database files
db1_path = os.path.join(current_dir, DB1_NAME)
db2_path = os.path.join(current_dir, DB2_NAME)

In [452]:
# Create sql engine
engine = create_eng(db1_path)

In [453]:
# Read Table
df1 = pd.read_sql('vars', engine)
df2 = pd.read_sql('vars', engine)

In [454]:
# Set display to 26 to see all columns
pd.set_option('display.max_columns', 51)
pd.set_option('display.max_rows', 10)

In [465]:
df1.set_index('f_ptid', inplace=True)
df2.set_index('f_ptid', inplace=True)

In [466]:
# df1['source'] = 'db1'
# df2['source'] = 'db2'

In [467]:
# df1.head(3)

In [468]:
# df2.head(3)

In [474]:
merged = pd.merge(df1, df2, on=['f_ptid'], how='outer', indicator=True)

In [475]:
merged.head(3)

Unnamed: 0_level_0,f_flag_x,f_user_x,f_brief_x,f_gid_x,f_vco_x,f_dtype_x,f_precs_x,f_addr_x,f_ndim_x,f_sys_x,f_unit_x,f_value_x,f_pred_x,f_bflag_x,f_dim1_x,f_dim2_x,f_dim3_x,f_bound_x,f_size_x,f_updat_x,f_time_x,f_ldes_x,f_sflag_x,f_tflag_x,rid_x,f_flag_y,f_user_y,f_brief_y,f_gid_y,f_vco_y,f_dtype_y,f_precs_y,f_addr_y,f_ndim_y,f_sys_y,f_unit_y,f_value_y,f_pred_y,f_bflag_y,f_dim1_y,f_dim2_y,f_dim3_y,f_bound_y,f_size_y,f_updat_y,f_time_y,f_ldes_y,f_sflag_y,f_tflag_y,rid_y,_merge
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
ACWSCNET,o,,CW BASIN NET MASS IN/OUT lb,2.0,v,r,4,83996.0,0,,,,global02,,0,0.0,0.0,83996.0,4.0,,1584468000.0,,,,149323,o,,CW BASIN NET MASS IN/OUT lb,2.0,v,r,4,83996.0,0,,,,global02,,0,0.0,0.0,83996.0,4.0,,1584468000.0,,,,149323,both
AGEN2AG1,o,,Phase A Current E-RLY-11/G1 (E-COMP-,30.0,v,r,4,3636.0,0,,AMPS,,global30,,0,0.0,0.0,3636.0,4.0,,1685461000.0,,,,207699,o,,Phase A Current E-RLY-11/G1 (E-COMP-,30.0,v,r,4,3636.0,0,,AMPS,,global30,,0,0.0,0.0,3636.0,4.0,,1685461000.0,,,,207699,both
AGEN2AG2,o,,Phase A Current E-RLY-11/G2 (E-COMP-,30.0,v,r,4,3640.0,0,,AMPS,,global30,,0,0.0,0.0,3640.0,4.0,,1685461000.0,,,,207700,o,,Phase A Current E-RLY-11/G2 (E-COMP-,30.0,v,r,4,3640.0,0,,AMPS,,global30,,0,0.0,0.0,3640.0,4.0,,1685461000.0,,,,207700,both


In [476]:
merged['_merge'].value_counts()

_merge
both          171993
left_only          0
right_only         0
Name: count, dtype: int64

In [477]:
merge_filt = (merged['_merge'] == 'left_only')

In [478]:
merged.loc[merge_filt]

Unnamed: 0_level_0,f_flag_x,f_user_x,f_brief_x,f_gid_x,f_vco_x,f_dtype_x,f_precs_x,f_addr_x,f_ndim_x,f_sys_x,f_unit_x,f_value_x,f_pred_x,f_bflag_x,f_dim1_x,f_dim2_x,f_dim3_x,f_bound_x,f_size_x,f_updat_x,f_time_x,f_ldes_x,f_sflag_x,f_tflag_x,rid_x,f_flag_y,f_user_y,f_brief_y,f_gid_y,f_vco_y,f_dtype_y,f_precs_y,f_addr_y,f_ndim_y,f_sys_y,f_unit_y,f_value_y,f_pred_y,f_bflag_y,f_dim1_y,f_dim2_y,f_dim3_y,f_bound_y,f_size_y,f_updat_y,f_time_y,f_ldes_y,f_sflag_y,f_tflag_y,rid_y,_merge
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1


In [479]:
new_in_db1 = merged[merged['_merge'] == 'left_only'].dropna(axis=1,how='all')
new_in_db2 = merged[merged['_merge'] == 'right_only'].dropna(axis=1,how='all')

In [481]:
new_in_db2