In [324]:
import pyodbc as dbc
import pandas as pd
import sqlalchemy as sa
import numpy as np
import os

# File names of the two Access databases being compared. They are joined onto
# the current working directory further down, so both files must live there.
DB1_NAME = 'opsmdd_CRDH_DEV.mdb'
DB2_NAME = 'opsmdd_9_04_24_Load.mdb'

In [325]:
def get_driver() -> str:
    """Return the name of the first installed ODBC driver that handles *.mdb files.

    The driver list comes from ``pyodbc.drivers()``; a driver qualifies when its
    name contains the text ``*.mdb``.

    Raises:
        RuntimeError: if no installed driver name contains ``*.mdb``. Previously
            the function fell through and returned ``None``, which was then
            formatted into the connection string as ``DRIVER=None``.
    """
    for driver_name in dbc.drivers():
        if "*.mdb" in driver_name:
            return driver_name
    raise RuntimeError(
        "No ODBC driver for '*.mdb' files is installed; "
        "cannot connect to an Access database."
    )

In [326]:
# Build a SQLAlchemy engine for an Access database (.mdb) file
def create_eng(db_path:str) -> sa.Engine:
    """Create a SQLAlchemy engine for the Access database at ``db_path``.

    The ODBC connection string is assembled from the driver name returned by
    ``get_driver()`` and the database path, then handed to the
    ``access+pyodbc`` dialect through the ``odbc_connect`` query parameter.
    """
    odbc_connect = r'DRIVER={}; DBQ={};'.format(get_driver(), db_path)
    return sa.create_engine(
        sa.engine.URL.create("access+pyodbc", query={"odbc_connect": odbc_connect})
    )

In [327]:
# Function to read a table into a pandas DataFrame
def read_table(connection:dbc.Connection, table_name:str) -> pd.DataFrame:
    """Read every row and column of ``table_name`` into a DataFrame.

    Args:
        connection: open database connection passed straight to ``pd.read_sql``.
        table_name: name of the table; it is wrapped in ``[...]`` so names with
            spaces work.

    Raises:
        ValueError: if ``table_name`` contains ``]``, which would terminate the
            bracket-quoted identifier early and change the meaning of the query.
    """
    if "]" in table_name:
        raise ValueError(f"Invalid table name (contains ']'): {table_name!r}")
    query = f"SELECT * FROM [{table_name}]"
    return pd.read_sql(query, connection)

In [328]:
# Both database files are expected in the directory the notebook runs from
current_dir = os.getcwd()
# Full path of each database file
db1_path, db2_path = (
    os.path.join(current_dir, file_name) for file_name in (DB1_NAME, DB2_NAME)
)

In [329]:
# One SQLAlchemy engine per database file
db1_engine, db2_engine = map(create_eng, (db1_path, db2_path))

In [330]:
# Load the 'vars' table from each database into its own DataFrame
df1, df2 = (pd.read_sql('vars', engine) for engine in (db1_engine, db2_engine))

In [331]:
# Show up to 51 columns: enough for the merged frame, which carries 25 columns
# from each database plus the _merge indicator. Long frames are cut to 10 rows.
pd.options.display.max_columns = 51
pd.options.display.max_rows = 10

In [332]:
# Index both frames by point id. The guard keeps the cell re-runnable: once
# f_ptid is the index it is no longer a column, and a second set_index call
# would raise KeyError.
if df1.index.name != 'f_ptid':
    df1 = df1.set_index('f_ptid')
if df2.index.name != 'f_ptid':
    df2 = df2.set_index('f_ptid')

In [333]:
# df1['source'] = 'db1'
# df2['source'] = 'db2'

In [334]:
# df1.head(3)

In [335]:
# df2.head(3)

In [336]:
# Outer-join db1 (left) with db2 (right) on the point id; the _merge column
# records whether each row came from both frames or only one of them.
merged = df1.merge(df2, on='f_ptid', how='outer', indicator=True)

In [368]:
# merged.head(3)

In [338]:
# Row counts by origin: 'both', 'left_only' (db1 only), 'right_only' (db2 only)
merged['_merge'].value_counts()

_merge
both          171699
left_only        294
right_only       114
Name: count, dtype: int64

In [339]:
# Boolean row masks, one per merge origin (db1 is the left frame, db2 the right)
new_in_db1_filter = merged['_merge'].eq('left_only')
new_in_db2_filter = merged['_merge'].eq('right_only')
in_both_filter = merged['_merge'].eq('both')

In [340]:
# Split the merged rows into db1-only, db2-only and shared subsets
db1_only_df = merged[new_in_db1_filter]
db2_only_df = merged[new_in_db2_filter]
in_both_df = merged[in_both_filter]

In [341]:
# Column masks for the db1 ("_x") and db2 ("_y") halves of the merged frame.
# They are taken from `merged` rather than from the *_only frames, because
# those frames are narrowed and renamed further down, and a re-run of this
# cell would then produce all-False masks.
filter_x = merged.columns.str.endswith('_x')
filter_y = merged.columns.str.endswith('_y')

In [342]:
# Keep only the columns that belong to the relevant database for each subset.
# The *_only frames are rebuilt from `merged` (row mask + column mask) instead
# of narrowing themselves, so re-running this cell gives the same result
# rather than failing on a mask whose length no longer matches.
db1_only_df = merged.loc[new_in_db1_filter, filter_x]
db2_only_df = merged.loc[new_in_db2_filter, filter_y]
in_both_df_db1 = in_both_df.loc[:, filter_x]
in_both_df_db2 = in_both_df.loc[:, filter_y]

In [343]:
def _strip_merge_suffix(columns, suffix):
    """Return the column names with one trailing ``suffix`` removed.

    Only the end of each name is touched; str.replace would also delete the
    same text wherever it occurs inside a column name.
    """
    return [name[:-len(suffix)] if name.endswith(suffix) else name for name in columns]


# Restore the original column names by dropping the suffix added by the merge
db1_only_df.columns = _strip_merge_suffix(db1_only_df.columns, "_x")
db2_only_df.columns = _strip_merge_suffix(db2_only_df.columns, "_y")
in_both_df_db1.columns = _strip_merge_suffix(in_both_df_db1.columns, "_x")
in_both_df_db2.columns = _strip_merge_suffix(in_both_df_db2.columns, "_y")

In [344]:
# Export the db1-only rows; to_csv defaults already give a comma separator,
# a header row and the index (f_ptid) as the first column.
db1_only_df.to_csv('add-report.csv')

In [345]:
# db1's side of the rows present in both databases (merge suffix already stripped)
in_both_df_db1

Unnamed: 0_level_0,f_flag,f_user,f_brief,f_gid,f_vco,f_dtype,f_precs,f_addr,f_ndim,f_sys,f_unit,f_value,f_pred,f_bflag,f_dim1,f_dim2,f_dim3,f_bound,f_size,f_updat,f_time,f_ldes,f_sflag,f_tflag,rid
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
ACWSCNET,o,,CW BASIN NET MASS IN/OUT lb,2.0,v,r,4.0,83996.0,0.0,,,,global02,,0.0,0.0,0.0,83996.0,4.0,,1.584468e+09,,,,149323.0
AGEN2AG1,o,,Phase A Current E-RLY-11/G1 (E-COMP-,30.0,v,r,4.0,3636.0,0.0,,AMPS,,global30,,0.0,0.0,0.0,3636.0,4.0,,1.685461e+09,,,,207699.0
AGEN2AG2,o,,Phase A Current E-RLY-11/G2 (E-COMP-,30.0,v,r,4.0,3640.0,0.0,,AMPS,,global30,,0.0,0.0,0.0,3640.0,4.0,,1.685461e+09,,,,207700.0
AGEN2AG3,o,,Phase A Current E-RLY-11/G3 (E-COMP-,30.0,v,r,4.0,3644.0,0.0,,AMPS,,global30,,0.0,0.0,0.0,3644.0,4.0,,1.685461e+09,,,,207701.0
AGEN2ANG1,o,,Neutral A Phase Current E-RLY-11/G1,30.0,v,r,4.0,3648.0,0.0,,AMPS,,global30,,0.0,0.0,0.0,3648.0,4.0,,1.685461e+09,,,,207702.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzcspare,o,ops,,6.0,c,r,4.0,872.0,1.0,rn,,,global06,,48.0,0.0,0.0,872.0,192.0,981119.0,1.240220e+05,(null),,,93013.0
zzfspare,o,ops,SPARE,5.0,v,r,4.0,125948.0,1.0,rn,,,dp01tibm,b,32.0,0.0,0.0,3068.0,128.0,980518.0,1.526180e+05,SPARE \n,,,107034.0
zzlspare,o,ops,,5.0,v,l,1.0,134164.0,1.0,rn,,f,global05,,9.0,0.0,0.0,134164.0,9.0,981119.0,7.225600e+04,(null),,,27611.0
zzpmbrkr,o,ops,PUMP MOTOR BREAKER STATUS (O,5.0,v,i,4.0,116096.0,1.0,rn,,,dp01fibm,b,2.0,0.0,0.0,112000.0,8.0,980518.0,1.522480e+05,PUMP MOTOR BREAKER STATUS (O \n,,,62877.0


In [346]:
# Column names of db2's side of the rows present in both databases
in_both_df_db2.columns

Index(['f_flag', 'f_user', 'f_brief', 'f_gid', 'f_vco', 'f_dtype', 'f_precs',
       'f_addr', 'f_ndim', 'f_sys', 'f_unit', 'f_value', 'f_pred', 'f_bflag',
       'f_dim1', 'f_dim2', 'f_dim3', 'f_bound', 'f_size', 'f_updat', 'f_time',
       'f_ldes', 'f_sflag', 'f_tflag', 'rid'],
      dtype='object')

In [391]:
# Cell-by-cell diff of the shared rows: db1 is "self", db2 is "other", and the
# two are stacked as consecutive rows under each f_ptid.
mod_df = in_both_df_db1.compare(other=in_both_df_db2, align_axis='index')

In [392]:
# Differences only, shown as a 'self' (db1) row and an 'other' (db2) row per f_ptid
mod_df

Unnamed: 0_level_0,Unnamed: 1_level_0,f_brief,f_time,f_ldes
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JMLPCN8,self,E-DP-S0/2 FAILURE,1.489091e+09,
JMLPCN8,other,Drywell Floor Failure with Severity,1.709242e+09,Drywell Floor Failure with Severity
drur003,self,REGEN HTX SHELL RLF DRIFT SEVERITY,1.851310e+05,REGEN HTX SHELL RLF DRIFT SEVERITY \n
drur003,other,REGEN HTX-1A TUBE RLF DRIFT SEVERITY,1.718130e+09,REGEN HTX-1A TUBE RLF DRIFT SEVERITY
frur003,self,REGEN HTX SHELL RLF FLOW RESTRICTION,1.851320e+05,REGEN HTX SHELL RLF FLOW RESTRICTION \n
...,...,...,...,...
rrur003_l,other,REGEN HTX-1A TUBE RLF LEAKAGE FAIL F,1.718127e+09,REGEN HTX-1A TUBE RLF LEAKAGE FAIL FLA
rrur003_s,self,REGEN HTX SHELL RLF FAIL FLAG,1.851310e+05,
rrur003_s,other,REGEN HTX-1A TUBE RLF FAIL FLAG,1.718127e+09,
srur003,self,REGEN HTX SHELL RLF,5.124000e+03,REGEN HTX SHELL RLF OPEN/CLOSE \n


In [365]:
# mod_df.to_csv('mod-report.csv', sep=',', index=True, header=True)

In [403]:
def _add_field_text(value) -> str:
    """Return the text to write for an optional field, or '' when it has no value."""
    # None, the literal text "None" and NaN/NA all count as "no value".
    if value is None or str(value) == "None" or pd.isna(value):
        return ""
    return str(value)


def _add_field_line(row, column: str, keyword: str) -> str:
    """Build the output line for one optional column of an add record."""
    if not hasattr(row, column):
        # The frame has no such column at all: emit the bare keyword.
        return f'{keyword}\n'
    text = _add_field_text(getattr(row, column))
    return f'{keyword} {text}\n' if keyword else f'{text}\n'


def create_add_file(db:pd.DataFrame, db_name:str)->None:
    """Write one "add" record per row of ``db`` to the file ``<db_name>.add``.

    Each record holds the row index after ``add``, then the ``.desc``
    (f_brief), long description (f_ldes), ``.units`` (f_unit), ``.type``
    (f_dtype*f_precs), ``.valu`` (f_value), ``.dim`` (f_dim1..3) and ``.pred``
    (f_pred) lines, followed by a blank line.

    f_brief, f_ldes, f_unit and f_value are optional. A missing value
    (None / NaN / the text "None") is written as empty text for all four;
    before, only f_unit and f_value were blanked, and only for None. If one
    of these columns is absent from ``db``, the bare keyword line is written.

    Args:
        db: frame indexed by point id that holds the columns listed above.
        db_name: output path without the ``.add`` extension.

    Raises:
        AttributeError: if f_dtype, f_precs, f_dim1..3 or f_pred is not a column.
        ValueError: if f_precs or one of f_dim1..3 is NaN (``int()`` rejects it).
    """
    with open(f"{db_name}.add", 'w') as f:
        for row in db.itertuples(index=True, name='Pandas'):
            f.write(f'add {row.Index}\n')
            f.write(_add_field_line(row, 'f_brief', '.desc'))
            # The long description goes on a line of its own, with no keyword.
            f.write(_add_field_line(row, 'f_ldes', ''))
            f.write(_add_field_line(row, 'f_unit', '.units'))
            f.write(f'.type {row.f_dtype}*{int(row.f_precs)}\n')
            f.write(_add_field_line(row, 'f_value', '.valu'))
            f.write(f'.dim {int(row.f_dim1)}, {int(row.f_dim2)}, {int(row.f_dim3)}\n')
            f.write(f'.pred {row.f_pred}\n\n')

In [369]:
# create_add_file(db1_only_df, DB1_NAME)

In [406]:
def create_mod_file(db:pd.DataFrame, db_name:str):
    """Write one ``mod <point id>`` line per modified point to ``<db_name>.mod``.

    ``db`` may be the output of ``DataFrame.compare(..., align_axis=0)``, whose
    index is a (point id, 'self'/'other') MultiIndex with two rows per point.
    Only the first index level is used and each point id is written once, in
    order of first appearance. Iterating the rows directly wrote the whole
    index tuple, and wrote it twice per point.

    Args:
        db: frame whose index (or first index level) holds the point ids.
        db_name: output path without the ``.mod`` extension.
    """
    point_ids = db.index.get_level_values(0).unique()
    with open(f"{db_name}.mod", 'w') as f:
        for point_id in point_ids:
            f.write(f'mod {point_id}\n')

In [407]:
# Columns that differ between the two databases for at least one shared row
mod_df.columns

Index(['f_brief', 'f_time', 'f_ldes'], dtype='object')

In [408]:
# Write the mod file for the differing rows; the output file is named
# f"{DB1_NAME}.mod" and is opened relative to the working directory.
create_mod_file(mod_df, DB1_NAME)