In [947]:
import pyodbc as dbc
import pandas as pd
import sqlalchemy as sa
import numpy
import os

# File names of the two Access databases being compared. They are joined to the
# current working directory further down, and DB1_NAME is also passed as the
# base name of the generated .mod file.
DB1_NAME = 'opsmdd_CRDH_DEV.mdb'
DB2_NAME = 'opsmdd_CRDH_DEV-2.mdb'

In [948]:
def get_driver() -> str:
    """Return the name of the first installed ODBC driver that handles ``*.mdb`` files.

    The driver list comes from ``pyodbc.drivers()``; a driver qualifies when its
    name contains the literal text ``*.mdb``.

    Returns:
        The first matching driver name, in the order pyodbc reports them.

    Raises:
        RuntimeError: If no installed driver name contains ``*.mdb``. Previously
            the function fell through and returned ``None``, which was then
            formatted into the connection string as ``DRIVER=None``.
    """
    installed = dbc.drivers()
    driver_names = [name for name in installed if "*.mdb" in name]
    if not driver_names:
        raise RuntimeError(
            f"No ODBC driver for '*.mdb' files found; installed drivers: {installed}"
        )
    return driver_names[0]

In [949]:
def create_eng(db_path:str) -> sa.Engine:
    """Build a SQLAlchemy engine for the Access database file at `db_path`.

    The ODBC connection string is assembled from the driver name returned by
    ``get_driver()`` and the given path, then handed to SQLAlchemy through the
    ``odbc_connect`` query parameter of an ``access+pyodbc`` URL.

    Args:
        db_path: Path of the .mdb file to open.

    Returns:
        The engine created by ``sa.create_engine`` for that URL.
    """
    odbc_settings = r'DRIVER={}; DBQ={};'.format(get_driver(), db_path)
    return sa.create_engine(
        sa.engine.URL.create("access+pyodbc", query={"odbc_connect": odbc_settings})
    )

In [950]:
def read_table(connection:"dbc.Connection", table_name:str) -> pd.DataFrame:
    """Read every row of one table into a pandas DataFrame.

    Runs ``SELECT * FROM [table_name]`` through ``pd.read_sql``.

    Args:
        connection: Open database connection passed straight to ``pd.read_sql``.
        table_name: Name of a single table. It is wrapped in square brackets, so
            it may contain spaces but not brackets.

    Returns:
        A DataFrame holding the full contents of the table.

    Raises:
        TypeError: If `table_name` is not a string.
        ValueError: If `table_name` is empty or contains ``[`` or ``]``; such a
            name would terminate the bracket quoting and change the statement.
    """
    if not isinstance(table_name, str):
        raise TypeError(f"table_name must be a str, got {type(table_name).__name__}")
    if not table_name or '[' in table_name or ']' in table_name:
        raise ValueError(f"Invalid table name: {table_name!r}")
    query = f"SELECT * FROM [{table_name}]"
    return pd.read_sql(query, connection)

In [951]:
# Resolve both database files against the directory the notebook is running in
current_dir = os.getcwd()
db1_path, db2_path = (
    os.path.join(current_dir, file_name) for file_name in (DB1_NAME, DB2_NAME)
)

In [952]:
# One SQLAlchemy engine per database file
db1_engine, db2_engine = map(create_eng, (db1_path, db2_path))

In [953]:
# Load the whole `vars` table from each database
df1, df2 = (pd.read_sql('vars', engine) for engine in (db1_engine, db2_engine))

In [954]:
# Show up to 51 columns and 10 rows per displayed frame. 51 matches the merged
# frame below: the 25 data columns from each database plus the `_merge` indicator.
pd.set_option('display.max_columns', 51)
pd.set_option('display.max_rows', 10)

In [955]:
# Index both frames by point id (f_ptid), the key used by the merge below.
# Rebinding instead of inplace=True, plus the guard, keeps this cell safe to
# re-run: the in-place version raised KeyError on a second run because the
# f_ptid column had already been consumed into the index.
if df1.index.name != 'f_ptid':
    df1 = df1.set_index('f_ptid')
if df2.index.name != 'f_ptid':
    df2 = df2.set_index('f_ptid')

In [956]:
# df1['source'] = 'db1'
# df2['source'] = 'db2'

In [957]:
# df1.head(3)

In [958]:
# df2.head(3)

In [959]:
# Outer-join the two tables on point id. Overlapping column names get the
# (default) '_x' / '_y' suffixes that later cells use to split db1 from db2, and
# indicator=True adds the `_merge` column recording where each row came from.
merged = df1.merge(
    df2,
    how='outer',
    on='f_ptid',
    suffixes=('_x', '_y'),
    indicator=True,
)

In [960]:
# merged.head(3)

In [961]:
# Count how many point ids exist in both databases, only in db1 (left_only)
# or only in db2 (right_only)
merged['_merge'].value_counts()

_merge
both          171993
left_only          0
right_only         0
Name: count, dtype: int64

In [962]:
# Boolean row masks, one per value of the merge indicator
merge_origin = merged['_merge']
new_in_db1_filter = merge_origin.eq('left_only')
new_in_db2_filter = merge_origin.eq('right_only')
in_both_filter = merge_origin.eq('both')

In [963]:
# Split the merged rows by origin: only in db1, only in db2, present in both
db1_only_df = merged[new_in_db1_filter]
db2_only_df = merged[new_in_db2_filter]
in_both_df = merged[in_both_filter]

In [964]:
# Column masks over the merged frame: names ending in '_x' are db1's columns,
# names ending in '_y' are db2's (the suffixes added by the merge).
# They are taken from `merged` rather than from db1_only_df / db2_only_df because
# a later cell rebinds those names to narrower frames; masks built from them on
# a re-run would have the wrong length for `in_both_df`.
filter_x = merged.columns.str.endswith('_x')
filter_y = merged.columns.str.endswith('_y')

In [965]:
# Keep db1's columns for the db1 views and db2's columns for the db2 views.
# Every frame is sliced directly from `merged` (rows by origin mask, columns by
# suffix mask) instead of re-slicing db1_only_df / db2_only_df onto themselves,
# so running this cell a second time no longer fails on a mask/column mismatch.
db1_only_df = merged.loc[new_in_db1_filter, filter_x]
db2_only_df = merged.loc[new_in_db2_filter, filter_y]
in_both_df_db1 = merged.loc[in_both_filter, filter_x]
in_both_df_db2 = merged.loc[in_both_filter, filter_y]

In [966]:
def _strip_suffix(columns, suffix):
    """Return the column names with one trailing `suffix` removed where present."""
    return [name[:-len(suffix)] if name.endswith(suffix) else name for name in columns]


# Restore the original column names by dropping the merge suffix.
# Only the trailing suffix is removed: the previous str.replace("_x", "") deleted
# every "_x" in a name, which would corrupt a column containing it mid-string.
db1_only_df.columns = _strip_suffix(db1_only_df.columns, "_x")
db2_only_df.columns = _strip_suffix(db2_only_df.columns, "_y")
in_both_df_db1.columns = _strip_suffix(in_both_df_db1.columns, "_x")
in_both_df_db2.columns = _strip_suffix(in_both_df_db2.columns, "_y")

In [967]:
# Export the db1-only points; comma separator, header row and index column are
# the to_csv defaults
db1_only_df.to_csv('add-report.csv')

In [968]:
# Preview db1's values for the points that exist in both databases
in_both_df_db1

Unnamed: 0_level_0,f_flag,f_user,f_brief,f_gid,f_vco,f_dtype,f_precs,f_addr,f_ndim,f_sys,f_unit,f_value,f_pred,f_bflag,f_dim1,f_dim2,f_dim3,f_bound,f_size,f_updat,f_time,f_ldes,f_sflag,f_tflag,rid
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
ACWSCNET,o,,CW BASIN NET MASS IN/OUT lb,2.0,v,r,4,83996.0,0,,,,global02,,0,0.0,0.0,83996.0,4.0,,1.584468e+09,,,,149323
AGEN2AG1,o,,Phase A Current E-RLY-11/G1 (E-COMP-,30.0,v,r,4,3636.0,0,,AMPS,,global30,,0,0.0,0.0,3636.0,4.0,,1.685461e+09,,,,207699
AGEN2AG2,o,,Phase A Current E-RLY-11/G2 (E-COMP-,30.0,v,r,4,3640.0,0,,AMPS,,global30,,0,0.0,0.0,3640.0,4.0,,1.685461e+09,,,,207700
AGEN2AG3,o,,Phase A Current E-RLY-11/G3 (E-COMP-,30.0,v,r,4,3644.0,0,,AMPS,,global30,,0,0.0,0.0,3644.0,4.0,,1.685461e+09,,,,207701
AGEN2ANG1,o,,Neutral A Phase Current E-RLY-11/G1,30.0,v,r,4,3648.0,0,,AMPS,,global30,,0,0.0,0.0,3648.0,4.0,,1.685461e+09,,,,207702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzcspare,o,ops,,6.0,c,r,4,872.0,1,rn,,,global06,,48,0.0,0.0,872.0,192.0,981119.0,1.240220e+05,(null),,,93013
zzfspare,o,ops,SPARE,5.0,v,r,4,125948.0,1,rn,,,dp01tibm,b,32,0.0,0.0,3068.0,128.0,980518.0,1.526180e+05,SPARE \n,,,107034
zzlspare,o,ops,,5.0,v,l,1,134164.0,1,rn,,f,global05,,9,0.0,0.0,134164.0,9.0,981119.0,7.225600e+04,(null),,,27611
zzpmbrkr,o,ops,PUMP MOTOR BREAKER STATUS (O,5.0,v,i,4,116096.0,1,rn,,,dp01fibm,b,2,0.0,0.0,112000.0,8.0,980518.0,1.522480e+05,PUMP MOTOR BREAKER STATUS (O \n,,,62877


In [969]:
# Check that db2's side carries the same column labels as db1's side;
# DataFrame.compare only accepts identically-labelled frames
in_both_df_db2.columns

Index(['f_flag', 'f_user', 'f_brief', 'f_gid', 'f_vco', 'f_dtype', 'f_precs',
       'f_addr', 'f_ndim', 'f_sys', 'f_unit', 'f_value', 'f_pred', 'f_bflag',
       'f_dim1', 'f_dim2', 'f_dim3', 'f_bound', 'f_size', 'f_updat', 'f_time',
       'f_ldes', 'f_sflag', 'f_tflag', 'rid'],
      dtype='object')

In [970]:
# Keep only the cells that differ between the two databases.
# align_axis=0 stacks the result vertically: each differing point gets a 'self'
# row (db1's value) and an 'other' row (db2's value); cells that match are NaN.
mod_df = in_both_df_db1.compare(in_both_df_db2, align_axis=0)

In [971]:
# Preview the differences (indexed by point id and self/other)
mod_df

Unnamed: 0_level_0,Unnamed: 1_level_0,f_brief,f_gid,f_dtype,f_ndim
f_ptid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DEHTFKEPS,self,DBZ Protection,,,
DEHTFKEPS,other,DEV2!!!,,,
SAMV_W085,self,,,r,0.0
SAMV_W085,other,,,T,1.0
cslcslc3,self,,4.0,,
cslcslc3,other,,5.0,,


In [972]:
# mod_df.to_csv('mod-report.csv', sep=',', index=True, header=True)

In [973]:
def _add_text(value) -> str:
    """Render one cell as text, mapping missing values (None, NaN, NA) to ''."""
    # The "None" string check is kept from the original units/value handling.
    if str(value) == "None":
        return ""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)


def _add_line(row, column: str, keyword: str = "") -> str:
    """Build the output line for `column`; the bare keyword if the column is absent."""
    if not hasattr(row, column):
        return f"{keyword}\n"
    text = _add_text(getattr(row, column))
    return f"{keyword} {text}\n" if keyword else f"{text}\n"


def create_add_file(db:pd.DataFrame, db_name:str)->None:
    """Write one ``add`` record per row of `db` to the file ``<db_name>.add``.

    Each record consists of the lines ``add <index>``, ``.desc <f_brief>``,
    ``<f_ldes>``, ``.units <f_unit>``, ``.type <f_dtype>*<f_precs>``,
    ``.valu <f_value>``, ``.dim <f_dim1>, <f_dim2>, <f_dim3>`` and
    ``.pred <f_pred>``, followed by a blank line.

    Missing values are now handled the same way for all four optional fields
    (f_brief, f_ldes, f_unit, f_value): None/NaN is written as empty text.
    Before, the ``except AttributeError`` branches only covered an absent column,
    so a null description was written as the literal ``None``/``nan`` and a NaN
    unit or value as ``nan``.

    Args:
        db: Frame indexed by point id. The columns f_dtype, f_precs, f_dim1,
            f_dim2, f_dim3 and f_pred are required; f_brief, f_ldes, f_unit and
            f_value may be absent, in which case only the keyword is written.
        db_name: Base path of the output file; ``.add`` is appended. An existing
            file is overwritten.

    Raises:
        AttributeError: If a required column is missing.
        ValueError: If f_precs or a dimension is NaN (``int()`` cannot convert it).
    """
    with open(f"{db_name}.add", 'w') as f:
        for row in db.itertuples(index=True, name='Pandas'):
            f.write(f'add {row.Index}\n')
            f.write(_add_line(row, 'f_brief', '.desc'))
            f.write(_add_line(row, 'f_ldes'))
            f.write(_add_line(row, 'f_unit', '.units'))
            f.write(f'.type {row.f_dtype}*{int(row.f_precs)}\n')
            f.write(_add_line(row, 'f_value', '.valu'))
            f.write(f'.dim {int(row.f_dim1)}, {int(row.f_dim2)}, {int(row.f_dim3)}\n')
            f.write(f'.pred {row.f_pred}\n\n')

In [974]:
# create_add_file(db1_only_df, DB1_NAME)

In [975]:
# Turn the (f_ptid, self/other) index produced by compare() into ordinary
# columns. Guarded and rebinding rather than inplace=True so that re-running the
# cell is a no-op instead of inserting an extra 'index' column.
if isinstance(mod_df.index, pd.MultiIndex):
    mod_df = mod_df.reset_index()

In [976]:
# Label each row with the database file it came from. 'level_1' is the name
# reset_index gave the unnamed self/other index level.
# Series.replace with a dict swaps whole values only; the earlier chained
# str.replace calls did substring replacement, and the second call also ran over
# the text already substituted by the first.
mod_df = mod_df.rename(columns={'level_1': 'db_name'})
mod_df['db_name'] = mod_df['db_name'].replace({'self': DB1_NAME, 'other': DB2_NAME})

In [977]:
# Preview the differences with the source database named on every row
mod_df

Unnamed: 0,f_ptid,db_name,f_brief,f_gid,f_dtype,f_ndim
0,DEHTFKEPS,opsmdd_CRDH_DEV.mdb,DBZ Protection,,,
1,DEHTFKEPS,opsmdd_CRDH_DEV-2.mdb,DEV2!!!,,,
2,SAMV_W085,opsmdd_CRDH_DEV.mdb,,,r,0.0
3,SAMV_W085,opsmdd_CRDH_DEV-2.mdb,,,T,1.0
4,cslcslc3,opsmdd_CRDH_DEV.mdb,,4.0,,
5,cslcslc3,opsmdd_CRDH_DEV-2.mdb,,5.0,,


In [978]:
# Row mask selecting the differences recorded for a single point id
filt = mod_df['f_ptid'].eq('DEHTFKEPS')

In [990]:
# NOTE(review): this overwrites mod_df with only the rows selected by `filt`;
# the full set of differences is no longer available afterwards, and every
# later use of mod_df (including the create_mod_file call) sees just this
# subset. Looks like a debugging step - confirm whether a separate name was
# intended.
mod_df = mod_df[filt]

In [993]:
# Display-only: hide the columns that contain NaN (fields that did not change
# for the selected rows). The result is not assigned, so mod_df is unchanged.
mod_df.dropna(axis=1)

Unnamed: 0,f_ptid,db_name,f_brief
0,DEHTFKEPS,opsmdd_CRDH_DEV.mdb,DBZ Protection
1,DEHTFKEPS,opsmdd_CRDH_DEV-2.mdb,DEV2!!!


In [980]:
def check_dim_mods(row=None) -> bool:
    """Report whether all three dimension fields of `row` are truthy.

    The original stub read an undefined global ``row`` and the attributes
    ``dim2`` / ``dim3``, which do not match the ``f_dim2`` / ``f_dim3`` column
    names, so any call raised. The row is now an explicit argument and the
    outcome of the check is returned instead of being discarded.

    Args:
        row: Object exposing ``f_dim1``, ``f_dim2`` and ``f_dim3`` (for example a
            row from ``DataFrame.itertuples``). ``None`` keeps the zero-argument
            call form working and yields ``False``.

    Returns:
        ``True`` when all three dimension values are truthy, otherwise ``False``.
    """
    if row is None:
        return False
    # NOTE(review): plain truthiness is kept from the stub; NaN counts as truthy
    # and 0 as falsy. Confirm the intended test once the function is finished.
    return bool(row.f_dim1 and row.f_dim2 and row.f_dim3)
        

In [981]:
# def create_mod_file(db:pd.DataFrame, db_name:str):
#     with open(f"{db_name}.mod", 'w') as f:
#         for row in db.itertuples(index=True, name='Pandas'):
#             f.write(f'mod {row.f_ptid}\n')
#             try:
#                 f.write(f'.desc {row.f_brief}\n')
#             except AttributeError: # No Value
#                 f.write(f'.desc\n')
#             try:
#                 f.write(f'{row.f_ldes}\n')
#             except AttributeError: # No Value
#                 f.write(f'\n')
#             try:
#                 if str(row.f_unit) == "None":
#                     unit = ""
#                 else:
#                     unit = row.f_unit       
#                 f.write(f'.units {unit}\n')
#             except AttributeError: # No Value
#                 f.write(f'.units\n')
#             try:
#                 f.write(f'.type {row.f_dtype}*{int(row.f_precs)}\n')
#             except AttributeError: # No Value
#                 pass
#             try:
#                 if str(row.f_value) == "None":
#                     valu = ""
#                 else:
#                     valu = row.f_value
#                 f.write(f'.valu {valu}\n')
#             except AttributeError: # No Value
#                 f.write(f'.valu\n')
#             f.write(f'.dim {int(row.f_dim1)}, {int(row.f_dim2)}, {int(row.f_dim3)}\n')
#             f.write(f'.pred {row.f_pred}\n\n')               
            

In [982]:
def create_mod_file(db:pd.DataFrame, db_name:str):
    """Write one ``mod`` line per row of `db` to the file ``<db_name>.mod``.

    Each line is ``mod `` followed by the text form of the whole row tuple
    yielded by ``DataFrame.itertuples`` (index first, then every column).

    NOTE(review): this looks unfinished - the full tuple repr is written rather
    than the point id and its changed fields. Output is intentionally left as is.

    Args:
        db: Frame whose rows are written.
        db_name: Base path of the output file; ``.mod`` is appended. An existing
            file is overwritten.
    """
    target_path = f"{db_name}.mod"
    with open(target_path, 'w') as handle:
        handle.writelines(
            f'mod {record}\n'
            for record in db.itertuples(index=True, name='Pandas')
        )
        

In [983]:
# Writes '<DB1_NAME>.mod' into the working directory.
# NOTE(review): mod_df was narrowed by the `mod_df = mod_df[filt]` cell above,
# so only that point's rows are written if the cells ran top to bottom.
create_mod_file(mod_df, DB1_NAME)

In [984]:
# mod_db1_filter = (mod_df.index == 'self')