In [5]:
import pandas as pd
from datetime import datetime

def create_comparison_df(start_file: str, end_file: str, comparison_type: str) -> pd.DataFrame:
    data = {
        'StartFile': start_file,
        'EndFile': end_file,
        'DateTime': datetime.now(),
        'ComparisonType': comparison_type,
        'Warnings': 'None'
    }
    df = pd.DataFrame(list(data.items()), columns=['Key', 'Value'])
    return df

# Example usage
start_file = "FileNameOld"
end_file = "FileNameNew"
comparison_type = "AIT Comparison"

comparison_df = create_comparison_df(start_file, end_file, comparison_type)
display(comparison_df)
print(comparison_df.to_string(index=False, header=False))


Unnamed: 0,Key,Value
0,StartFile,FileNameOld
1,EndFile,FileNameNew
2,DateTime,2024-08-09 13:03:23.576242
3,ComparisonType,AIT Comparison
4,Warnings,


     StartFile                FileNameOld
       EndFile                FileNameNew
      DateTime 2024-08-09 13:03:23.576242
ComparisonType             AIT Comparison


In [7]:
import pandas as pd
from typing import Dict

def _Create_summary_count_df(data_dict: Dict[str, Dict[str, int]]) -> pd.DataFrame:
    # Define static columns
    static_columns = ["Identical", "New Table"]
    
    # Get dynamic columns from the first dictionary in data_dict
    dynamic_columns = list(next(iter(data_dict.values())).keys())
    
    # Define all columns
    columns = ["Table"] + static_columns + dynamic_columns + ["comments"]
    
    # Initialize an empty list to collect rows
    rows = []
    
    # Iterate through the data_dict to populate the DataFrame
    for table_name, count_dict in data_dict.items():
        row = {
            "Table": table_name,
            **{col: count_dict.get(col, 0) for col in dynamic_columns},
            **{col: 0 for col in static_columns},
            "comments": "none"
        }
        rows.append(row)
    
    # Create the DataFrame from the list of rows
    df_summary = pd.DataFrame(rows, columns=columns)
    
    return df_summary

# Example usage
data_dict = {
    "Table1": {"Col1": 5, "Col2": 3},
    "Table2": {"Col1": 2, "Col3": 7}
}

df_summary = _Create_summary_count_df(data_dict)
print(df_summary)


    Table  Identical  New Table  Col1  Col2 comments
0  Table1          0          0     5     3     none
1  Table2          0          0     2     0     none


In [9]:
import pandas as pd
from typing import Dict

def create_summary_count_df(data_dict: Dict[str, Dict[str, int]]) -> pd.DataFrame:
    # Define static columns with their values
    static_columns = {"Identical": 0, "New Table": 0}
    
    # Get dynamic columns from the first dictionary in data_dict
    dynamic_columns = list(next(iter(data_dict.values())).keys())
    
    # Define all columns
    columns = ["Table"] + list(static_columns.keys()) + dynamic_columns + ["comments"]
    
    # Create rows using list comprehension
    rows = [
        {
            "Table": table_name,
            **{col: count_dict.get(col, 0) for col in dynamic_columns},
            **static_columns,
            "comments": "none"
        }
        for table_name, count_dict in data_dict.items()
    ]
    
    # Create the DataFrame from the list of rows
    df_summary = pd.DataFrame(rows, columns=columns)
    
    return df_summary

# Example usage
data_dict = {
    "Table1": {"Col1": 5, "Col2": 3},
    "Table2": {"Col1": 2, "Col3": 7}
}

df_summary = create_summary_count_df(data_dict)
print(df_summary)


    Table  Identical  New Table  Col1  Col2 comments
0  Table1          0          0     5     3     none
1  Table2          0          0     2     0     none


In [14]:
file_name = f"comparisons_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"

In [15]:
file_name


'comparisons_20240809_150546.xlsx'

In [21]:
import pandas as pd

# Sample DataFrames with MultiIndex columns
columns_df1 = pd.MultiIndex.from_tuples([(1,)], names=['(t)'])
df1 = pd.DataFrame({
    (1,): [0.9, 0.9, 0.9, 0.9]
}, index=pd.MultiIndex.from_tuples([
    ('ALBA', 'BL_KJ', 'ALL'),
    ('ALBA', 'BL_K1', 'ALL'),
    ('ALBA', 'BL_K2', 'ALL'),
    ('ALBA', 'BL_K3', 'ALL')
], names=['Pk.Fund', 'BonusSeries', 'ck.IssYear']))

columns_df2 = pd.MultiIndex.from_tuples([(1,), (2,)], names=['(t)'])
df2 = pd.DataFrame({
    (1,): [0.9, 0.9, 7.8, 5.6, 9, 1.4],
    (2,): [0.9, 0.9, 7.8, 5.6, 9, 1.4]
}, index=pd.MultiIndex.from_tuples([
    ('ALBA', 'BL_KJ', 'ALL'),
    ('ALBA', 'BL_K1', 'ALL'),
    ('ALBA', 'BL_K2', 'ALL'),
    ('ALBA', 'BL_K3', 'ALL'),
    ('BW', 'BW_1', 'ALL'),
    ('BW', 'BW_2', 'ALL')
], names=['Pk.Fund', 'BonusSeries', 'ck.IssYear']))

display(df1)
display(df2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1
Pk.Fund,BonusSeries,ck.IssYear,Unnamed: 3_level_1
ALBA,BL_KJ,ALL,0.9
ALBA,BL_K1,ALL,0.9
ALBA,BL_K2,ALL,0.9
ALBA,BL_K3,ALL,0.9


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1,2
Pk.Fund,BonusSeries,ck.IssYear,Unnamed: 3_level_1,Unnamed: 4_level_1
ALBA,BL_KJ,ALL,0.9,0.9
ALBA,BL_K1,ALL,0.9,0.9
ALBA,BL_K2,ALL,7.8,7.8
ALBA,BL_K3,ALL,5.6,5.6
BW,BW_1,ALL,9.0,9.0
BW,BW_2,ALL,1.4,1.4


In [27]:
display(df1)
display(df2)
# Step 1: Identify common columns
common_columns = df1.columns.intersection(df2.columns)
display(common_columns)
# Step 2: Identify common rows
common_index = df1.index.intersection(df2.index)
display(common_index)



#df1_aligned = df1.reindex(index=common_index, columns=common_columns)
#df2_aligned = df2.reindex(index=common_index, columns=common_columns)
#display(df1_aligned)
#display(df2_aligned)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1
Pk.Fund,BonusSeries,ck.IssYear,Unnamed: 3_level_1
ALBA,BL_KJ,ALL,0.9
ALBA,BL_K1,ALL,0.9
ALBA,BL_K2,ALL,0.9
ALBA,BL_K3,ALL,0.9


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1,2
Pk.Fund,BonusSeries,ck.IssYear,Unnamed: 3_level_1,Unnamed: 4_level_1
ALBA,BL_KJ,ALL,0.9,0.9
ALBA,BL_K1,ALL,0.9,0.9
ALBA,BL_K2,ALL,7.8,7.8
ALBA,BL_K3,ALL,5.6,5.6
BW,BW_1,ALL,9.0,9.0
BW,BW_2,ALL,1.4,1.4


MultiIndex([(1,)],
           )

MultiIndex([('ALBA', 'BL_KJ', 'ALL'),
            ('ALBA', 'BL_K1', 'ALL'),
            ('ALBA', 'BL_K2', 'ALL'),
            ('ALBA', 'BL_K3', 'ALL')],
           names=['Pk.Fund', 'BonusSeries', 'ck.IssYear'])

In [28]:
# Step 4: Find changed values
changed_values = df2.loc[common_index, common_columns].compare(df1.loc[common_index, common_columns])
display (changed_values)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1,1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,self,other
Pk.Fund,BonusSeries,ck.IssYear,Unnamed: 3_level_2,Unnamed: 4_level_2
ALBA,BL_K2,ALL,7.8,0.9
ALBA,BL_K3,ALL,5.6,0.9


In [None]:
# Step 3: Find new columns
new_columns = df2.columns.difference(df1.columns)

# Step 4: Find changed values
changed_values = df2.loc[common_index, common_columns].compare(df1.loc[common_index, common_columns])

# Step 5: Find new rows
new_rows = df2.index.difference(df1.index)
new_rows_df = df2.loc[new_rows]

# Print results
print("Common Columns:")
print(common_columns)
print("\nNew Columns:")
print(new_columns)
print("\nChanged Values:")
print(changed_values)
print("\nNew Rows:")
print(new_rows_df)

# Filtering new columns from df2
filtered_new_columns_df = df2.loc[:, new_columns]
print("\nFiltered New Columns:")
print(filtered_new_columns_df)
