In [72]:
import pandas as pd
import matplotlib.pyplot as plt
import re
from graphviz import Digraph

In [73]:
# Function to clean column names
def clean_column_name(name):
    # Convert to lowercase
    name = name.lower()
    # Replace spaces with underscores
    name = name.replace(' ', '_')
    # Remove special characters (except underscores)
    name = re.sub(r'[^\w\s]', '', name)
    return name

In [74]:
# Function to process columns and split rows
def process_column(df, column_name):
    new_rows = []
    for index, row in df.iterrows():
         # Ensure the column values are strings
        value = str(row[column_name])
        # Split the values in the column based on spaces and strip any extra spaces
        values = [val.strip() for val in row[column_name].split() if val.strip()]
        
        # If there's more than one value, create a new row for each value
        if len(values) > 1:
            for value in values:
                new_row = row.copy()  # Copy the original row
                new_row[column_name] = value  # Update with the trimmed text
                new_rows.append(new_row)
        else:
            # Keep the original row if there's only one value (no split needed)
            new_rows.append(row)
    return pd.DataFrame(new_rows)

In [75]:
# Function to clean data in a column
def clean_column_data(series):
    # Remove special characters and trim spaces
    series = series.apply(lambda x: re.sub(r'[^\w\s]', '', str(x)).strip() if pd.notna(x) else x)
    return series

In [76]:
def clean_account_no(value):
    if pd.isna(value):
        return value  # Return NaN if the value is NaN
    # Split the account number by space
    parts = value.split()
    # Check if all the parts are the same
    if len(parts) > 1 and all(part == parts[0] for part in parts):
        # If they are the same, return only one instance
        return parts[0]
    # If not, return the original value (no change needed)
    return value

In [77]:
def format_amount_indian(amount):
    # Convert the amount to a string and remove any existing commas
    amount_str = str(amount).replace(',', '')
    
    # Check if the number has more than 3 digits
    if len(amount_str) > 3:
        # Get the last 3 digits
        last_three = amount_str[-3:]
        # Get the remaining digits
        remaining = amount_str[:-3]
        # Group digits in thousands (group of 2 after the first group of 3)
        grouped = [remaining[max(0, i-2):i] for i in range(len(remaining), 0, -2)]
        # Reverse and join the grouped digits with commas
        formatted_remaining = ','.join(grouped[::-1])
        # Concatenate the formatted remaining part with the last three digits
        formatted_amount = f'{formatted_remaining},{last_three}'
    else:
        # For amounts less than or equal to 999, no formatting is needed
        formatted_amount = amount_str

    return formatted_amount

In [78]:
def split_text(text, max_width=30):
    """
    Split the text into lines, so that no line exceeds max_width characters.
    """
    words = text.split(' ')
    lines = []
    current_line = []
    current_length = 0
    
    for word in words:
        if current_length + len(word) + 1 > max_width:  # +1 for space
            lines.append(' '.join(current_line))
            current_line = [word]
            current_length = len(word)
        else:
            current_line.append(word)
            current_length += len(word) + 1  # +1 for space
    
    if current_line:
        lines.append(' '.join(current_line))
    
    return '<BR/>'.join(lines)

In [79]:
# Read the CSV file
file_path = 'C:\\Users\\rjsri\Music\\Excel Files\\CR.NO 42-2024-2.xlsx'  # Replace with your file path
df = pd.read_excel(file_path, dtype= str)
df.head()

  file_path = 'C:\\Users\\rjsri\Music\\Excel Files\\CR.NO 42-2024-2.xlsx'  # Replace with your file path


Unnamed: 0,S No.,Acknowledgement No.,Transaction Id / UTR Number,Layer,Account No./ (Wallet /PG/PA) Id,Action Taken by Bank/ (Wallet /PG/PA)/ Merchant// Insurance,Bank/ (Wallet /PG/PA)/ Merchant / Insurance,Account No,Ifsc Code,Cheque No,...,Transaction ID / UTR Number,Transaction Amount,Reference No,Remarks,Date of Action,Action Taken By bank,Action Taken Name,Action Taken By Email,Branch Location,Branch Manager Name & Contact Details
0,1,30206240014752,416538190101,1,58901503252,Money Transfer to,Mobikwik,918058063461 [ Reported 1 times ],PPIW0881822,0,...,416538190101.0,100000,,8058063461@mbk,2024-06-14 18:43:49,NPCI,NPCI API,,,
1,2,30206240014752,453122489174,1,110810100017159,Money Transfer to,State Bank of India,00000043007782956 [ Reported 1 times ],sbin0000539,0,...,453122489174.0,20000,,00000043007782956@sbin0000539.ifsc.npci,2024-06-14 18:43:50,NPCI,NPCI API,,,
2,3,30206240014752,416521672550,1,110810100017159,Money Transfer to,State Bank of India,00000043007782956 [ Reported 1 times ],sbin0000539,0,...,416521672550.0,5000,,00000043007782956@sbin0000539.ifsc.npci,2024-06-14 18:43:50,NPCI,NPCI API,,,
3,4,30206240014752,453123454472,1,110810100017159,Money Transfer to,Mobikwik,919660284823 [ Reported 1 times ],PPIW0881822,0,...,453123454472.0,60000,,9660284823@mbk,2024-06-14 18:43:50,NPCI,NPCI API,,,
4,5,30206240014752,453122489174,1,43007782956,Transaction put on hold,State Bank of India,,,0,...,,20000,,HOLD MARKED,2024-06-15 14:21:12,State Bank of India,V RAJESH,agmsrvl.lhoand@sbi.co.in,,


In [80]:
#df = pd.read_csv('D:\\0000 SB DATA\\transactionData\\CII CSV\\Test2.csv')
new_columns = [
            'S No.', 'acknowledgement_no', 'transaction_id', 'Layer', 'from_account_no', 
            'Action Taken by Bank/ (Wallet /PG/PA)/ Merchant// Insurance', 
            'Bank/ (Wallet /PG/PA)/ Merchant / Insurance', 'to_account_no', 'Ifsc Code', 
            'Cheque No', 'MID', 'TID', 'Approval Code', 'Merchant Name', 'Transaction Date', 
            'utr_number', 'amount', 'Reference No', 'Remarks', 'Date of Action', 
            'Action Taken By bank', 'Action Taken Name', 'Action Taken By Email', 
            'Branch Location', 'Branch Manager Name & Contact Details'
        ]
        # Get the current number of columns in the Excel file
num_current_columns = len(df.columns)
num_new_columns = len(new_columns)

# Adjust new columns if necessary (truncate or extend)
if num_current_columns > num_new_columns:
    # If more columns in the Excel, extend new_columns with 'Unnamed' columns
    extra_columns = [f"Unnamed {i+1}" for i in range(num_current_columns - num_new_columns)]
    adjusted_columns = new_columns + extra_columns
else:
    # If fewer columns in the Excel, truncate the new_columns to match
    adjusted_columns = new_columns[:num_current_columns]
# Replace the column headers with the adjusted columns
df.columns = adjusted_columns
df.columns = [clean_column_name(col) for col in df.columns]
df.columns

Index(['s_no', 'acknowledgement_no', 'transaction_id', 'layer',
       'from_account_no',
       'action_taken_by_bank_wallet_pgpa_merchant_insurance',
       'bank_wallet_pgpa_merchant__insurance', 'to_account_no', 'ifsc_code',
       'cheque_no', 'mid', 'tid', 'approval_code', 'merchant_name',
       'transaction_date', 'utr_number', 'amount', 'reference_no', 'remarks',
       'date_of_action', 'action_taken_by_bank', 'action_taken_name',
       'action_taken_by_email', 'branch_location',
       'branch_manager_name__contact_details'],
      dtype='object')

In [81]:
# # List of columns where you want to append '`'
# columns_to_append = ['acknowledgement_no','transaction_id','to_account_no', 'from_account_no','utr_number']

# # Prepend '`' to every value in the specified columns
# for column in columns_to_append:
#     if column in df.columns:
#         df[column] = df[column].apply(lambda x: '`' + str(x) if pd.notnull(x) else x)


In [82]:
print("Acknowledgement: ",df['acknowledgement_no'].unique())
print("transaction_id: ",df['transaction_id'].unique())
print("to_account_no: ",df['to_account_no'].unique())
print("from_account_no: ",df['from_account_no'].unique())
print("utr_number: ",df['utr_number'].unique())

Acknowledgement:  ['30206240014752']
transaction_id:  ['416538190101' '453122489174' '416521672550' '453123454472'
 '416651381949' '453207981409' '453123454472 416538190101' '416608225450'
 '453207981409 416651381949' '416673992719' '416645708815' '416613321934']
to_account_no:  ['918058063461 [ Reported 1 times ]'
 '00000043007782956 [ Reported 1 times ]'
 '919660284823 [ Reported 1 times ]' nan
 '1519000100709399 [ Reported 2 times ]'
 '55550105510687 [ Reported 1 times ]'
 '50100605570668 [ Reported 1 times ]'
 '50200046577197 [ Reported 2 times ]' '3548752677 [ Reported 1 times ]'
 'ESPL00400356 [ Reported 1 times ]']
from_account_no:  ['58901503252' '110810100017159' '43007782956' ' -:018301585320'
 '919660284823' '50100605570668' '1519000100709399' '55550105510687'
 '50200046577197' '3548752677']
utr_number:  ['416538190101' '453122489174' '416521672550' '453123454472' nan
 '416651381949' '453207981409' '416673992719' '416608225450'
 '416645708815' '416613321934' 'KKBKR1202406140

In [83]:
# Ensure that the 'to_account_no' column is treated as a string
df['to_account_no'] = df['to_account_no'].astype(str)

# Perform the split and handle cases where there's no bracket '[' in the string
df_split = df['to_account_no'].str.split(r'\[', n=1, expand=True)

# Ensure that df_split has two columns by filling missing values with empty strings
df_split[1] = df_split[1].fillna('')  # This handles rows without the '['

# Assign the first part to 'to_account_no' and the second part to 'reported_info'
df['to_account_no'] = df_split[0].str.strip()
df['reported_info'] = df_split[1].str.replace(']', '').str.strip()

# Extract only the number from 'reported_info' (e.g., 'Reported 1 times' -> '1')
df['reported_info'] = df['reported_info'].str.extract(r'(\d+)')

In [84]:
print("to_account_no: ",df['to_account_no'].unique())
print("from_account_no: ",df['from_account_no'].unique())
print("reported_info: ",df['reported_info'].unique())

to_account_no:  ['918058063461' '00000043007782956' '919660284823' 'nan'
 '1519000100709399' '55550105510687' '50100605570668' '50200046577197'
 '3548752677' 'ESPL00400356']
from_account_no:  ['58901503252' '110810100017159' '43007782956' ' -:018301585320'
 '919660284823' '50100605570668' '1519000100709399' '55550105510687'
 '50200046577197' '3548752677']
reported_info:  ['1' nan '2']


In [85]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 26 columns):
 #   Column                                               Non-Null Count  Dtype 
---  ------                                               --------------  ----- 
 0   s_no                                                 16 non-null     object
 1   acknowledgement_no                                   16 non-null     object
 2   transaction_id                                       16 non-null     object
 3   layer                                                16 non-null     object
 4   from_account_no                                      16 non-null     object
 5   action_taken_by_bank_wallet_pgpa_merchant_insurance  16 non-null     object
 6   bank_wallet_pgpa_merchant__insurance                 16 non-null     object
 7   to_account_no                                        16 non-null     object
 8   ifsc_code                                            11 non-null     object
 9   c

In [86]:
# # List of columns to process
# columns_to_process = ['transaction_id'] # add to account no if there are dual records in it

# # Ensure columns are in string format
# for col in columns_to_process:
#     if col in df.columns:
#         df[col] = df[col].astype(str)
        
# # Process each column and merge results
# processed_dfs = [process_column(df, col) for col in columns_to_process]
# # Merge all processed DataFrames
# final_df = pd.concat(processed_dfs, ignore_index=True)

# # Display the updated DataFrame to verify
# df = df.drop_duplicates(keep='first')

# # Save the modified DataFrame to a new CSV file if needed
# #final_df.to_csv('D:\\test\\excel2test_Cleaned.csv', index=False)
# df = final_df
# df.info()

In [87]:
df['amount']=df['amount'].astype(float)
df['layer'] = df['layer'].astype(int)

#df = df.drop(['unique_id'],axis=1)
df = df.drop(['s_no'],axis=1)

In [88]:
# Define the columns to clean
columns_to_clean = [
    'acknowledgement_no', 'transaction_id', 'layer',
    'utr_number', 'amount','to_account_no','from_account_no'
]

# Apply the cleaning function to each specified column
for col in columns_to_clean:
    if col in df.columns:
        df[col] = clean_column_data(df[col])

In [89]:
print("Acknowledgement: ",df['acknowledgement_no'].unique())
print("transaction_id: ",df['transaction_id'].unique())
print("to_account_no: ",df['to_account_no'].unique())
print("from_account_no: ",df['from_account_no'].unique())
print("utr_number: ",df['utr_number'].unique())

Acknowledgement:  ['30206240014752']
transaction_id:  ['416538190101' '453122489174' '416521672550' '453123454472'
 '416651381949' '453207981409' '453123454472 416538190101' '416608225450'
 '453207981409 416651381949' '416673992719' '416645708815' '416613321934']
to_account_no:  ['918058063461' '00000043007782956' '919660284823' 'nan'
 '1519000100709399' '55550105510687' '50100605570668' '50200046577197'
 '3548752677' 'ESPL00400356']
from_account_no:  ['58901503252' '110810100017159' '43007782956' '018301585320'
 '919660284823' '50100605570668' '1519000100709399' '55550105510687'
 '50200046577197' '3548752677']
utr_number:  ['416538190101' '453122489174' '416521672550' '453123454472' nan
 '416651381949' '453207981409' '416673992719' '416608225450'
 '416645708815' '416613321934' 'KKBKR12024061400629567']


In [90]:
#df['unique_id'] = pd.Series(range(1, len(df) + 1))
columns_to_update = ['to_account_no', 'from_account_no', 'ifsc_code', 'utr_number', 'remarks', 'transaction_id']
df.isnull().sum() + (df == '').sum()

acknowledgement_no                                      0
transaction_id                                          0
layer                                                   0
from_account_no                                         0
action_taken_by_bank_wallet_pgpa_merchant_insurance     0
bank_wallet_pgpa_merchant__insurance                    0
to_account_no                                           0
ifsc_code                                               5
cheque_no                                               0
mid                                                    16
tid                                                    16
approval_code                                          16
merchant_name                                          16
transaction_date                                        0
utr_number                                              5
amount                                                  0
reference_no                                           16
remarks       

In [91]:
df['from_account_no'] = df['from_account_no'].replace('nan','NaaN')
df['to_account_no'] = df['to_account_no'].replace('nan','NaaN')

In [92]:

# Replace empty strings and null values with 'NaaN'
df[columns_to_update] = df[columns_to_update].replace('', 'NaaN').fillna('NaaN')
df.isnull().sum() + (df == '').sum()

acknowledgement_no                                      0
transaction_id                                          0
layer                                                   0
from_account_no                                         0
action_taken_by_bank_wallet_pgpa_merchant_insurance     0
bank_wallet_pgpa_merchant__insurance                    0
to_account_no                                           0
ifsc_code                                               0
cheque_no                                               0
mid                                                    16
tid                                                    16
approval_code                                          16
merchant_name                                          16
transaction_date                                        0
utr_number                                              0
amount                                                  0
reference_no                                           16
remarks       

In [93]:
df['layer'] = pd.to_numeric(df['layer'], errors='coerce')
print(df['layer'].unique())

[1 3 2 4 5 6]


In [94]:
df.shape

(16, 25)

In [95]:
df['from_account_no'].unique()

array(['58901503252', '110810100017159', '43007782956', '018301585320',
       '919660284823', '50100605570668', '1519000100709399',
       '55550105510687', '50200046577197', '3548752677'], dtype=object)

In [96]:
df['to_account_no'] = df['to_account_no'].apply(clean_account_no)
df['from_account_no'] = df['from_account_no'].apply(clean_account_no)
print(df.shape)
df['from_account_no'].unique()

(16, 25)


array(['58901503252', '110810100017159', '43007782956', '018301585320',
       '919660284823', '50100605570668', '1519000100709399',
       '55550105510687', '50200046577197', '3548752677'], dtype=object)

In [97]:
ackno = df['acknowledgement_no'].unique()
ackname = 'Transaction Flow Graph For (Ack No): ' + str(ackno)
ackname

"Transaction Flow Graph For (Ack No): ['30206240014752']"

In [98]:
# import pandas as pd
# df.to_csv('D:\\0000 SB DATA\\transactionData\\CII CSV\\Test21.csv', index=False)

In [99]:
df.columns

Index(['acknowledgement_no', 'transaction_id', 'layer', 'from_account_no',
       'action_taken_by_bank_wallet_pgpa_merchant_insurance',
       'bank_wallet_pgpa_merchant__insurance', 'to_account_no', 'ifsc_code',
       'cheque_no', 'mid', 'tid', 'approval_code', 'merchant_name',
       'transaction_date', 'utr_number', 'amount', 'reference_no', 'remarks',
       'date_of_action', 'action_taken_by_bank', 'action_taken_name',
       'action_taken_by_email', 'branch_location',
       'branch_manager_name__contact_details', 'reported_info'],
      dtype='object')

In [100]:
new_df=df[['from_account_no','layer', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance','reported_info', 'utr_number']]
new_df['amount']=(new_df['amount'].astype(int))/10
new_df['amount']= new_df['amount'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['amount']=(new_df['amount'].astype(int))/10
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['amount']= new_df['amount'].astype(int)


In [101]:
new_df['to_account_no'] = new_df['to_account_no'].str.lstrip('0')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['to_account_no'] = new_df['to_account_no'].str.lstrip('0')


In [102]:
new_df

Unnamed: 0,from_account_no,layer,to_account_no,transaction_id,transaction_date,ifsc_code,amount,remarks,action_taken_by_bank_wallet_pgpa_merchant_insurance,reported_info,utr_number
0,58901503252,1,918058063461,416538190101,2024-06-13 18:07:13,PPIW0881822,100000,8058063461@mbk,Money Transfer to,1.0,416538190101
1,110810100017159,1,43007782956,453122489174,2024-06-13 16:07:43,sbin0000539,20000,00000043007782956@sbin0000539.ifsc.npci,Money Transfer to,1.0,453122489174
2,110810100017159,1,43007782956,416521672550,2024-06-13 15:46:50,sbin0000539,5000,00000043007782956@sbin0000539.ifsc.npci,Money Transfer to,1.0,416521672550
3,110810100017159,1,919660284823,453123454472,2024-06-13 16:30:56,PPIW0881822,60000,9660284823@mbk,Money Transfer to,1.0,453123454472
4,43007782956,1,NaaN,453122489174,2024-06-15 00:00:00,NaaN,20000,HOLD MARKED,Transaction put on hold,,NaaN
5,43007782956,1,NaaN,416521672550,2024-06-15 00:00:00,NaaN,5000,HOLD MARKED,Transaction put on hold,,NaaN
6,18301585320,1,1519000100709399,416651381949,2024-06-14 00:00:00,PUNB0151900,20000,NaaN,Money Transfer to,2.0,416651381949
7,18301585320,1,1519000100709399,453207981409,2024-06-14 00:00:00,PUNB0151900,10000,NaaN,Money Transfer to,2.0,453207981409
8,919660284823,1,NaaN,453123454472 416538190101,2024-09-26 17:11:48,NaaN,106000,Necessary action has been taken,Other,,NaaN
9,50100605570668,3,55550105510687,416608225450,2024-06-14 00:00:00,fdrl0000000,75000,NaaN,Money Transfer to,1.0,416673992719


### working fine all nodes are available - remarks added, node colors

In [104]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
#dot.node('title', ackname, shape='plaintext', fontsize='40', fontcolor='blue')
dot.attr(label=ackname, fontsize='30', labelloc='t', fontcolor='blue')
# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Dictionary to store the labels of nodes
node_labels = {}

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate amounts for each from-to account pair
edges_dict = {}
# Dictionary to store the total amount associated with each from_account_no in Layer 1
layer_1_from_sums = {}
# Define fixed width and height for all nodes
fixed_width = '2'
fixed_height = '2'

# Loop through each unique layer with the specified action
for i in new_df['layer'].unique():
    dt = new_df[(new_df['layer'] == i)]

    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer', 'utr_number', 'remarks', 'reported_info',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Remove invalid 'NaaN' or 'NaN' nodes before processing
    layer_nodes = layer_nodes.dropna(subset=['from_account_no', 'to_account_no', 'ifsc_code', 'reported_info'])

    # Layer 1: Sum the 'to_account_no' amounts for each 'from_account_no'
    if i == 1:
        for from_account in unique_from:
            # Sum all amounts sent from 'from_account_no' to 'to_account_no' in layer 1
            total_amount = layer_nodes[layer_nodes['from_account_no'] == from_account]['amount'].sum()
            layer_1_from_sums[from_account] = total_amount  # Store the total amount

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show and avoid 'NaN' nodes
            if not account_info.empty and pd.notna(account_info['ifsc_code'].iloc[0]) and pd.notna(account_info['reported_info'].iloc[0]):
                account_info = account_info.iloc[0]  # Access the first row

                # Determine node shape and color based on 'action_taken_by_bank_wallet_pgpa_merchant_insurance'
                action_taken = account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']
                if 'Withdrawal' in action_taken:
                    shape = 'circle'
                    color = 'blue'
                elif 'On Hold' in action_taken:
                    shape = 'triangle'
                    color = 'red'
                else:
                    shape = 'box'
                    color = 'black'  # Default color for other cases

                # Circular background (badge style) for 'reported_info'
                reported_info = f"""<TD BGCOLOR="#143261" ALIGN="CENTER" STYLE="ROUNDED"><FONT COLOR="white">{account_info['reported_info']}</FONT></TD>"""

                # Check if it's in Layer 1 'from_account_no' to apply the summation
                if i == 1 and account in layer_1_from_sums:
                    # Use the summed amount for 'from_account_no' in Layer 1
                    label = f"""<<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="5">
                <TR><TD><FONT COLOR="#8926B6" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(layer_1_from_sums[account])}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD><TD><FONT COLOR="#6b3700">{account_info['layer']}</FONT></TD></TR>
                <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
                <TR>{reported_info}</TR>
                 <TR><TD><FONT COLOR="black">{split_text(account_info['remarks'])}</FONT></TD></TR>
            </TABLE>>"""
                else:
                    # For all other layers, only show the summed amounts in red and remove original amount
                    label = f"""<<TABLE BORDER="0" CELLBORDER="0" CELLSPACING="5">
                <TR><TD><FONT COLOR="#8926B6" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
                <TR>{reported_info}</TR>
                <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD><TD><FONT COLOR="#6b3700">{account_info['layer']}</FONT></TD></TR>
                <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
                 <TR><TD><FONT COLOR="black">{split_text(account_info['remarks'])}</FONT></TD></TR>
            </TABLE>>"""
                
                # Store the label in node_labels dictionary
                node_labels[account] = label
                dot.node(f'account_{account}', 
                         label=label,
                         shape=shape,  # Dynamic shape based on 'action_taken_by_bank_wallet_pgpa_merchant_insurance'
                         color=color,
                         width=fixed_width,   # Fixed width
                         height=fixed_height) # Fixed height
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and sum amounts if necessary
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if (from_node != to_node) & (to_node != 'account_NaaN'):
            # Create a key to represent the from-to account pair
            edge_key = (from_node, to_node)
            # If this edge already exists, sum the amounts
            if edge_key in edges_dict:
                edges_dict[edge_key]['amount'] += row['amount']  # Sum the amounts
                edges_dict[edge_key]['transaction_ids'].append(row['utr_number'])  # Append the transaction ID
                edges_dict[edge_key]['dates'].append(row['transaction_date'])  # Append the transaction date
            else:
                edges_dict[edge_key] = {
                    'amount': row['amount'],
                    'transaction_ids': [row['utr_number']],
                    'dates': [row['transaction_date']]
                }

# Now create the edges without labels, but add the summed details to the target nodes
for (from_node, to_node), edge_data in edges_dict.items():
    
    # Append the summed information to the 'to' node
    total_amount = format_amount_indian(edge_data['amount'])
    transactions = '@ '.join(edge_data['transaction_ids'])
    dates = '@ '.join(edge_data['dates'])

    # Extract the account number from the to_node
    to_account = to_node.split('_')[1]  # Extract account number from the node ID

    # Get the original label from node_labels
    if to_account in node_labels:
        original_label = node_labels[to_account]

        # Add summed details to the existing label
        additional_label = f"""
        <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{total_amount}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Transactions: {transactions}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Dates: {dates}</FONT></TD></TR>
        </TABLE>>"""
        
        updated_label = original_label.replace('</TABLE>>', additional_label)  # Replace closing tag with additional details
        
        # Update the node with the new label
        dot.node(to_node, label=updated_label)
        # Also update the dictionary to reflect the new label
        node_labels[to_account] = updated_label

    # Create the edge with rounded corners, thicker penwidth, and arrow in the middle
    dot.edge(from_node, to_node, penwidth='2', dir='forward', arrowhead='normal', decorate='true', arrowsize='1.2', style='rounded', minlen='2')


filename = 'NewTesting'
dot.render(filename, format='pdf', cleanup=False)
dot.view()



'NewTesting.pdf'

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
#dot.node('title', ackname, shape='plaintext', fontsize='40', fontcolor='blue')
dot.attr(label=ackname, fontsize='30', labelloc='t', fontcolor='blue')
# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Dictionary to store the labels of nodes
node_labels = {}

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate amounts for each from-to account pair
edges_dict = {}

# Loop through each unique layer with the specified action
for i in new_df['layer'].unique():
    dt = new_df[(new_df['layer'] == i)]

    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer','utr_number','remarks','reported_info',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()
    
    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="#8926B6" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{account_info['utr_number']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(account_info['amount'])}</B></FONT></TD><TD><FONT COLOR="red" POINT-SIZE="15"><B>{account_info['reported_info']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD><TD><FONT COLOR="#6b3700">{account_info['layer']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
             <TR><TD><FONT COLOR="black">{split_text(account_info['remarks'])}</FONT></TD></TR>
        </TABLE>>"""
                
                # Store the label in node_labels dictionary
                node_labels[account] = label
                dot.node(f'account_{account}', 
                         label=label,
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and sum amounts if necessary
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if (from_node != to_node) & (to_node!='account_NaaN'): #if from_node != to_node:
            # Create a key to represent the from-to account pair
            edge_key = (from_node, to_node)
            # If this edge already exists, sum the amounts
            if edge_key in edges_dict:
                edges_dict[edge_key]['amount'] += row['amount']  # Sum the amounts
                edges_dict[edge_key]['transaction_ids'].append(row['utr_number'])  # Append the transaction ID
                edges_dict[edge_key]['dates'].append(row['transaction_date'])  # Append the transaction date
            else:
                edges_dict[edge_key] = {
                    'amount': row['amount'],
                    'transaction_ids': [row['utr_number']],
                    'dates': [row['transaction_date']]
                }

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Now create the edges without labels, but add the summed details to the target nodes
for (from_node, to_node), edge_data in edges_dict.items():
    
    # Append the summed information to the 'to' node
    total_amount = format_amount_indian(edge_data['amount'])
    transactions = '@ '.join(edge_data['transaction_ids'])
    dates = '@ '.join(edge_data['dates'])

    # Extract the account number from the to_node
    to_account = to_node.split('_')[1]  # Extract account number from the node ID

    # Get the original label from node_labels
    if to_account in node_labels:
        original_label = node_labels[to_account]

        # Add summed details to the existing label
        additional_label = f"""
        <TR><TD><FONT COLOR="blue">Total Amount: {total_amount}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Transactions: {transactions}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Dates: {dates}</FONT></TD></TR>
        </TABLE>>"""
        
        updated_label = original_label.replace('</TABLE>>', additional_label)  # Replace closing tag with additional details
        
        # Update the node with the new label
        dot.node(to_node, label=updated_label)
        # Also update the dictionary to reflect the new label
        node_labels[to_account] = updated_label

    # Create the edge without label
    dot.edge(from_node, to_node, minlen='2')

# --- Calculate the required details for the text below the graph ---

# 1. Total number of accounts involved excluding Layer 1 'from_account_no'
layer_1_from_accounts = new_df['to_account_no'].unique()
all_accounts = set(new_df['to_account_no'].unique())

# 2. Calculate number of days from the first to the last transaction
all_dates = pd.to_datetime(new_df['transaction_date'], errors='coerce')  # Convert to datetime
all_dates = all_dates.dropna()  # Drop invalid dates
days_span = (all_dates.max() - all_dates.min()).days  # Calculate days difference

# 3. Number of layers in the graph
num_layers = len(new_df['layer'].unique())

# 4. Count of unique transactions based on 'action_taken_by_bank_wallet_pgpa_merchant_insurance'
transaction_modes = new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'].value_counts()

# --- Show Unique Account Numbers and IFSC Codes ---
unique_account_ifsc = new_df[['to_account_no', 'ifsc_code']].drop_duplicates()

# --- Create a plaintext node to display the summary in a box in the bottom left corner ---
summary_text = f"""<<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="2">
<TR><TD ALIGN="LEFT"><FONT POINT-SIZE="14"><B>Summary of Graph</B></FONT></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Accounts (to_account_no): <B>{len(all_accounts)}</B></TD></TR>
<TR><TD ALIGN="LEFT">Time Span of Transactions: <B>{days_span} days</B></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Layers: <B>{num_layers}</B></TD></TR>
<TR><TD ALIGN="LEFT"><B>Unique Mode of Transactions</B></TD></TR>"""

# Adding each mode and its count to the summary text
for mode, count in transaction_modes.items():
    summary_text += f'<TR><TD ALIGN="LEFT">{mode}: <B>{count}</B></TD></TR>'

# Adding unique accounts and IFSC codes to the summary
summary_text += '<TR><TD ALIGN="LEFT"><B>Unique Accounts and IFSC Codes</B></TD></TR>'
for idx, row in unique_account_ifsc.iterrows():
    summary_text += f'<TR><TD ALIGN="LEFT">Account: {row["to_account_no"]}, IFSC: {row["ifsc_code"]}</TD></TR>'

summary_text += "</TABLE>>"

# --- Add this summary text node at the bottom left corner (rank=sink) ---
dot.node('summary', label=summary_text, shape='plaintext')

# Render the combined graph after processing all layers
filename = 'Final_Graph_with_Summary3'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
for (from_node, to_node), edge_data in edges_dict.items():
    
    # Append the summed information to the 'to' node
    if to_node=="account_NaaN":
       print('hi')
    else:

        total_amount = format_amount_indian(edge_data['amount'])
        transactions = '@ '.join(edge_data['transaction_ids'])
        dates = '@ '.join(edge_data['dates'])

        # Extract the account number from the to_node
        to_account = to_node.split('_')[1]  # Extract account number from the node ID

        # Get the original label from node_labels
        if to_account in node_labels:
            original_label = node_labels[to_account]

            # Add summed details to the existing label
            additional_label = f"""
        <TR><TD><FONT COLOR="blue">Total Amount: {total_amount}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Transactions: {transactions}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Dates: {dates}</FONT></TD></TR>
        </TABLE>>"""
        
            updated_label = original_label.replace('</TABLE>>', additional_label)  # Replace closing tag with additional details
        
        # Update the node with the new label
            dot.node(to_node, label=updated_label)
        # Also update the dictionary to reflect the new label
            node_labels[to_account] = updated_label

    # Create the edge without label
        dot.edge(from_node, to_node, minlen='2')

# --- Calculate the required details for the text below the graph ---

# 1. Total number of accounts involved excluding Layer 1 'from_account_no'
layer_1_from_accounts = new_df[new_df['layer'] == 1]['from_account_no'].unique()
all_accounts = set(new_df['from_account_no'].unique()).union(new_df['to_account_no'].unique())
total_accounts_excluding_layer_1 = all_accounts.difference(layer_1_from_accounts)

# 2. Calculate number of days from the first to the last transaction
all_dates = pd.to_datetime(new_df['transaction_date'], errors='coerce')  # Convert to datetime
all_dates = all_dates.dropna()  # Drop invalid dates
days_span = (all_dates.max() - all_dates.min()).days  # Calculate days difference

# 3. Number of layers in the graph
num_layers = len(new_df['layer'].unique())

# 4. Count of unique transactions based on 'action_taken_by_bank_wallet_pgpa_merchant_insurance'
transaction_modes = new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'].value_counts()

# --- Show Unique Account Numbers and IFSC Codes ---
unique_account_ifsc = new_df[['from_account_no', 'ifsc_code']].drop_duplicates()

# --- Create a plaintext node to display the summary in a box in the bottom left corner ---
summary_text = f"""<<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="2">
<TR><TD ALIGN="LEFT"><FONT POINT-SIZE="14"><B>Summary of Graph</B></FONT></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Accounts (excluding Layer 1's 'from_account_no'): <B>{len(total_accounts_excluding_layer_1)}</B></TD></TR>
<TR><TD ALIGN="LEFT">Time Span of Transactions: <B>{days_span} days</B></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Layers: <B>{num_layers}</B></TD></TR>
<TR><TD ALIGN="LEFT"><B>Unique Mode of Transactions</B></TD></TR>"""

# Adding each mode and its count to the summary text
for mode, count in transaction_modes.items():
    summary_text += f'<TR><TD ALIGN="LEFT">{mode}: <B>{count}</B></TD></TR>'

# Adding unique accounts and IFSC codes to the summary
summary_text += '<TR><TD ALIGN="LEFT"><B>Unique Accounts and IFSC Codes</B></TD></TR>'
for idx, row in unique_account_ifsc.iterrows():
    summary_text += f'<TR><TD ALIGN="LEFT">Account: {row["from_account_no"]}, IFSC: {row["ifsc_code"]}</TD></TR>'

summary_text += "</TABLE>>"

# --- Add this summary text node at the bottom left corner (rank=sink) ---
dot.node('summary', label=summary_text, shape='plaintext')

# Render the combined graph after processing all layers
filename = 'Final_Graph_with_Summary'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
for (from_node, to_node), edge_data in edges_dict.items():
    if to_node=="account_NaaN":
        print('hi')
    else:
        print(from_node, to_node)

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
#dot.node('title', ackname, shape='plaintext', fontsize='40', fontcolor='blue')
dot.attr(label=ackname, fontsize='30', labelloc='t', fontcolor='blue')
# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Dictionary to store the labels of nodes
node_labels = {}

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate amounts for each from-to account pair
edges_dict = {}

# Loop through each unique layer with the specified action
for i in new_df['layer'].unique():
    dt = new_df[(new_df['layer'] == i)]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{account_info['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(account_info['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD><TD><FONT COLOR="#6b3700">{account_info['layer']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""
                
                # Store the label in node_labels dictionary
                node_labels[account] = label
                
                dot.node(f'account_{account}', 
                         label=label,
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and sum amounts if necessary
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            # Create a key to represent the from-to account pair
            edge_key = (from_node, to_node)

            # If this edge already exists, sum the amounts
            if edge_key in edges_dict:
                edges_dict[edge_key]['amount'] += row['amount']  # Sum the amounts
                edges_dict[edge_key]['transaction_ids'].append(row['transaction_id'])  # Append the transaction ID
                edges_dict[edge_key]['dates'].append(row['transaction_date'])  # Append the transaction date
            else:
                edges_dict[edge_key] = {
                    'amount': row['amount'],
                    'transaction_ids': [row['transaction_id']],
                    'dates': [row['transaction_date']]
                }

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Now create the edges without labels, but add the summed details to the target nodes
for (from_node, to_node), edge_data in edges_dict.items():
    # Append the summed information to the 'to' node
    total_amount = format_amount_indian(edge_data['amount'])
    transactions = '@ '.join(edge_data['transaction_ids'])
    dates = '@ '.join(edge_data['dates'])

    # Extract the account number from the to_node
    to_account = to_node.split('_')[1]  # Extract account number from the node ID

    # Get the original label from node_labels
    if to_account in node_labels:
        original_label = node_labels[to_account]

        # Add summed details to the existing label
        additional_label = f"""
        <TR><TD><FONT COLOR="blue">Total Amount: {total_amount}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Transactions: {transactions}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">Dates: {dates}</FONT></TD></TR>
        </TABLE>>"""
        
        updated_label = original_label.replace('</TABLE>>', additional_label)  # Replace closing tag with additional details
        
        # Update the node with the new label
        dot.node(to_node, label=updated_label)
        # Also update the dictionary to reflect the new label
        node_labels[to_account] = updated_label

    # Create the edge without label
    dot.edge(from_node, to_node, minlen='2')

# --- Calculate the required details for the text below the graph ---

# 1. Total number of accounts involved excluding Layer 1 'from_account_no'
layer_1_from_accounts = new_df[new_df['layer'] == 1]['from_account_no'].unique()
all_accounts = set(new_df['from_account_no'].unique()).union(new_df['to_account_no'].unique())
total_accounts_excluding_layer_1 = all_accounts.difference(layer_1_from_accounts)

# 2. Calculate number of days from the first to the last transaction
all_dates = pd.to_datetime(new_df['transaction_date'], errors='coerce')  # Convert to datetime
all_dates = all_dates.dropna()  # Drop invalid dates
days_span = (all_dates.max() - all_dates.min()).days  # Calculate days difference

# 3. Number of layers in the graph
num_layers = len(new_df['layer'].unique())

# 4. Count of unique transactions based on 'action_taken_by_bank_wallet_pgpa_merchant_insurance'
transaction_modes = new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'].value_counts()

# --- Show Unique Account Numbers and IFSC Codes ---
unique_account_ifsc = new_df[['from_account_no', 'ifsc_code']].drop_duplicates()

# --- Create a plaintext node to display the summary in a box in the bottom left corner ---
summary_text = f"""<<TABLE BORDER="1" CELLBORDER="0" CELLSPACING="2">
<TR><TD ALIGN="LEFT"><FONT POINT-SIZE="14"><B>Summary of Graph</B></FONT></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Accounts (excluding Layer 1's 'from_account_no'): <B>{len(total_accounts_excluding_layer_1)}</B></TD></TR>
<TR><TD ALIGN="LEFT">Time Span of Transactions: <B>{days_span} days</B></TD></TR>
<TR><TD ALIGN="LEFT">Total No. of Layers: <B>{num_layers}</B></TD></TR>
<TR><TD ALIGN="LEFT"><B>Unique Mode of Transactions</B></TD></TR>"""

# Adding each mode and its count to the summary text
for mode, count in transaction_modes.items():
    summary_text += f'<TR><TD ALIGN="LEFT">{mode}: <B>{count}</B></TD></TR>'

# Adding unique accounts and IFSC codes to the summary
summary_text += '<TR><TD ALIGN="LEFT"><B>Unique Accounts and IFSC Codes</B></TD></TR>'
for idx, row in unique_account_ifsc.iterrows():
    summary_text += f'<TR><TD ALIGN="LEFT">Account: {row["from_account_no"]}, IFSC: {row["ifsc_code"]}</TD></TR>'

summary_text += "</TABLE>>"

# --- Add this summary text node at the bottom left corner (rank=sink) ---
dot.node('summary', label=summary_text, shape='plaintext')

# Render the combined graph after processing all layers
filename = 'Final_Graph_with_Summary'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
transaction_modes

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate amounts for each from-to account pair
edges_dict = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{account_info['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{account_info['amount']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""
                dot.node(f'account_{account}', 
                         label=label,
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and sum amounts if necessary
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            # Create a key to represent the from-to account pair
            edge_key = (from_node, to_node)

            # If this edge already exists, sum the amounts
            if edge_key in edges_dict:
                edges_dict[edge_key]['amount'] += row['amount']  # Sum the amounts
                edges_dict[edge_key]['transaction_ids'].append(row['transaction_id'])  # Append the transaction ID
            else:
                edges_dict[edge_key] = {
                    'amount': row['amount'],
                    'transaction_ids': [row['transaction_id']],
                    'dates': [row['transaction_date']]
                }

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Now create the edges with summed amounts shown as a separate node
for (from_node, to_node), edge_data in edges_dict.items():
    # Create a new node that will show the summed amount
    sum_label = f"""<<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="4">
    <TR><TD><FONT COLOR="red" POINT-SIZE="12">Total Amount: {format_amount_indian(edge_data['amount'])}</FONT></TD></TR>
    </TABLE>>"""
    sum_node = f'sum_{from_node}_{to_node}'
    dot.node(sum_node, label=sum_label, shape='ellipse', style='dashed')

    # Create edges from the from_node to the sum_node and from the sum_node to the to_node
    dot.edge(from_node, sum_node, minlen='2')
    dot.edge(sum_node, to_node, minlen='2')

# Render the combined graph after processing all layers
filename = 'GraphWithSummedAmountNewNodes'
dot.render(filename, format='pdf', cleanup=False)
dot.view()


In [None]:
#Graph with layers column wise working fine
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Left to right, with more spacing
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')
# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track the total amount of money flowing into each 'to_account_no'
target_amounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer',
                                 'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Create a subgraph (cluster) for the current layer (i.e., column)
    with dot.subgraph() as s:
        s.attr(rank='same')  # All nodes in the same layer should have the same rank (align horizontally)
        s.attr(label=f'Layer {i}', style='dotted')  # Add a dotted line around the cluster for clarity

        # Get distinct from_account_no and their associated to_account_no
        unique_from = layer_nodes['from_account_no'].unique()
        unique_to = layer_nodes['to_account_no'].unique()

        # Create unified nodes for both 'from' and 'to' accounts in this layer
        for account in set(unique_from).union(set(unique_to)):
            if account not in unique_nodes:  # Check if node already exists
                # Collecting additional info for the node label
                account_info = layer_nodes[layer_nodes['from_account_no'] == account]
                if account_info.empty:
                    account_info = layer_nodes[layer_nodes['to_account_no'] == account]

                # Ensure there is info to show
                if not account_info.empty:
                    account_info = account_info.iloc[0]  # Access the first row
                    
                    # Get the summed amount for the node
                    summed_amount = target_amounts.get(account, account_info['amount'])

                    # Create the node label with relevant information
                    label = f"""<<TABLE BORDER="0" CELLBORDER="0">
                <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="#036100"><B>{account_info['transaction_id']}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{summed_amount}</B></FONT></TD><TD><FONT COLOR="red" POINT-SIZE="15"><B>Layer: {account_info['layer']}</B></FONT></TD></TR>
                <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
                <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
            </TABLE>>"""
                    
                    # Add the node to the current layer subgraph
                    s.node(f'account_{account}', 
                           label=label,
                           shape='box')
                    unique_nodes.add(account)  # Mark the node as created

    # Create edges based on transactions and sum amounts for each target account
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            # Sum the amounts for each target node (to_account)
            if row['to_account_no'] in target_amounts:
                target_amounts[row['to_account_no']] += row['amount']  # Add to existing amount
            else:
                target_amounts[row['to_account_no']] = row['amount']  # Initialize amount

            # Create the edge between nodes (without a label)
            dot.edge(from_node, to_node, minlen='2')

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Render the combined graph after processing all layers
filename = 'StructuredGraphWithLayers'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
# Code from GPT with colors added and working fine (issue is at multiple edges for a node amount summation)

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate transaction details for each to-account
target_transactions = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer', 'remarks',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                
                # Get concatenated transaction IDs and amounts for the node
                transactions = target_transactions.get(account, f"{account_info['transaction_id']}@{format_amount_indian(account_info['amount'])}")
                print("-----",transactions)
                label = f"""<<TABLE BORDER="0" CELLBORDER="0">
                        <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
                        <TR><TD><FONT COLOR="#036100"><B>{transactions}</B></FONT></TD></TR>
                        <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
                        <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
                        <TR><TD><FONT COLOR="RED">{split_text(account_info['remarks'])}</FONT></TD></TR>
                        <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD><TD><FONT COLOR="#6b3700">Layer: {account_info['layer']}</FONT></TD></TR>
                    </TABLE>>"""
                dot.node(f'account_{account}', 
                         label=label,
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and concatenate details for each target account
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            # Concatenate transaction ID and amount for each target node (to_account)
            if row['to_account_no'] in target_transactions:
                target_transactions[row['to_account_no']] += f"@{row['transaction_id']}@{format_amount_indian(row['amount'])}"
            else:
                target_transactions[row['to_account_no']] = f"{row['transaction_id']}@{format_amount_indian(row['amount'])}"

            print(target_transactions[row['to_account_no']])
            # Create an edge without labels
            dot.edge(from_node, to_node, minlen='2')
    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Render the combined graph after processing all layers
filename = 'GraphWithTransactionIDsAndAmountsInNodes'
dot.render(filename, format='pdf', cleanup=False) 
dot.view()

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track all transactions (ID and amount) for each to-account
target_transactions = {}

# Step 1: Populate the target_transactions dictionary first
for i in new_df['layer'].unique():
    dt = new_df[(new_df['layer'] == i)]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer', 'remarks',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Process each row to populate target_transactions for edges
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        # Only process if from_node and to_node are different (no self-loops)
        if from_node != to_node:
            # Concatenate transaction ID and amount for each target node (to_account)
            if row['to_account_no'] in target_transactions:
                target_transactions[row['to_account_no']] += f"<BR/>{row['transaction_id']}@{format_amount_indian(row['amount'])}"
            else:
                target_transactions[row['to_account_no']] = f"{row['transaction_id']}@{format_amount_indian(row['amount'])}"

            # Create an edge without labels
            dot.edge(from_node, to_node, minlen='2')

    # Store accounts for the current layer (for possible connections between layers)
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # Connect accounts between layers (if there are previous layers)
    if i > 1:
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    if f'account_{from_account}' != f'account_{from_account}':  # Avoid self-loop
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Step 2: After edges are processed, create/update the nodes with concatenated transactions
for account in target_transactions.keys():
    # Extract the first transaction info for the node
    account_info = new_df[(new_df['from_account_no'] == account) | (new_df['to_account_no'] == account)].iloc[0]

    # Generate the label with concatenated transactions
    transactions = target_transactions[account]
    label = f"""<<TABLE BORDER="0" CELLBORDER="0">
        <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="#036100"><B>{transactions}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
        <TR><TD><FONT COLOR="red">{split_text(account_info['remarks'])}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD><TD><FONT COLOR="#6b3700">Layer: {account_info['layer']}</FONT></TD></TR>
    </TABLE>>"""

    # Determine the node shape and color based on the transaction type
    action = account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']
    if 'WITHDRAWAL' in action.upper():
            node_shape = 'circle'
            node_color = 'blue'
    elif 'ON HOLD' in action.upper():
        node_shape = 'circle'
        node_color = 'red'
    elif action.upper() == 'OTHER':
        node_shape = 'circle'
        node_color = 'violet'
    else:
        node_shape = 'box'
        node_color = 'black'

    # Create or update the node with the full label and conditionally set the shape and color
    dot.node(f'account_{account}', 
             label=label,
             shape=node_shape,
             color=node_color)

# Step 3: Handle cases where there is no to_account_no (NaN cases)
for _, row in new_df.iterrows():
    if pd.isna(row['to_account_no']):
        from_node = f'account_{row["from_account_no"]}'
        nan_node = f'NAN_{row["transaction_id"]}'  # Unique node name for NaN case

        # Generate the label for the NaN node
        label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>NaN</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{row['transaction_id']}@{format_amount_indian(row['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{row['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{row['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="red">{split_text(row['remarks'])}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{row['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD><TD><FONT COLOR="#6b3700">Layer: {row['layer']}</FONT></TD></TR>
        </TABLE>>"""

        # Set the node shape and color based on action
        action = row['action_taken_by_bank_wallet_pgpa_merchant_insurance']
        if 'WITHDRAWAL' in action.upper():
            node_shape = 'circle'
            node_color = 'blue'
        elif 'ON HOLD' in action.upper():
            node_shape = 'circle'
            node_color = 'red'
        elif action.upper() == 'OTHER':
            node_shape = 'circle'
            node_color = 'violet'
        else:
            node_shape = 'box'
            node_color = 'black'

        # Create the NaN node
        dot.node(nan_node, label=label, shape=node_shape, color=node_color)

        # Create the edge from the from_account_no to the NaN node
        dot.edge(from_node, nan_node, minlen='1', constraint='true')

# Render the combined graph after processing all layers
filename = 'GraphWithNaNNodes'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
action

In [None]:
#tested ok (With summed amounts, remarks, transaction id and amounts)
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track all transactions (ID and amount) for each to-account
target_transactions = {}

# Step 1: Populate the target_transactions dictionary first
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer', 'remarks','reported_info',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Process each row to populate target_transactions for edges
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only process if from_node and to_node are different (no self-loops)
        if from_node != to_node:
            # Concatenate transaction ID and amount for each target node (to_account)
            if row['to_account_no'] in target_transactions:
                target_transactions[row['to_account_no']] += f"<BR/>{row['transaction_id']}@{format_amount_indian(row['amount'])}"
            else:
                target_transactions[row['to_account_no']] = f"{row['transaction_id']}@{format_amount_indian(row['amount'])}"

            # Create an edge without labels
            dot.edge(from_node, to_node, minlen='2')

    # Store accounts for the current layer (for possible connections between layers)
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # Connect accounts between layers (if there are previous layers)
    if i > 1:
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    if f'account_{from_account}' != f'account_{from_account}':  # Avoid self-loop
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Step 2: After edges are processed, create/update the nodes with concatenated transactions
for account in target_transactions.keys():
    # Extract the first transaction info for the node
    account_info = new_df[(new_df['from_account_no'] == account) | (new_df['to_account_no'] == account)].iloc[0]

    # Generate the label with concatenated transactions
    transactions = target_transactions[account]
    label = f"""<<TABLE BORDER="0" CELLBORDER="0">
        <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="#036100"><B>{transactions}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
        <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
        <TR><TD><FONT COLOR="red">{split_text(account_info['remarks'])}</FONT></TD><TD><FONT COLOR="red">{account_info['reported_info']}</FONT></TD></TR>
        <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD><TD><FONT COLOR="#6b3700">Layer: {account_info['layer']}</FONT></TD></TR>
    </TABLE>>"""
    
    # Create or update the node with the full label
    dot.node(f'account_{account}', 
             label=label,
             shape='box')

# Render the combined graph after processing all layers
filename = 'GraphWithTransactionIDsAndAmountsInNodes'
dot.render(filename, format='pdf', cleanup=False) 
dot.view()


In [None]:
#code by pavan putra not changed
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')
# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 'layer',
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code','transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]
            
            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                dot.node(f'account_{account}', 
                         f'Account: {account}\nAmount: {account_info["amount"]}\nDate: {account_info["transaction_date"]}\nIFSC: {account_info["ifsc_code"]}\nTransaction_id: {account_info["transaction_id"]}\nLayer: {account_info["layer"]}', 
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            dot.edge(from_node, to_node,  minlen='2')  # Increase edge length

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Render the combined graph after processing all layers
filename = 'Final_Graph'
dot.render(filename, format='pdf', cleanup=False)
dot.view()


In [None]:
#Graph with edges Edge having transactionid and summed amount
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph
dot = Digraph(format='pdf')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers
dot.node('title', ackname, shape='plaintext', fontsize='20', fontcolor='blue')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Dictionary to track edges and aggregate amounts for each from-to account pair
edges_dict = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount','layer' ,
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code', 'transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]

            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{account}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{account_info['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{account_info['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(account_info['amount'])}</B></FONT></TD><TD><FONT COLOR="black" POINT-SIZE="15"><B>{(account_info['layer'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{account_info['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{account_info['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""
                dot.node(f'account_{account}', 
                         label=label,
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions and sum amounts if necessary
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'

        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            # Create a key to represent the from-to account pair
            edge_key = (from_node, to_node)

            # If this edge already exists, sum the amounts
            if edge_key in edges_dict:
                edges_dict[edge_key]['amount'] += row['amount']  # Sum the amounts
                edges_dict[edge_key]['transaction_ids'].append(row['transaction_id'])  # Append the transaction ID
            else:
                edges_dict[edge_key] = {
                    'amount': row['amount'],
                    'transaction_ids': [row['transaction_id']],
                    'dates': [row['transaction_date']]
                }

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Now create the edges with the aggregated amounts
for (from_node, to_node), edge_data in edges_dict.items():
    label = f"Total Amount: {format_amount_indian(edge_data['amount'])}\nTransactions: {'@ '.join(edge_data['transaction_ids'])}"
    dot.edge(from_node, to_node, label=label, minlen='2')  # Include the label with sum of amounts

# Render the combined graph after processing all layers
filename = 'Final_Graph'
dot.render(filename, format='pdf', cleanup=False)
dot.view()

In [None]:
# Initialize the previous layer nodes (starting with an empty set)
previous_layer_nodes = None

# Set to track already added edges to avoid duplicates
added_edges = set()

# Get all distinct layers from the dataset
layers = df['layer'].unique()

# Initialize variables for storing previous layer nodes and tracking edges
added_edges = set()
previous_layer_nodes = None

# Iterate over the layers
for idx, layer in enumerate(layers):
    
    # Filter the records for the current layer
    layer_records = df[df['layer'] == layer]
    
    # Get distinct nodes for the current layer
    to_account_nodes = layer_records['to_account_no'].drop_duplicates()  # Distinct to_account_no for the current layer
    from_account_nodes = layer_records['from_account_no'].drop_duplicates()  # Distinct from_account_no for the current layer
    
    print(f"Layer {layer}: TO_Accounts: {to_account_nodes.values}, FROM_Accounts: {from_account_nodes.values}")
    
    # Add `to_account_no` as nodes in the current column (first for first layer)
    for to_account in to_account_nodes:
        # Define node label (could be extended as needed)
        label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{to_account}</B></FONT></TD></TR>
        </TABLE>>"""
        
        # Add the node for the 'to_account_no' to the graph
        dot.node(str(to_account), label=label, shape='box', color='black')  # Default to black for simplicity
        
    # Add `from_account_no` as nodes in the next column (right)
    for from_account in from_account_nodes:
        label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{from_account}</B></FONT></TD></TR>
        </TABLE>>"""
        
        # Add the node for the 'from_account_no' to the graph
        dot.node(str(from_account), label=label, shape='box', color='black')  # Default to black for simplicity
    
    # Add edges between nodes within the same layer
    # Link from 'to_account_no' to 'from_account_no' for the same transactions in this layer
    for _, row in layer_records.iterrows():
        from_node = str(row['from_account_no'])
        to_node = str(row['to_account_no'])
        edge = (to_node, from_node)  # This is the direction from to_account to from_account
        
        # Add edge if it hasn't been added already
        if edge not in added_edges:
            dot.edge(to_node, from_node)  # Add the edge to the graph
            added_edges.add(edge)  # Track added edges to prevent duplicates
    
    # If this is not the first layer, create inter-layer edges based on the condition
    if previous_layer_nodes is not None:
        # Match `to_account_no` in the previous layer with `from_account_no` in the current layer
        for _, current_row in layer_records.iterrows():
            for _, prev_row in previous_layer_nodes.iterrows():
                # Check the condition: previous layer's `to_account_no` == current layer's `from_account_no`
                if str(prev_row['to_account_no']) == str(current_row['from_account_no']):
                    # Edge from previous layer's `to_account_no` to current layer's `from_account_no`
                    edge = (str(prev_row['to_account_no']), str(current_row['from_account_no']))
                    
                    if edge not in added_edges:
                        dot.edge(str(prev_row['to_account_no']), str(current_row['from_account_no']))
                        added_edges.add(edge)  # Track added edge
    
    # Update previous_layer_nodes to the current layer's `to_account_no` and `from_account_no` for the next iteration
    previous_layer_nodes = layer_records[['to_account_no', 'from_account_no']].drop_duplicates()

# Save and view the graph (optional)
dot.render('dfs_transaction_graph_newapproach', format='png', cleanup=True)
dot.view()

In [None]:
unique_layers = df['layer'].unique()

# Create a directed graph
dot = Digraph(format='png', graph_attr={'rankdir': 'LR', 'splines': 'ortho'})

for layer in unique_layers:
    # Filter dataset for the current layer
    layer_data = df[df['layer'] == layer]

    # Get distinct from and to account numbers
    distinct_from_accounts = layer_data['from_account_no'].unique()
    distinct_to_accounts = layer_data['to_account_no'].unique()

    # Create a subgraph for the current layer
    with dot.subgraph(name=f'cluster_{layer}') as sub:
        sub.attr(label=f'Layer {layer}', color='lightgrey')

        # Add nodes for distinct from accounts
        for from_account in distinct_from_accounts:
            sub.node(str(from_account), str(from_account))

        # Add nodes for distinct to accounts
        for to_account in distinct_to_accounts:
            sub.node(str(to_account), str(to_account))

        # Add edges between from and to accounts
        for _, row in layer_data.iterrows():
            sub.edge(str(row['from_account_no']), str(row['to_account_no']))

# Save and render the graph
dot.render('layered_graph')

In [None]:
df.head()
new_df=df[['from_account_no','layer', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]
new_df['amount']=(new_df['amount'].astype(int))/10
    

In [None]:
df.head()

In [None]:
new_df.info()

In [None]:
new_df.head()

In [None]:
myDict = {}
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to'].layer.unique():
    print(i)
    if myDict:
        dump=list(myDict[i])
        dump=dump+list(new_df[(new_df['layer']==i)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]['from_account_no'])
        myDict[i] = dump
        myDict[i+1] = new_df[(new_df['layer']==i)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]['to_account_no']
    else:
        myDict[i] = new_df[(new_df['layer']==i)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]['from_account_no']
        myDict[i+1] = new_df[(new_df['layer']==i)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]['to_account_no']

In [None]:
new_df[new_df['layer']==1]['from_account_no'].unique()

In [None]:
myDict

In [None]:
new_df.head()

In [None]:
import pandas as pd
from graphviz import Digraph

# Sample DataFrame setup (replace this with your actual DataFrame)
# df = pd.read_csv('your_data.csv')  # Load your DataFrame
unique_from1=list()
from_account1=list()
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 
                                  'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]
    layer_nodes
    # Initialize the Graphviz directed graph
    dot = Digraph(format='png')

    # Set graph attributes
    dot.attr(rankdir='LR', splines='ortho')

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create nodes for each unique from_account_no

    for from_account in unique_from:
        dot.node(f'from_{from_account}', f'From: {from_account}', shape='box')

    # Create nodes for each unique to_account_no
    for to_account in unique_to:
        dot.node(f'to_{to_account}', f'To: {to_account}', shape='box')
    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        dot.edge(f'from_{row["from_account_no"]}', f'to_{row["to_account_no"]}', 
                 label=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

    filename = f'jyostna1_{i}'
    dot.render(filename, format='png', cleanup=False)

In [None]:
import pandas as pd
from graphviz import Digraph

# Sample DataFrame setup (replace this with your actual DataFrame)
# df = pd.read_csv('your_data.csv')  # Load your DataFrame

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create nodes for each unique from_account_no
    for from_account in unique_from:
        if f'from_{from_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'from_{from_account}', f'From: {from_account}', shape='box')
            unique_nodes.add(f'from_{from_account}')  # Add to unique nodes set

    # Create nodes for each unique to_account_no
    for to_account in unique_to:
        if f'to_{to_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'to_{to_account}', f'To: {to_account}', shape='box')
            unique_nodes.add(f'to_{to_account}')  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        dot.edge(f'from_{row["from_account_no"]}', f'to_{row["to_account_no"]}', 
                 label=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)


In [None]:
# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create nodes for each unique from_account_no
    for from_account in unique_from:
        if f'from_{from_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'from_{from_account}', f'From: {from_account}', shape='box')
            unique_nodes.add(f'from_{from_account}')  # Add to unique nodes set

    # Create nodes for each unique to_account_no
    for to_account in unique_to:
        if f'to_{to_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'to_{to_account}', f'To: {to_account}', shape='box')
            unique_nodes.add(f'to_{to_account}')  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'from_{row["from_account_no"]}'
        to_node = f'to_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different
        if from_node != to_node:
            dot.edge(from_node, to_node, xlabel=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

    # Store accounts for current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    dot.edge(f'to_{from_account}', f'from_{from_account}', 
                             xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}')

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)

In [None]:
#new Trail

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            dot.node(f'account_{account}', f'Account: {account}', shape='box')
            unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different
        if from_node != to_node:
            dot.edge(from_node, to_node, xlabel=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

    # Store accounts for current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    dot.edge(f'account_{from_account}', f'account_{from_account}', 
                             xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}')

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)


In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Increased spacing

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            dot.node(f'account_{account}', f'Account: {account}', shape='box')
            unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different
        if from_node != to_node:
            dot.edge(from_node, to_node, xlabel=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}', minlen='2')  # Increase edge length

    # Store accounts for current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    dot.edge(f'account_{from_account}', f'account_{from_account}', 
                             xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')  # Increase edge length

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)


In [None]:
layer_accounts

In [None]:
new_df.columns

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            dot.node(f'account_{account}', f'Account: {account}\n{new_df[new_df["from_account_no"]==account].amount}', shape='box')
            unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            dot.edge(from_node, to_node,  minlen='2')  # Increase edge length

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)


In [None]:
df

In [None]:
new_df

In [None]:
new_df['amount']

In [None]:
new_df

In [None]:
import pandas as pd
from graphviz import Digraph

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho', nodesep='1.0', ranksep='1.5')  # Adjusted spacing between nodes and layers

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'amount', 
                                  'action_taken_by_bank_wallet_pgpa_merchant_insurance', 'transaction_date', 'ifsc_code','transaction_id']]

    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()

    # Create unified nodes for both 'from' and 'to' accounts in this layer
    for account in set(unique_from).union(set(unique_to)):
        if account not in unique_nodes:  # Check if node already exists
            
            # Collecting additional info for the node label
            account_info = layer_nodes[layer_nodes['from_account_no'] == account]
            if account_info.empty:
                account_info = layer_nodes[layer_nodes['to_account_no'] == account]
            
            # Ensure there is info to show
            if not account_info.empty:
                account_info = account_info.iloc[0]  # Access the first row
                dot.node(f'account_{account}', 
                         f'Account: {account}\nAmount: {account_info["amount"]}\nDate: {account_info["transaction_date"]}\nIFSC: {account_info["ifsc_code"]}\nTransaction_id: {account_info["transaction_id"]}', 
                         shape='box')
                unique_nodes.add(account)  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        from_node = f'account_{row["from_account_no"]}'
        to_node = f'account_{row["to_account_no"]}'
        
        # Only create an edge if the from and to accounts are different (avoid self-loop)
        if from_node != to_node:
            dot.edge(from_node, to_node,  minlen='2')  # Increase edge length

    # Store accounts for the current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number, avoiding self-loops
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    # Avoid self-loop by making sure the node isn't connected to itself
                    if f'account_{from_account}' != f'account_{from_account}':
                        dot.edge(f'account_{from_account}', f'account_{from_account}', 
                                 xlabel=f'Layer connection from Layer {previous_layer} to Layer {i}', minlen='2')

# Render the combined graph after processing all layers
filename = 'combined_graph'
dot.render(filename, format='png', cleanup=False)


In [None]:
unique_nodes

In [None]:
#old

In [None]:
import pandas as pd
from graphviz import Digraph

# Sample DataFrame setup (replace this with your actual DataFrame)
# df = pd.read_csv('your_data.csv')  # Load your DataFrame

# Initialize the Graphviz directed graph outside the loop
dot = Digraph(format='png')
dot.attr(rankdir='LR', splines='ortho')

# Set to keep track of unique nodes to avoid duplicates
unique_nodes = set()

# Create a dictionary to store accounts associated with each layer
layer_accounts = {}

# Loop through each unique layer with the specified action
for i in new_df[new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to']['layer'].unique():
    dt = new_df[(new_df['layer'] == i) & (new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance'] == 'Money Transfer to')]
    layer_records = pd.DataFrame(dt)

    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]
    
    # Get distinct from_account_no and their associated to_account_no
    unique_from = layer_nodes['from_account_no'].unique()
    unique_to = layer_nodes['to_account_no'].unique()
    label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{layer_nodes['from_account_no']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{layer_nodes['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{layer_nodes['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(layer_nodes['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{layer_nodes['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{layer_nodes['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""
    # Create nodes for each unique from_account_no
    for from_account in unique_from:
        if f'from_{from_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'from_{from_account}', f'From: {from_account}', shape='box',label = label)
            unique_nodes.add(f'from_{from_account}')  # Add to unique nodes set

    # Create nodes for each unique to_account_no
    for to_account in unique_to:
        if f'to_{to_account}' not in unique_nodes:  # Check if node already exists
            dot.node(f'to_{to_account}', f'To: {to_account}', shape='box', label = label)
            unique_nodes.add(f'to_{to_account}')  # Add to unique nodes set

    # Create edges based on transactions
    for _, row in layer_nodes.iterrows():
        dot.edge(f'from_{row["from_account_no"]}', f'to_{row["to_account_no"]}', 
                 label=f'Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

    # Store accounts for current layer
    layer_accounts[i] = {
        'from': unique_from,
        'to': unique_to
    }

    # If there's a previous layer, connect accounts with the same number
    if i > 1:  # Assuming layers are numbered sequentially starting from 1
        previous_layer = i - 1
        if previous_layer in layer_accounts:
            # Connect from previous layer's "to" accounts to current layer's "from" accounts
            for from_account in layer_accounts[i]['from']:
                if from_account in layer_accounts[previous_layer]['to']:
                    dot.edge(f'to_{from_account}', f'from_{from_account}', 
                             label=f'Layer connection from Layer {previous_layer} to Layer {i}')

# Render the combined graph after processing all layers
filename = 'combined_graph123'
dot.render(filename, format='png', cleanup=False)


In [None]:
new_df[(new_df['layer']==1)&(new_df['action_taken_by_bank_wallet_pgpa_merchant_insurance']=='Money Transfer to')]

In [None]:
myDict

In [None]:
dump=list()
dump1=()
for i in new_df[new_df.action_taken_by_bank_wallet_pgpa_merchant_insurance=='Money Transfer to'].action_taken_by_bank_wallet_pgpa_merchant_insurance:
    for i in new_df[]

In [None]:
# Create a directed graph using Graphviz with hierarchical layout
dot = Digraph()

# Set the graph layout to be hierarchical (left-right) and use L-shaped edges
dot.attr(rankdir='LR', splines='ortho')

# Get the distinct unique layers from the dataset
layers = sorted(df['layer'].unique())

# Add a title at the top
dot.attr(label=ackname, fontsize='20', labelloc='t', fontcolor='black')

with dot.subgraph(name='cluster_legend') as legend:
    legend.attr(label="Legend", fontsize='14', style='dashed', rank='source')  # Use rank='source' to push it to the top
    legend.node('withdrawal', label="Withdrawal (Blue)", shape='box', color='#0000ff')
    legend.node('on_hold', label="On Hold (Red)", shape='box', color='#ff0000')
    legend.node('normal', label="Normal Transaction (Black)", shape='box', color='black')
    legend.node('legend', label= f"""<<TABLE BORDER="0" CELLBORDER="0">
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="orange" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Account No. (Orange)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="#036100" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Transaction ID (Green)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="blue" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>IFSC_code (Black)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="red" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Amount (Red)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="#6b3700" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Transaction Date (Brown)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="blue" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Action Taken (Blue)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="black" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Remarks (Black)</TD>
        </TR>
    </TABLE>>""", shape='plaintext')  # Ensure this is in the correct format

# Initialize the previous layer nodes (starting with an empty set)
previous_layer_nodes = None

# Set to track already added edges to avoid duplicates
added_edges = set()

# Loop through each distinct layer in the dataset
for idx, layer in enumerate(layers):
    # Filter the records for the current layer
    layer_records = df[df['layer'] == layer]
    print(layer,':', layer_records['to_account_no'])
    # Select relevant columns from the filtered dataset
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]
    
    # Add nodes for the current layer (from_account_no as nodes, with rectangular shape)
    for _, row in layer_nodes.iterrows():
        # Default color is black
        node_color = 'black'
        
        # Change node color if 'WITHDRAWAL' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
        if 'WITHDRAWAL' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
            node_color = '#0000ff'  # Blue color for withdrawal
            
        # Change node color if 'ON HOLD' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
        elif 'ON HOLD' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
            node_color = '#ff0000'  # Red color for on hold
        # Create the HTML-like label for each node with colored text
        # Create the HTML-like label for each node with colored text
        label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{row['from_account_no']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{row['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{row['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(row['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{row['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{row['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""

        # Add node with the specific color and rectangular shape
        dot.node(
            str(row['from_account_no']), 
            label=label,
            shape='box',
            color=node_color
        )

        # Check for NaN in from_account_no
        if str(row['to_account_no']).upper() == "NAAN":
            # Create a unique identifier for each NaN node based on its transaction details
            nan_node_id = f"NaaN_{row['transaction_id']}"  # Unique ID for this specific NaaN transaction
            node_color = '#cccccc'
            if 'WITHDRAWAL' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
                node_color = '#0000ff'  # Blue color for withdrawal
            
            # Change node color if 'ON HOLD' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
            elif 'ON HOLD' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
                node_color = '#ff0000'  # Red color for on hold
            remarks = split_text(row['remarks'], max_width=30) 
            label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{row['from_account_no']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{row['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{row['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(row['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="black">{row['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{row['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="black">{remarks}</FONT></TD></TR>
        </TABLE>>"""
            dot.node(nan_node_id, label=label, shape='ellipse', color=node_color)

            # Add an edge from the current node to the unique NaaN node
            edge = (str(row['to_account_no']), nan_node_id)
            if edge not in added_edges:
                dot.edge(str(row['from_account_no']), nan_node_id)
                added_edges.add(edge)

    # If this is not the first layer, add edges between the previous layer and the current one
    matching_previous_layer_nodes=0
    if previous_layer_nodes is not None:
        # Add edges based on "to_account_no" in the current layer matching "from_account_no" in the previous layer
        for _, row in layer_nodes.iterrows():
            matching_previous_layer_nodes = previous_layer_nodes[previous_layer_nodes['from_account_no'] == row['to_account_no']]
            
            for _, prev_row in matching_previous_layer_nodes.iterrows():
                # Create an identifier for the edge to check for duplicates
                edge = (str(prev_row['to_account_no']), str(row['from_account_no']))
                
                # Add edge if it hasn't been added yet
                if edge not in added_edges:
                    dot.edge(str(prev_row['to_account_no']), str(row['from_account_no']))
                    added_edges.add(edge)  # Track the added edge

    # Set the current layer as the previous layer for the next iteration
    previous_layer_nodes = layer_nodes

# Save and view the graph (optional)
dot.render('dfs_transaction_graph_with_individual_nan_colors', format='png', cleanup=True)
dot.view()

In [None]:
#originall

In [None]:
# Create a directed graph using Graphviz with hierarchical layout
dot = Digraph()

# Set the graph layout to be hierarchical (left-right) and use L-shaped edges
dot.attr(rankdir='LR', splines='ortho')

# Get the distinct unique layers from the dataset
layers = sorted(df['layer'].unique())

# Add a title at the top
dot.attr(label=ackname, fontsize='20', labelloc='t', fontcolor='black')

with dot.subgraph(name='cluster_legend') as legend:
    legend.attr(label="Legend", fontsize='14', style='dashed', rank='source')  # Use rank='source' to push it to the top
    legend.node('withdrawal', label="Withdrawal (Blue)", shape='box', color='#0000ff')
    legend.node('on_hold', label="On Hold (Red)", shape='box', color='#ff0000')
    legend.node('normal', label="Normal Transaction (Black)", shape='box', color='black')
    legend.node('legend', label= f"""<<TABLE BORDER="0" CELLBORDER="0">
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="orange" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Account No. (Orange)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="#036100" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Transaction ID (Green)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="blue" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>IFSC_code (Black)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="red" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Amount (Red)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="#6b3700" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Transaction Date (Brown)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="blue" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Action Taken (Blue)</TD>
        </TR>
        <TR>
            <TD><TABLE BORDER="0" CELLBORDER="0"><TR><TD BGCOLOR="black" WIDTH="20" HEIGHT="20"></TD></TR></TABLE></TD>
            <TD>Remarks (Black)</TD>
        </TR>
    </TABLE>>""", shape='plaintext')  # Ensure this is in the correct format

# Initialize the previous layer nodes (starting with an empty set)
previous_layer_nodes = None

# Set to track already added edges to avoid duplicates
added_edges = set()

# Loop through each distinct layer in the dataset
for idx, layer in enumerate(layers):
    # Filter the records for the current layer
    layer_records = df[df['layer'] == layer]
    print(layer,':', layer_records['to_account_no'])
    # Select relevant columns from the filtered dataset
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]
    
    # Add nodes for the current layer (from_account_no as nodes, with rectangular shape)
    for _, row in layer_nodes.iterrows():
        # Default color is black
        node_color = 'black'
        
        # Change node color if 'WITHDRAWAL' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
        if 'WITHDRAWAL' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
            node_color = '#0000ff'  # Blue color for withdrawal
            
        # Change node color if 'ON HOLD' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
        elif 'ON HOLD' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
            node_color = '#ff0000'  # Red color for on hold
        # Create the HTML-like label for each node with colored text
        # Create the HTML-like label for each node with colored text
        label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{row['from_account_no']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{row['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{row['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(row['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{row['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="blue">{row['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
        </TABLE>>"""

        # Add node with the specific color and rectangular shape
        dot.node(
            str(row['from_account_no']), 
            label=label,
            shape='box',
            color=node_color
        )

        # Check for NaN in from_account_no
        if str(row['to_account_no']).upper() == "NAAN":
            # Create a unique identifier for each NaN node based on its transaction details
            nan_node_id = f"NaaN_{row['transaction_id']}"  # Unique ID for this specific NaaN transaction
            node_color = '#cccccc'
            if 'WITHDRAWAL' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
                node_color = '#0000ff'  # Blue color for withdrawal
            
            # Change node color if 'ON HOLD' is in action_taken_by_bank_wallet_pgpa_merchant_insurance
            elif 'ON HOLD' in row['action_taken_by_bank_wallet_pgpa_merchant_insurance'].upper():
                node_color = '#ff0000'  # Red color for on hold
            remarks = split_text(row['remarks'], max_width=30) 
            label = f"""<<TABLE BORDER="0" CELLBORDER="0">
            <TR><TD><FONT COLOR="orange" POINT-SIZE="12"><B>{row['from_account_no']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="#036100"><B>{row['transaction_id']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="blue"><B>{row['ifsc_code']}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="red" POINT-SIZE="15"><B>{format_amount_indian(row['amount'])}</B></FONT></TD></TR>
            <TR><TD><FONT COLOR="black">{row['transaction_date']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="#6b3700">{row['action_taken_by_bank_wallet_pgpa_merchant_insurance']}</FONT></TD></TR>
            <TR><TD><FONT COLOR="black">{remarks}</FONT></TD></TR>
        </TABLE>>"""
            dot.node(nan_node_id, label=label, shape='ellipse', color=node_color)

            # Add an edge from the current node to the unique NaaN node
            edge = (str(row['to_account_no']), nan_node_id)
            if edge not in added_edges:
                dot.edge(str(row['from_account_no']), nan_node_id)
                added_edges.add(edge)

    # If this is not the first layer, add edges between the previous layer and the current one
    matching_previous_layer_nodes=0
    if previous_layer_nodes is not None:
        # Add edges based on "to_account_no" in the current layer matching "from_account_no" in the previous layer
        for _, row in layer_nodes.iterrows():
            matching_previous_layer_nodes = previous_layer_nodes[previous_layer_nodes['from_account_no'] == row['to_account_no']]
            
            for _, prev_row in matching_previous_layer_nodes.iterrows():
                # Create an identifier for the edge to check for duplicates
                edge = (str(prev_row['to_account_no']), str(row['from_account_no']))
                
                # Add edge if it hasn't been added yet
                if edge not in added_edges:
                    dot.edge(str(prev_row['to_account_no']), str(row['from_account_no']))
                    added_edges.add(edge)  # Track the added edge

    # Set the current layer as the previous layer for the next iteration
    previous_layer_nodes = layer_nodes

# Save and view the graph (optional)
dot.render('dfs_transaction_graph_with_individual_nan_colors', format='png', cleanup=True)
dot.view()

In [None]:
matching_previous_layer_nodes 


In [None]:
for i in df['layer'].unique():
    dt = df[df['layer'] == i]
    layer_records = pd.DataFrame(dt)
    # Filter the layer_nodes dataframe
    layer_nodes = layer_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

    # Initialize the Graphviz directed graph
    dot = Digraph(format='png')

    # Set graph attributes
    dot.attr(rankdir='LR', splines='ortho')

    # Add nodes for from_account_no (first row) and to_account_no (second row)
    for i, row in layer_nodes.iterrows():
        from_node = f'From: {row["from_account_no"]}\nTxnID: {row["transaction_id"]}'
        to_node = f'To: {row["to_account_no"]}\nIFSC: {row["ifsc_code"]}\nRemarks: {row["remarks"]}\nAmount: {row["amount"]}'
        
        # Add from_account_no and to_account_no as nodes (square shape)
        dot.node(f'from_{i}', from_node, shape='box')
        dot.node(f'to_{i}', to_node, shape='box')

        # Create an edge between from_account_no and to_account_no
        dot.edge(f'from_{i}', f'to_{i}', label=f'Bank Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')
    filename = 'test'+ str(i)
    # Render the graph
    dot.render(filename, format='png', cleanup=False)
    

In [None]:
dt1 = df[df['layer']==1]
layer_records = pd.DataFrame(dt1)
layer_records

In [None]:
# Filter the layer_nodes dataframe
layer_nodes = layer_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'ifsc_code', 'amount', 'remarks', 'action_taken_by_bank_wallet_pgpa_merchant_insurance']]

# Initialize the Graphviz directed graph
dot = Digraph(format='png')

# Set graph attributes
dot.attr(rankdir='LR', splines='ortho')

# Add nodes for from_account_no (first row) and to_account_no (second row)
for i, row in layer_nodes.iterrows():
    from_node = f'From: {row["from_account_no"]}\nTxnID: {row["transaction_id"]}'
    to_node = f'To: {row["to_account_no"]}\nIFSC: {row["ifsc_code"]}\nRemarks: {row["remarks"]}\nAmount: {row["amount"]}'
    
    # Add from_account_no and to_account_no as nodes (square shape)
    dot.node(f'from_{i}', from_node, shape='box')
    dot.node(f'to_{i}', to_node, shape='box')

    # Create an edge between from_account_no and to_account_no
    dot.edge(f'from_{i}', f'to_{i}', label=f'Bank Action: {row["action_taken_by_bank_wallet_pgpa_merchant_insurance"]}')

# Render the graph
dot.render('dfs_transaction_graph', format='png', cleanup=False)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from io import BytesIO
import graphviz


df = pd.DataFrame(df)

# Function to plot grid and overlay graph
def plot_grid_with_graph(dot_graph, max_rows):
    fig, ax = plt.subplots(figsize=(8, 8))
    # Create a grid
    ax.set_xticks(range(0, 6, 1))
    ax.set_yticks(range(0, max_rows, 1))
    ax.grid(True)

    # Render the Graphviz graph as PNG
    png_bytes = dot_graph.pipe(format='png')
    png_img = mpimg.imread(BytesIO(png_bytes))

    # Plot the graph on the grid
    ax.imshow(png_img, extent=[0, 5, 0, max_rows-1], aspect='auto')
    plt.show()

# Function to create Graphviz digraph and overlay nodes and edges based on the dataset layer
def process_layer(layer_df, graph, from_row, to_row):
    layer = layer_df['layer'].iloc[0]
    graph.attr(rankdir='LR',splines = 'ortho')
    graph.attr('node', shape='box')
    
    # Place nodes for from_account_no in the `from_row`
    from_accounts = set(layer_df['from_account_no'].unique())
    for i, acc in enumerate(from_accounts):
        graph.node(f'F_{acc}', label=acc, pos=f'{i},{from_row}!')
    
    # Place nodes for to_account_no in the `to_row`
    to_accounts = set(layer_df['to_account_no'].unique())
    for i, acc in enumerate(to_accounts):
        graph.node(f'T_{acc}', label=acc, pos=f'{i},{to_row}!')
    
    # Create edges between from_account_no and to_account_no
    for _, row in layer_df.iterrows():
        from_acc = row['from_account_no']
        to_acc = row['to_account_no']
        graph.edge(f'F_{from_acc}', f'T_{to_acc}', label=row['transaction_id'])
    
    return graph

# Main function to process each layer
def plot_layers(df):
    distinct_layers = sorted(df['layer'].unique())
    max_rows = len(distinct_layers) + 2  # Rows needed for plotting

    graph = graphviz.Digraph(format='png')
    row_mapping = {}  # To track which layer occupies which rows
    
    for i, layer in enumerate(distinct_layers):
        layer_df = df[df['layer'] == layer]
        from_row = i  # From accounts will be plotted in row `i`
        to_row = i + 1  # To accounts will be in row `i + 1`
        row_mapping[layer] = (from_row, to_row)
        graph = process_layer(layer_df, graph, from_row, to_row)

    # Plot the graph with a grid on the matplotlib
    plot_grid_with_graph(graph, max_rows)

# Call the function to plot layers
plot_layers(df)
