In [13]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import re
from jupyter_dash import JupyterDash  # Use JupyterDash instead of dash.Dash
import dash
from dash import html
import dash_cytoscape as cyto

In [14]:
# Function to clean column names
def clean_column_name(name):
    # Convert to lowercase
    name = name.lower()
    # Replace spaces with underscores
    name = name.replace(' ', '_')
    # Remove special characters (except underscores)
    name = re.sub(r'[^\w\s]', '', name)
    return name

# Apply the function to each column name
#df.columns = [clean_column_name(col) for col in df.columns]

In [15]:
# Read the CSV file
file_path = 'D:\\test\\excel2.xlsx'  # Replace with your file path
df = pd.read_excel(file_path, dtype= str)

df.columns = [clean_column_name(col) for col in df.columns]
# Create a list to hold new rows
new_rows = []
# List of columns where you want to append '@'
columns_to_append = ['acknowledgement_no','transaction_id','to_account_no', 'from_account_no','utr_number']

# Prepend '@' to every value in the specified columns
for column in columns_to_append:
    if column in df.columns:
        df[column] = df[column].apply(lambda x: '`' + str(x) if pd.notnull(x) else x)

# Function to process columns and split rows
def process_column(df, column_name):
    new_rows = []
    for index, row in df.iterrows():
         # Ensure the column values are strings
        value = str(row[column_name])
        # Split the values in the column based on spaces and strip any extra spaces
        values = [val.strip() for val in row[column_name].split() if val.strip()]
        
        # If there's more than one value, create a new row for each value
        if len(values) > 1:
            for value in values:
                new_row = row.copy()  # Copy the original row
                new_row[column_name] = value  # Update with the trimmed text
                new_rows.append(new_row)
        else:
            # Keep the original row if there's only one value (no split needed)
            new_rows.append(row)
    return pd.DataFrame(new_rows)

# List of columns to process
columns_to_process = ['transaction_id', 'from_account_no'] # add to account no if there are dual records in it

# Ensure columns are in string format
for col in columns_to_process:
    if col in df.columns:
        df[col] = df[col].astype(str)
        
# Process each column and merge results
processed_dfs = [process_column(df, col) for col in columns_to_process]
# Merge all processed DataFrames
final_df = pd.concat(processed_dfs, ignore_index=True)

# Display the updated DataFrame to verify
print(final_df.head())

# Save the modified DataFrame to a new CSV file if needed
final_df.to_csv('D:\\test\\excel2_Cleaned.csv', index=False)
df = final_df

  s_no acknowledgement_no           transaction_id layer      to_account_no  \
0    1    `30209240022092  `PUNBR52024090612314232     2  `8830210000008880   
1    2    `30209240022092  `PUNBR52024090612314232     2  `8830210000008880   
2    3    `30209240022092  `PUNBR52024090612314232     2  `8830210000008880   
3    4    `30209240022092  `PUNBR52024090612314232     2  `8830210000008880   
4    5    `30209240022092  `PUNBR52024090612314232     2  `8830210000008880   

  action_taken_by_bank_wallet_pgpa_merchant_insurance  \
0                                  Money Transfer to    
1                                  Money Transfer to    
2                                  Money Transfer to    
3                                  Money Transfer to    
4                                  Money Transfer to    

  bank_wallet_pgpa_merchant__insurance   from_account_no    ifsc_code  \
0                         Federal Bank   `24610100000643  FDRL0001010   
1                         Canara Ban

In [16]:
#df = df.drop(['unique_id'],axis=1)
df = df.drop(['s_no'],axis=1)
# Remove text within square brackets
df['to_account_no'] = df['to_account_no'].str.replace(r'\s*\[.*?\]', '', regex=True)

In [17]:
# Define the columns to clean
columns_to_clean = [
    'acknowledgement_no', 'transaction_id', 'layer',
    'from_account_no', 'utr_number', 'amount','to_account_no'
]

# Function to clean data in a column
def clean_column_data(series):
    # Remove special characters and trim spaces
    series = series.apply(lambda x: re.sub(r'[^\w\s]', '', str(x)).strip() if pd.notna(x) else x)
    return series

# Apply the cleaning function to each specified column
for col in columns_to_clean:
    if col in df.columns:
        df[col] = clean_column_data(df[col])

# Display the cleaned data to verify
print(df[columns_to_clean].head())

  acknowledgement_no          transaction_id layer  from_account_no  \
0     30209240022092  PUNBR52024090612314232     2   24610100000643   
1     30209240022092  PUNBR52024090612314232     2     110196345628   
2     30209240022092  PUNBR52024090612314232     2     110189607716   
3     30209240022092  PUNBR52024090612314232     2  179053000017033   
4     30209240022092  PUNBR52024090612314232     2   20200057515505   

               utr_number  amount     to_account_no  
0  DBSSR52024090605165954  270000  8830210000008880  
1  DBSSR52024090605165914  200000  8830210000008880  
2             CNRB0004785  399999  8830210000008880  
3  DBSSR52024090605165915  371900  8830210000008880  
4  DBSSR52024090605165951  290178  8830210000008880  


In [18]:
df['unique_id'] = pd.Series(range(1, len(df) + 1))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 312 entries, 0 to 311
Data columns (total 27 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   acknowledgement_no                                   312 non-null    object 
 1   transaction_id                                       312 non-null    object 
 2   layer                                                312 non-null    object 
 3   to_account_no                                        312 non-null    object 
 4   action_taken_by_bank_wallet_pgpa_merchant_insurance  312 non-null    object 
 5   bank_wallet_pgpa_merchant__insurance                 312 non-null    object 
 6   from_account_no                                      312 non-null    object 
 7   ifsc_code                                            195 non-null    object 
 8   cheque_no                                            312 non-null    o

In [19]:
df['layer'] = pd.to_numeric(df['layer'], errors='coerce')
print(df['layer'].unique())
df.info()

[2 3 1 4 5 6 7]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 312 entries, 0 to 311
Data columns (total 27 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   acknowledgement_no                                   312 non-null    object 
 1   transaction_id                                       312 non-null    object 
 2   layer                                                312 non-null    int64  
 3   to_account_no                                        312 non-null    object 
 4   action_taken_by_bank_wallet_pgpa_merchant_insurance  312 non-null    object 
 5   bank_wallet_pgpa_merchant__insurance                 312 non-null    object 
 6   from_account_no                                      312 non-null    object 
 7   ifsc_code                                            195 non-null    object 
 8   cheque_no                                            3

In [20]:
layers = df.groupby('layer')
layer1 = layers.get_group(1)
layer1

Unnamed: 0,acknowledgement_no,transaction_id,layer,to_account_no,action_taken_by_bank_wallet_pgpa_merchant_insurance,bank_wallet_pgpa_merchant__insurance,from_account_no,ifsc_code,cheque_no,mid,...,remarks,date_of_action,action_taken_by_bank,action_taken_name,action_taken_by_email,branch_location,branch_manager_name__contact_details,unnamed_25,unnamed_26,unique_id
21,30209240022092,SBINR52024090646591003,1,8830210000008880,Transaction put on hold,DBS Bank,1146110010053650.0,,0,,...,Transaction put on hold.,2024-09-22 17:59:28,DBS Bank (Including Lakshmi Vilas Bank),Akanksha Bhagade,akankshabhagade@dbs.com,,,,,22
34,30209240022092,PUNBR52024090612314232,1,10832030036580,Money Transfer to,DBS Bank,8830210000008880.0,DBSS0IN0830,0,,...,NRTGS/PUNBR52024090612314232/CRAVY MEALES ENTE...,2024-09-22 18:06:13,Punjab National Bank (including Oriental Bank ...,Rahul Singh,nccrpc.i4c@pnb.co.in,,,,,35
38,30209240022092,PUNBR52024090612314232,1,8830210000008880,Other,DBS Bank,,,0,,...,Duplicate transaction,2024-09-22 18:10:40,DBS Bank (Including Lakshmi Vilas Bank),Ajeet Kumar,ajeetkumar@dbs.com,,,,,39
39,30209240022092,SBINR52024090646591003,1,10047593897,Money Transfer to,DBS Bank,8830210000008880.0,DBSS0IN0830,0,,...,Transaction amount 2799952.80,2024-09-22 18:13:57,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,,40
40,30209240022092,965562,1,10047593897,Money Transfer to,State Bank of India,43017388358.0,SBIN0001464,0,,...,,2024-09-22 18:18:25,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,,41
41,30209240022092,965563,1,10047593897,Money Transfer to,State Bank of India,43280321955.0,SBIN0011519,0,,...,,2024-09-22 18:19:12,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,,42
42,30209240022092,965562,1,43017388358,Other,State Bank of India,,,0,,...,Account does not have sufficient balance. However,Hold Kept for the disputed amount Rs.1100000,2024-09-22 18:19:43,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,43
43,30209240022092,965563,1,43280321955,Other,State Bank of India,,,0,,...,Account does not have sufficient balance. However,Hold Kept for the disputed amount Rs.1200000,2024-09-22 18:19:56,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,44
44,30209240022092,10047593897,1,43017388358,Other,State Bank of India,,,0,,...,Repeated transaction. Action already initiated.,2024-09-22 18:20:28,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,,45
45,30209240022092,10047593897,1,43280321955,Other,State Bank of India,,,0,,...,Repeated transaction. Action already initiated.,2024-09-22 18:20:39,State Bank of India,S V Vinod Kumar,agmsrvl.lhoand@sbi.co.in,,,,,46


In [21]:
# Assuming df contains your data
layers = df.groupby('layer')
for layer_num, layer_data in layers:
    # Get distinct account numbers
    print(layer_data)
    distinct_accounts = layer_data['from_account_no'].unique()
    print(distinct_accounts)

    acknowledgement_no          transaction_id  layer     to_account_no  \
21      30209240022092  SBINR52024090646591003      1  8830210000008880   
34      30209240022092  PUNBR52024090612314232      1    10832030036580   
38      30209240022092  PUNBR52024090612314232      1  8830210000008880   
39      30209240022092  SBINR52024090646591003      1       10047593897   
40      30209240022092                  965562      1       10047593897   
41      30209240022092                  965563      1       10047593897   
42      30209240022092                  965562      1       43017388358   
43      30209240022092                  965563      1       43280321955   
44      30209240022092             10047593897      1       43017388358   
45      30209240022092             10047593897      1       43280321955   
52      30209240022092  SBINR52024090646591003      1  8830210000008880   
177     30209240022092  SBINR52024090646591003      1  8830210000008880   
190     30209240022092  P

In [22]:
def format_amount_indian(amount):
    # Convert the amount to a string and remove any existing commas
    amount_str = str(amount).replace(',', '')
    
    # Check if the number has more than 3 digits
    if len(amount_str) > 3:
        # Get the last 3 digits
        last_three = amount_str[-3:]
        # Get the remaining digits
        remaining = amount_str[:-3]
        # Reverse the remaining digits for easier grouping
        remaining_reversed = remaining[::-1]
        # Group digits in thousands
        grouped = [remaining_reversed[i:i+2] for i in range(0, len(remaining_reversed), 2)]
        # Reverse and join the grouped digits with commas
        formatted_remaining = ','.join(grouped[::-1])
        # Concatenate the formatted remaining part with the last three digits
        formatted_amount = f'{formatted_remaining},{last_three}'
    else:
        # For amounts less than or equal to 999, no formatting is needed
        formatted_amount = amount_str

    return formatted_amount

In [23]:
ackno = df['acknowledgement_no'].unique()
ackname = 'Transaction Graph For (Ack No): ' + str(ackno)
ackname

"Transaction Graph For (Ack No): ['30209240022092']"

In [26]:
# Step 1: Retrieve Layer 1 records where "Money Transfer" occurs
layer1_records = df[(df['layer'] == 1) & (df['action_taken_by_bank_wallet_pgpa_merchant_insurance'].str.contains('Money Transfer'))]
layer1_nodes = layer1_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'amount', 'remarks']]

# Create a directed graph
G = nx.DiGraph()

# Add Layer 1 nodes (from_account_no as nodes, with details as attributes)
for _, row in layer1_nodes.iterrows():
    G.add_node(row['from_account_no'], 
               to_account=row['to_account_no'], 
               transaction_id=row['transaction_id'], 
               transaction_date=row['transaction_date'], 
               amount=row['amount'], 
               remarks=row['remarks'])

# Step 2: Retrieve Layer 2 records where "Money Transfer" occurs
layer2_records = df[(df['layer'] == 2) & (df['action_taken_by_bank_wallet_pgpa_merchant_insurance'].str.contains('Money Transfer'))]
layer2_nodes = layer2_records[['from_account_no', 'to_account_no', 'transaction_id', 'transaction_date', 'amount', 'remarks']]

# Add Layer 2 nodes and connect edges based on "to_account_no" in Layer 2 matching "from_account_no" in Layer 1
for _, row in layer2_nodes.iterrows():
    # Find matching Layer 1 nodes where to_account_no equals from_account_no in Layer 2
    matching_layer1_nodes = layer1_nodes[layer1_nodes['from_account_no'] == row['to_account_no']]
    
    for _, layer1_row in matching_layer1_nodes.iterrows():
        # Add Layer 2 node
        G.add_node(row['from_account_no'], 
                   to_account=row['to_account_no'], 
                   transaction_id=row['transaction_id'], 
                   transaction_date=row['transaction_date'], 
                   amount=row['amount'], 
                   remarks=row['remarks'])
        
        # Add edges from Layer 1's from_account_no to Layer 2's from_account_no
        G.add_edge(layer1_row['from_account_no'], row['from_account_no'])

# Create elements for Dash Cytoscape
cyto_elements = []

# Custom positions to create an L-shape for edges
layer1_x = 100
layer2_x = 400  # Shift layer 2 nodes further right
y_increment = 100
current_y = 100

# Add nodes for Layer 1
layer1_node_positions = {}  # Keep track of node positions
for i, node in enumerate(G.nodes(data=True)):
    if node[1]['to_account'] in layer1_nodes['to_account_no'].values:  # Layer 1 nodes
        cyto_elements.append({
            'data': {'id': node[0], 'label': node[0]},  # node[0] is the node identifier (from_account_no)
            'position': {'x': layer1_x, 'y': current_y}  # Positioning for layer 1
        })
        layer1_node_positions[node[0]] = (layer1_x, current_y)  # Track Layer 1 node position for edges
        current_y += y_increment

# Reset y position for Layer 2
current_y = 100

# Add nodes for Layer 2 and edges
layer2_node_positions = {}  # Keep track of layer 2 node positions
for node in G.nodes(data=True):
    if node[1]['to_account'] not in layer1_nodes['to_account_no'].values:  # Layer 2 nodes
        cyto_elements.append({
            'data': {'id': node[0], 'label': node[0]},  # node[0] is the node identifier (from_account_no)
            'position': {'x': layer2_x, 'y': current_y}  # Positioning for layer 2
        })
        layer2_node_positions[node[0]] = (layer2_x, current_y)  # Track Layer 2 node position for edges
        current_y += y_increment

# Add edges between Layer 1 and Layer 2
for edge in G.edges():
    cyto_elements.append({
        'data': {'source': edge[0], 'target': edge[1]},
        'style': {'line-color': '#ff0000', 'width': 2, 'target-arrow-shape': 'triangle', 'arrow-color': '#0074D9'}
    })

# Initialize Dash app
app = JupyterDash(__name__)

app.layout = html.Div([
    cyto.Cytoscape(
        id='cytoscape-network',
        elements=cyto_elements,
        style={'width': '100%', 'height': '800px'},
        layout={'name': 'preset'},  # Use preset layout to keep positions defined
        stylesheet=[
            {'selector': 'node', 'style': {'content': 'data(label)', 'shape': 'rectangle', 'background-color': '#00ffD9'}},
            {'selector': 'edge', 'style': {'line-color': '#ff0000', 'curve-style': 'straight', 'target-arrow-shape': 'triangle'}}  # Straight edge styling
        ]
    )
])

if __name__ == '__main__':
    app.run_server(debug=True, port = 9099)


JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



Dash app running on http://127.0.0.1:9099/
