In [None]:
# Part 1: Installation and Setup


# Install necessary libraries
!pip install streamlit pandas numpy scikit-learn matplotlib seaborn joblib pyngrok --quiet
!pip install --upgrade scipy



[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m


In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score, f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
import warnings
import sys
# Setup ngrok
from pyngrok import ngrok
import time
import subprocess

# file upload
from google.colab import files
warnings.filterwarnings("ignore")

# Artifact file names. The code that reads or writes these files is not part
# of this section of the notebook.
MODEL_PATH, METRICS_PATH, DATA_PATH = (
    'financial_risk_pipeline.joblib',
    'model_metrics_financial.joblib',
    'dashboard_data_sample_financial.csv',
)


# 1. Feature schemas


# Column names applied, in order, to the header-less network traffic CSV
# (see upload_and_process_file). 'attack_cat' and 'Label' are the last two.
# NOTE(review): the merged frame displayed later in the notebook shows
# IP-address-like values under 'dur' and port-like values under 'proto', so
# this ordering may not match the uploaded file's real column order -- confirm
# against the dataset's feature description before trusting feature names.
UNSW_NB15_COLUMNS = """
    dur proto service state spkts dpkts sbytes dbytes
    rate sttl dttl sload dload sloss dloss sinpkt
    dinpkt sjit djit swin dwin tcprtt synack ackdat
    smean dmean trans_depth response_body_len ct_srv_src
    ct_state_ttl ct_dst_ltm ct_srv_dst ct_dst_sport_ltm
    ct_dst_src_ltm is_ftp_login ct_ftp_cmd ct_flw_http_mthd
    ct_src_ltm ct_slrtm ct_dst_src is_sm_ips_ports ct_srv
    ct_dst ct_src ct_spc_dst ct_bps ct_pkts
    attack_cat Label
""".split()

# Expected columns of the financial transaction CSV (which carries its own
# header row). Only the length of this list is used for validation.
# NOTE(review): the notebook output shows the file's column as 'IP Address'
# (with a space), not 'IPAddress' -- confirm which spelling is intended.
FINANCIAL_COLUMNS = """
    TransactionID AccountID TransactionAmount TransactionDate
    TransactionType Location DeviceID IPAddress MerchantID
    Channel CustomerAge CustomerOccupation TransactionDuration
    LoginAttempts AccountBalance PreviousTransactionDate
""".split()


# 2. Data Loading and Preprocessing Functions


def _prepare_network_frame(df, required_columns):
    """Name the columns of a header-less network frame and build its integer target.

    Raises:
        ValueError: if the frame's column count differs from ``required_columns``.
    """
    if len(df.columns) != len(required_columns):
        raise ValueError(f"Network file expected {len(required_columns)} cols, found {len(df.columns)}.")

    # The file is read without a header, so names are assigned by position.
    df.columns = required_columns
    target_col = 'Label'
    df = df.drop(columns=['attack_cat'], errors='ignore')
    # Unparseable or missing labels are coerced to 0.
    df[target_col] = pd.to_numeric(df[target_col], errors='coerce').fillna(0).astype(int)
    return df, target_col


def _prepare_financial_frame(df, required_columns):
    """Validate a financial frame, derive 'TimeSinceLast' and 'Label', drop identifiers.

    Raises:
        ValueError: if the column count is unexpected or a column needed for
            feature engineering is absent.
    """
    # Accept either the full column list or one column fewer.
    expected_total_count = len(required_columns)
    expected_feature_count = expected_total_count - 1
    if len(df.columns) not in (expected_feature_count, expected_total_count):
        raise ValueError(f"Financial file expected **{expected_feature_count}** feature cols OR **{expected_total_count}** total cols (with 'IsFraud'), but found {len(df.columns)}.")

    # Fail with an explicit message instead of a bare KeyError further down.
    needed = ('TransactionDate', 'PreviousTransactionDate', 'LoginAttempts')
    missing = [col for col in needed if col not in df.columns]
    if missing:
        raise ValueError(f"Financial file is missing required column(s): {missing}")

    # Feature engineering: hours between this transaction and the previous one.
    # Unparseable dates become NaT and the resulting gaps are median-filled.
    df['TransactionDate'] = pd.to_datetime(df['TransactionDate'], errors='coerce')
    df['PreviousTransactionDate'] = pd.to_datetime(df['PreviousTransactionDate'], errors='coerce')
    df['TimeSinceLast'] = (df['TransactionDate'] - df['PreviousTransactionDate']).dt.total_seconds() / 3600
    df['TimeSinceLast'] = df['TimeSinceLast'].fillna(df['TimeSinceLast'].median())

    # Derive the target from 'LoginAttempts': up to and including 3 attempts
    # is Normal (0); anything else is Attack (1).
    target_col = 'Label'
    df[target_col] = np.where(df['LoginAttempts'] <= 3, 0, 1).astype(int)
    # The message now states the same threshold the code applies (<= 3).
    print("INFO: New 'Label' column created: 0 (Normal) if LoginAttempts <= 3, else 1 (Attack).")

    # Drop identifiers and the raw date columns. Both spellings of the IP
    # column are listed: the uploaded file names it 'IP Address', so listing
    # only 'IPAddress' (with errors='ignore') silently kept it as a feature.
    identifier_cols = [
        'TransactionID', 'AccountID', 'DeviceID', 'IPAddress', 'IP Address',
        'MerchantID', 'TransactionDate', 'PreviousTransactionDate',
    ]
    df = df.drop(columns=identifier_cols, errors='ignore')
    return df, target_col


def upload_and_process_file(file_type, required_columns):
    """Prompt for a CSV upload in Colab and apply source-specific preprocessing.

    Args:
        file_type: 'network' (CSV read without a header row) or 'financial'
            (CSV read with its first row as header).
        required_columns: expected column names. For 'network' they are
            assigned to the frame by position; for 'financial' only the
            length of the list is used for validation.

    Returns:
        A ``(DataFrame, target_column_name)`` tuple on success, or
        ``(None, None)`` on any failure (invalid ``file_type``, nothing
        uploaded, validation or parsing error). A bare ``None`` is never
        returned, so callers must test the first element.
    """
    print(f"\n--- UPLOAD DATA: {file_type.upper()} ---")

    # Pick the prompt and the header handling for the requested source.
    if file_type == 'network':
        print("Please upload the UNSW-NB15 file")
        header_setting = None
    elif file_type == 'financial':
        print("Please upload the Financial Transaction file ")
        header_setting = 0
    else:
        print("Invalid file type specified.")
        return None, None

    # Boundary handler: any failure below is reported and turned into
    # (None, None) so the calling cell can decide how to stop.
    try:
        uploaded = files.upload()
        if not uploaded:
            print(f"No file uploaded for {file_type}. Cannot continue.")
            return None, None

        # Only the first uploaded file is used.
        csv_name = list(uploaded.keys())[0]
        df = pd.read_csv(csv_name, header=header_setting, low_memory=False)

        if file_type == 'network':
            df, target_col = _prepare_network_frame(df, required_columns)
        else:
            df, target_col = _prepare_financial_frame(df, required_columns)

        print(f"Successfully loaded '{csv_name}' for {file_type} with shape: {df.shape}")

        # General cleaning: missing values in text columns become 'Unknown'.
        nominal_cols = df.select_dtypes(include='object').columns.tolist()
        for col in nominal_cols:
            df[col] = df[col].fillna('Unknown')

        return df, target_col

    except Exception as e:
        print(f"\nCRITICAL ERROR during {file_type} data loading: {e}")
        return None, None


# 3. Data Loading and Merging

# Upload and process Network data.
# upload_and_process_file reports failure as (None, None), never as a bare
# None, so the returned frame itself is what must be checked.
network_df, network_target = upload_and_process_file('network', UNSW_NB15_COLUMNS)
if network_df is None:
    print("Network data is missing.")
    sys.exit(1)

# Upload and process Financial data (same failure convention as above).
financial_df, financial_target = upload_and_process_file('financial', FINANCIAL_COLUMNS)
if financial_df is None:
    print("Financial data is missing. ")
    sys.exit(1)

# MERGE DATA
# Rows are paired purely by position: both frames get their row index exposed
# as an 'index' column and are inner-joined on it, so the result has as many
# rows as the shorter frame.
# NOTE(review): the financial 'Label' is dropped below and only the network
# label survives as the target -- confirm this is intended.
try:
    merged_df = pd.merge(
        network_df.reset_index(),
        financial_df.reset_index(),
        on='index',
        how='inner',
        suffixes=('_net', '_fin')
    ).set_index('index')

    # Both inputs carry a 'Label' column, so the merge suffixes them.
    merged_df = merged_df.rename(columns={'Label_net': 'Label'}).drop(columns=['Label_fin'], errors='ignore')

    print(f"Merge successful. Merged DataFrame shape (Inner Join): {merged_df.shape}")

except Exception as e:
    print(f"CRITICAL ERROR during Inner Join: {e}")
    print("Inner Join often fails if indexes do not align precisely.")
    sys.exit(1)


# Features (X) and Target (Y)


Y = merged_df['Label']
X = merged_df.drop(columns=['Label'])

print("\nMerged Data successfully created!")
print(f"Final Merged DataFrame shape: {merged_df.shape}")
print(f"Features (X) shape: {X.shape}")
print(f"Target (Y) shape: {Y.shape}")
print(f"Target (Y) distribution:\n{Y.value_counts()}")


--- UPLOAD DATA: NETWORK ---
Please upload the UNSW-NB15 file


Saving UNSW-NB15_1.csv to UNSW-NB15_1.csv
Successfully loaded 'UNSW-NB15_1.csv' for network with shape: (2962, 48)

--- UPLOAD DATA: FINANCIAL ---
Please upload the Financial Transaction file 


Saving bank_transactions_data_2.csv to bank_transactions_data_2.csv
INFO: New 'Label' column created: 0 (Normal) if LoginAttempts < 3, else 1 (Attack).
Successfully loaded 'bank_transactions_data_2.csv' for financial with shape: (2512, 12)
Merge successful. Merged DataFrame shape (Inner Join): (2512, 59)

Merged Data successfully created!
Final Merged DataFrame shape: (2512, 59)
Features (X) shape: (2512, 58)
Target (Y) shape: (2512,)
Target (Y) distribution:
Label
0    2433
1      79
Name: count, dtype: int64


In [None]:
# Bare expression: as the last statement of the cell, it renders the merged
# DataFrame as the cell's output.
merged_df

Unnamed: 0_level_0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,TransactionType,Location,IP Address,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,TimeSinceLast
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,59.166.0.0,1390,149.171.126.6,53,udp,CON,0.001055,132,164,31,...,Debit,San Diego,162.198.218.92,ATM,70,Doctor,81,1,5112.21,-13743.648333
1,59.166.0.0,33661,149.171.126.9,1024,udp,CON,0.036133,528,304,31,...,Debit,Houston,13.149.61.4,ATM,68,Doctor,141,1,13758.91,-11895.421111
2,59.166.0.6,1464,149.171.126.7,53,udp,CON,0.001119,146,178,31,...,Debit,Mesa,215.97.143.157,Online,19,Student,56,1,1122.35,-11581.848889
3,59.166.0.5,3593,149.171.126.5,53,udp,CON,0.001209,132,164,31,...,Debit,Raleigh,200.13.225.150,Online,26,Student,25,1,8569.06,-13167.615278
4,59.166.0.3,49664,149.171.126.0,53,udp,CON,0.001169,146,178,31,...,Credit,Atlanta,65.164.3.100,Online,26,Student,198,1,7429.40,-9230.254167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2507,59.166.0.4,22105,149.171.126.5,58482,tcp,FIN,0.053993,3806,51808,31,...,Credit,Colorado Springs,21.157.41.17,Branch,33,Doctor,109,1,12690.79,-13383.031389
2508,59.166.0.7,15392,149.171.126.4,41432,tcp,FIN,0.029047,3806,51808,31,...,Debit,Tucson,49.174.157.140,Branch,48,Doctor,177,1,254.75,-14222.581667
2509,59.166.0.8,62897,149.171.126.4,25,tcp,FIN,0.439847,37230,3380,31,...,Debit,San Diego,58.1.27.124,Branch,56,Retired,146,1,3382.91,-10574.996944
2510,59.166.0.3,61046,149.171.126.3,23183,tcp,FIN,1.454488,8928,320,31,...,Debit,Denver,21.190.11.223,Online,23,Student,19,1,1776.91,-14847.793333
