### STEP 1: Load the Dataset

In [2]:
import pandas as pd

df = pd.read_csv("dataset.csv")

df.head()


Unnamed: 0,transaction_id,account_id,transaction_type,transaction_amount,balance_before,balance_after,channel,location,timestamp,is_fraud
0,T00001,A1052,withdrawal,4491.83,4759.02,267.19,mobile,Harare,2025-04-29 02:15:31,0
1,T00002,A1077,withdrawal,666.11,1940.37,1274.26,branch,Gweru,2025-03-22 14:05:59,0
2,T00003,A1087,transfer,4453.43,2438.42,-2015.01,branch,Bulawayo,2025-05-02 15:13:01,1
3,T00004,A1050,payment,4289.13,9887.1,5597.97,online,Harare,2025-04-08 02:26:27,0
4,T00005,A1046,deposit,4112.47,7444.33,11556.8,ATM,Bulawayo,2025-04-22 18:38:18,0


### STEP 2: Preprocess the Data

In [3]:
from sklearn.preprocessing import StandardScaler

X = df.drop(columns=[
    'transaction_id', 'account_id', 'timestamp',
    'location', 'transaction_type', 'channel', 'is_fraud'
])
y = df['is_fraud']  

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


### STEP 3: Split the Data

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)


### STEP 4: Train the Isolation Forest Model

In [5]:
from sklearn.ensemble import IsolationForest

model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
model.fit(X_train)


### STEP 5: Make Predictions

In [6]:
import numpy as np

# Predict (returns 1 = normal, -1 = anomaly)
y_pred = model.predict(X_test)

# Convert to binary: 1 = fraud, 0 = normal
y_pred_binary = np.where(y_pred == -1, 1, 0)


### STEP 6: Evaluate the Model

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary, zero_division=0)
recall = recall_score(y_test, y_pred_binary, zero_division=0)
f1 = f1_score(y_test, y_pred_binary, zero_division=0)

conf_matrix = confusion_matrix(y_test, y_pred_binary)
report = classification_report(y_test, y_pred_binary)

print("Evaluation Results")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)


Evaluation Results
Accuracy : 0.7433
Precision: 0.3929
Recall   : 0.0743
F1 Score : 0.1250

Confusion Matrix:
[[435  17]
 [137  11]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.96      0.85       452
           1       0.39      0.07      0.12       148

    accuracy                           0.74       600
   macro avg       0.58      0.52      0.49       600
weighted avg       0.67      0.74      0.67       600



### STEP 7: Save the Model and Scaler

In [9]:
import joblib

joblib.dump(model, "topnotch_iforest_model.pkl")
joblib.dump(scaler, "topnotch_scaler.pkl")

print("Model and scaler saved!")


Model and scaler saved!


# Audit New Transactions

### Step 1: Load the Saved Model and Scaler

In [10]:
import joblib

model = joblib.load("topnotch_iforest_model.pkl")
scaler = joblib.load("topnotch_scaler.pkl")


### Step 2: Load the New Transaction File

In [11]:
import pandas as pd

new_df = pd.read_csv("new_transactions.csv")

new_df.head()


Unnamed: 0,transaction_id,account_id,transaction_type,transaction_amount,balance_before,balance_after,channel,location,timestamp
0,T00004,A1052,deposit,1500.0,267.19,1767.19,atm,Harare,2025-05-03 10:25:40
1,T00005,A1077,transfer,500.0,1274.26,774.26,mobile,Gweru,2025-05-01 16:12:19
2,T00006,A1087,deposit,3000.0,-2015.01,984.99,branch,Bulawayo,2025-05-02 17:20:45
3,T00007,A1066,withdrawal,750.0,1200.0,450.0,atm,Mutare,2025-04-28 08:00:00
4,T00008,A1052,withdrawal,1000.0,1767.19,767.19,mobile,Harare,2025-05-04 09:30:11


### Step 3: Preprocess New Data

In [12]:
X_new = new_df.drop(columns=[
    'transaction_id', 'account_id', 'timestamp',
    'location', 'transaction_type', 'channel'
])

X_new_scaled = scaler.transform(X_new)


### Step 4: Predict Anomalies Using the Model

In [13]:
import numpy as np

predictions = model.predict(X_new_scaled)

new_df['fraud_flag'] = np.where(predictions == -1, 'Fraudulent', 'Normal')


### Step 5: Save or View the Audited Results

In [14]:
new_df.to_csv("audited_new_transactions.csv", index=False)

print("✅ Audit complete. Results saved to 'audited_new_transactions.csv'")


✅ Audit complete. Results saved to 'audited_new_transactions.csv'


In [15]:
print(new_df['fraud_flag'].value_counts())


fraud_flag
Normal        9
Fraudulent    1
Name: count, dtype: int64
