# 🛡️ ONLINE PAYMENTS FRAUD DETECTION
### Machine Learning Project
**Algorithm:** Random Forest Classifier  
**Dataset:** PaySim - 6.3 Million Transactions

## Step 1 — Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pickle
import warnings
# Install a blanket "ignore" filter so that warnings of every category are
# hidden from the cell outputs for the rest of the session.
warnings.simplefilter('ignore')
print('‚úÖ Libraries imported successfully!')

## Step 2 — Load Dataset

In [None]:
# Read the transactions CSV; the path is resolved relative to the directory
# the notebook is run from.
df = pd.read_csv('../data/fraud_dataset.csv')

# One print call, one item per line: shape, column names, preview caption.
print(
    f'Dataset Shape: {df.shape}',
    f'\nColumns: {df.columns.tolist()}',
    f'\nFirst 5 rows:',
    sep='\n',
)

# Last expression of the cell, so the notebook renders the preview table.
df.head(5)

## Step 3 — Explore Data

In [None]:
# Row count per value of the target column.
print('Fraud Distribution:', df['isFraud'].value_counts(), sep='\n')

# Total number of missing cells across the whole frame (per-column counts
# summed into a single figure).
print(f'\nMissing Values: {df.isna().sum().sum()}')

# Row count per transaction type.
print(f'\nTransaction Types:', df['type'].value_counts(), sep='\n')

## Step 4 — Preprocess Data

In [None]:
# Encode the categorical transaction type as integer codes. The fitted
# encoder keeps the category order in `le.classes_`.
# NOTE(review): `le` is not written to disk in this notebook; any code that
# scores new transactions must apply the same category -> integer mapping.
le = LabelEncoder()
df['type'] = le.fit_transform(df['type'])

# Drop the columns that are not used as model features.
df = df.drop(['nameOrig', 'nameDest', 'isFlaggedFraud'], axis=1)

# Reduce class imbalance by undersampling the majority class: keep every
# fraud row and at most 30,000 randomly chosen non-fraud rows. The classes
# end up the same size only if the fraud count happens to equal the cap.
fraud_df = df[df['isFraud'] == 1]
non_fraud_df = df[df['isFraud'] == 0]
# Cap the sample size at the rows actually available: DataFrame.sample
# raises ValueError when n exceeds the frame length (replace=False).
non_fraud_df = non_fraud_df.sample(
    n=min(30000, len(non_fraud_df)), random_state=42
)
# sample(frac=1) shuffles the combined rows so the classes are interleaved.
balanced_df = pd.concat([fraud_df, non_fraud_df]).sample(frac=1, random_state=42)

print(f'Fraud samples     : {len(fraud_df):,}')
print(f'Non-fraud samples : {len(non_fraud_df):,}')
print(f'Total balanced    : {len(balanced_df):,}')

## Step 5 — Split Data

In [None]:
# Separate the feature matrix from the target column.
X = balanced_df.drop('isFraud', axis=1)
y = balanced_df['isFraud']

# Hold out 20% of the rows for evaluation. The resampled classes are
# generally not the same size, so stratify on y to keep the fraud/non-fraud
# ratio the same in the train and test partitions (this requires at least
# two rows of each class). random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f'Training samples : {X_train.shape[0]:,}')
print(f'Testing samples  : {X_test.shape[0]:,}')

## Step 6 — Train Model

In [None]:
# Build a 100-tree random forest (fixed seed, all CPU cores) and fit it on
# the training split; fit() returns the fitted estimator itself.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)
print('‚úÖ Model trained successfully!')

## Step 7 — Evaluate Model

In [None]:
# Score the model on the held-out split.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f'‚úÖ Accuracy: {acc*100:.2f}%')
print('\nClassification Report:')
# Pin the label order explicitly: target_names is matched to labels by
# position, so 0 -> 'Legitimate' and 1 -> 'Fraud' must not depend on which
# classes happen to occur in y_test / y_pred.
print(classification_report(
    y_test, y_pred, labels=[0, 1], target_names=['Legitimate', 'Fraud']
))
print('Confusion Matrix:')
# Same fixed order here: rows are true labels, columns are predicted labels,
# both ordered [0, 1], so the matrix is always 2x2.
print(confusion_matrix(y_test, y_pred, labels=[0, 1]))

## Step 8 — Save Model

In [None]:
import shutil

# Serialize the trained model next to the notebook.
with open('payments.pkl', 'wb') as f:
    pickle.dump(model, f)

# Place a copy of the pickle in the sibling flask directory.
shutil.copy('payments.pkl', '../flask/payments.pkl')

# Status lines and the next-step hint, one item per output line.
print(
    '‚úÖ Model saved as payments.pkl',
    '‚úÖ Model copied to flask/payments.pkl',
    '\nüëâ Now run: cd ../flask && python3 app.py',
    sep='\n',
)