In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Load Sample UPI Transaction Data
df = pd.read_csv("/content/drive/My Drive/Major Project/Dataset/UPI_Fraud_Dataset.csv")  # Replace with actual dataset

# Display basic info
print("Dataset Summary:\n", df.info())
print("Missing Values:\n", df.isnull().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Transaction_ID               5000 non-null   int64  
 1   Sender_UPI_ID                5000 non-null   object 
 2   Receiver_UPI_ID              5000 non-null   object 
 3   Transaction_Amount           5000 non-null   float64
 4   Transaction_Time             5000 non-null   object 
 5   Device_Type                  5000 non-null   object 
 6   IP_Change_Flag               5000 non-null   int64  
 7   Location_Change_Flag         5000 non-null   int64  
 8   Multiple_Quick_Transactions  5000 non-null   int64  
 9   Transaction_Fraud_Flag       5000 non-null   int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 390.8+ KB
Dataset Summary:
 None
Missing Values:
 Transaction_ID                 0
Sender_UPI_ID                  0
Receiver_UPI_ID            

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from imblearn.over_sampling import SMOTE  # Handle class imbalance

# Load the dataset
df = pd.read_csv("/content/drive/My Drive/Major Project/Dataset/UPI_Fraud_Dataset.csv")

# Drop irrelevant columns (IDs are unique and not predictive)
df.drop(columns=["Transaction_ID", "Sender_UPI_ID", "Receiver_UPI_ID"], inplace=True)

# Handle missing values (fill numerical NaN with median)
df.fillna(df.median(numeric_only=True), inplace=True)

# Convert Transaction_Time to datetime format
df["Transaction_Time"] = pd.to_datetime(df["Transaction_Time"])

# Extract time-based features
df["Hour"] = df["Transaction_Time"].dt.hour  # Hour of transaction
df["Day"] = df["Transaction_Time"].dt.dayofweek  # Day of the week
df.drop(columns=["Transaction_Time"], inplace=True)  # Drop original timestamp

# Encode categorical column (Device_Type)
le = LabelEncoder()
df["Device_Type"] = le.fit_transform(df["Device_Type"])

# Define features (X) and target (y)
X = df.drop(columns=["Transaction_Fraud_Flag"]).values
y = df["Transaction_Fraud_Flag"].values

# Standardize numerical features (Transaction Amount)
scaler = StandardScaler()
X[:, 0] = scaler.fit_transform(X[:, 0].reshape(-1, 1)).flatten()  # Assuming Transaction_Amount is at index 0

# Handle class imbalance using SMOTE (only if dataset is imbalanced)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Reshape for LSTM (samples, timesteps, features)
X_resampled = X_resampled.reshape((X_resampled.shape[0], 1, X_resampled.shape[1]))

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Build LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(1, X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dense(1, activation="sigmoid")
])

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))



Epoch 1/10


  super().__init__(**kwargs)


[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - accuracy: 0.5025 - loss: 0.6932 - val_accuracy: 0.5605 - val_loss: 0.6876
Epoch 2/10
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - accuracy: 0.5555 - loss: 0.6867 - val_accuracy: 0.5589 - val_loss: 0.6855
Epoch 3/10
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5640 - loss: 0.6818 - val_accuracy: 0.5737 - val_loss: 0.6781
Epoch 4/10
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5621 - loss: 0.6757 - val_accuracy: 0.5800 - val_loss: 0.6730
Epoch 5/10
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5740 - loss: 0.6741 - val_accuracy: 0.5947 - val_loss: 0.6670
Epoch 6/10
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6014 - loss: 0.6598 - val_accuracy: 0.5832 - val_loss: 0.6691
Epoch 7/10
[1m238/238[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7ddccab36250>

In [None]:
# Convert back to DataFrame for saving
df_resampled = pd.DataFrame(X_resampled.reshape(X_resampled.shape[0], X_resampled.shape[2]), columns=df.drop(columns=["Transaction_Fraud_Flag"]).columns)
df_resampled["Transaction_Fraud_Flag"] = y_resampled  # Add target column back

# Save to CSV
df_resampled.to_csv("/content/drive/My Drive/Major Project/Dataset/UPI_Fraud_Preprocessed.csv", index=False)

print("Preprocessed dataset saved successfully!")


Preprocessed dataset saved successfully!


In [None]:
model.save("/content/drive/My Drive/Major Project/Models/upi_fraud_lstm_model_final.h5")

print("Deep Learning Model Trained and Saved as 'upi_fraud_lstm_model_final.h5'.")



Deep Learning Model Trained and Saved as 'upi_fraud_lstm_model_final.h5'.


In [None]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6251 - loss: 0.6495
Test Accuracy: 0.6200


In [None]:
import numpy as np

# Example new transaction (ensure it has 7 features)
new_transaction = np.array([[5000, 1, 0, 1, 15, 2, 1]])  # Modify values as needed

# Standardize numerical features (Transaction_Amount is at index 0)
new_transaction[:, 0] = scaler.transform(new_transaction[:, 0].reshape(-1, 1)).flatten()

# Reshape for LSTM (samples, timesteps, features)
new_transaction_reshaped = new_transaction.reshape((1, 1, new_transaction.shape[1]))

# Make prediction
prediction = model.predict(new_transaction_reshaped)

# Extract fraud probability
fraud_probability = prediction[0][0]

# Print result
if fraud_probability > 0.5:
    print(f"🚨 ALERT: Possible Fraud! (Confidence: {fraud_probability:.2%})")
else:
    print(f"✅ Safe Transaction (Confidence: {(1 - fraud_probability):.2%})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step
✅ Safe Transaction (Confidence: 99.98%)


In [None]:
import numpy as np

# Example fraudulent transaction (scaled and preprocessed)
new_transaction = np.array([[1.042724982, 0, 0, 0, 0, 0, 0, 1]])

# Remove the fraud label (last column) to match model input shape
new_transaction = new_transaction[:, :-1]

# Reshape for LSTM model (samples=1, timesteps=1, features=7)
new_transaction_reshaped = new_transaction.reshape((1, 1, new_transaction.shape[1]))

# Predict fraud probability
fraud_probability = model.predict(new_transaction_reshaped)[0][0]

# Interpret the prediction
threshold = 0.5  # Standard fraud detection threshold
if fraud_probability >= threshold:
    print(f"⚠️ Fraudulent Transaction Detected! (Confidence: {fraud_probability * 100:.2f}%)")
else:
    print(f"✅ Safe Transaction (Confidence: {100 - fraud_probability * 100:.2f}%)")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
✅ Safe Transaction (Confidence: 56.44%)


In [None]:
# Get predictions for test data
y_pred_prob = model.predict(X_test)
y_pred_labels = (y_pred_prob >= 0.5).astype(int)

# Check fraud classification accuracy
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_labels))

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
              precision    recall  f1-score   support

           0       0.65      0.51      0.57       945
           1       0.60      0.73      0.66       955

    accuracy                           0.62      1900
   macro avg       0.63      0.62      0.62      1900
weighted avg       0.63      0.62      0.62      1900



In [43]:
import pandas as pd

# Load preprocessed dataset
df_preprocessed = pd.read_csv("/content/drive/My Drive/Major Project/Dataset/UPI_Fraud_Preprocessed.csv")

# Extract fraudulent transactions
fraudulent_transactions = df_preprocessed[df_preprocessed["Transaction_Fraud_Flag"] == 1]
fraudulent_features = fraudulent_transactions.drop(columns=["Transaction_Fraud_Flag"]).values

# Reshape for LSTM
fraudulent_features_reshaped = fraudulent_features.reshape((fraudulent_features.shape[0], 1, fraudulent_features.shape[1]))

print(f"Fraudulent Transactions Shape: {fraudulent_features_reshaped.shape}")


Fraudulent Transactions Shape: (4749, 1, 7)


In [44]:
# Make predictions
fraud_predictions = model.predict(fraudulent_features_reshaped)

# Convert predictions to fraud probabilities
fraud_probabilities = fraud_predictions.flatten()

# Display results
for i, prob in enumerate(fraud_probabilities):
    print(f"Transaction {i+1}: Fraud Probability: {prob*100:.2f}%")


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Transaction 1: Fraud Probability: 43.56%
Transaction 2: Fraud Probability: 55.79%
Transaction 3: Fraud Probability: 51.50%
Transaction 4: Fraud Probability: 52.84%
Transaction 5: Fraud Probability: 61.02%
Transaction 6: Fraud Probability: 30.30%
Transaction 7: Fraud Probability: 6.08%
Transaction 8: Fraud Probability: 63.24%
Transaction 9: Fraud Probability: 15.79%
Transaction 10: Fraud Probability: 34.89%
Transaction 11: Fraud Probability: 10.88%
Transaction 12: Fraud Probability: 55.72%
Transaction 13: Fraud Probability: 11.89%
Transaction 14: Fraud Probability: 40.41%
Transaction 15: Fraud Probability: 33.18%
Transaction 16: Fraud Probability: 34.79%
Transaction 17: Fraud Probability: 40.39%
Transaction 18: Fraud Probability: 55.83%
Transaction 19: Fraud Probability: 59.07%
Transaction 20: Fraud Probability: 45.21%
Transaction 21: Fraud Probability: 43.89%
Transaction 22: Fraud Probability: 41.13%
Transaction