In [1]:
import pandas as pd

# Location of the transactions CSV inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/upi-payment-transactions-dataset/transactions.csv"

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Display the first few rows of the dataset
print(df.head())

# Check for missing values and data types.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output.
df.info()

                         Transaction ID            Timestamp      Sender Name  \
0  4d3db980-46cd-4158-a812-dcb77055d0d2  2024-06-22 04:06:38        Tiya Mall   
1  099ee548-2fc1-4811-bf92-559c467ca792  2024-06-19 06:04:49  Mohanlal Bakshi   
2  d4c05732-6b1b-4bab-90b9-efe09d252b99  2024-06-04 04:56:09      Kismat Bora   
3  e8df92ee-8b04-4133-af5a-5f412180c8ab  2024-06-09 09:56:07    Ayesha Korpal   
4  e7d675d3-04f1-419c-a841-7a04662560b7  2024-06-25 08:38:19      Jivin Batta   

           Sender UPI ID   Receiver Name     Receiver UPI ID  Amount (INR)  \
0      4161803452@okaxis  Mohanlal Golla    7776849307@okybl       3907.34   
1      8908837379@okaxis  Mehul Sankaran   7683454560@okaxis       8404.55   
2       4633654150@okybl       Diya Goel  2598130823@okicici        941.88   
3  7018842771@okhdfcbank    Rhea Kothari   2246623650@okaxis       8926.00   
4       1977143985@okybl     Baiju Issac    5245672729@okybl       2800.55   

    Status  
0   FAILED  
1  SUCCESS  
2  SU

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Fail fast if this cell already ran: it consumes the raw 'Timestamp' column, so a
# second run would re-encode the integer codes and then crash on the missing column.
if 'Timestamp' not in df.columns:
    raise RuntimeError(
        "'Timestamp' column not found; re-run the data loading cell before this one."
    )

# Encode categorical variables.
# Each feature column gets its own encoder, kept in `feature_encoders`, so the
# integer codes can still be mapped back with inverse_transform later. A single
# shared encoder would be refit on every column and only remember the last one.
# NOTE(review): names and UPI IDs look close to unique in the preview; their
# integer codes carry an arbitrary order - confirm they are useful as features.
categorical_columns = ['Sender Name', 'Sender UPI ID', 'Receiver Name', 'Receiver UPI ID']
feature_encoders = {}
for column in categorical_columns:
    feature_encoders[column] = LabelEncoder()
    df[column] = feature_encoders[column].fit_transform(df[column])

# The target keeps the original `label_encoder` name, so after this cell it is
# fitted on 'Status' exactly as before.
label_encoder = LabelEncoder()
df['Status'] = label_encoder.fit_transform(df['Status'])

# Convert Timestamp to datetime and extract features
timestamps = pd.to_datetime(df['Timestamp'])
df['Year'] = timestamps.dt.year
df['Month'] = timestamps.dt.month
df['Day'] = timestamps.dt.day
df['Hour'] = timestamps.dt.hour

# Drop the original Timestamp column together with the Transaction ID column;
# neither is passed to the models.
df = df.drop(columns=['Timestamp', 'Transaction ID'])

# Define features and target variable
X = df.drop(columns=['Status'])
y = df['Status']

# Split the data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Build a 100-tree random forest with a fixed seed and train it on the training
# split; fit() returns the estimator, so construction and training are chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

# Report accuracy, the per-class report and the confusion matrix, in that order.
rf_metrics = (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
)
for heading, metric in rf_metrics:
    print(heading, metric(y_test, y_pred))

Accuracy: 0.49
Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.48      0.48        97
           1       0.50      0.50      0.50       103

    accuracy                           0.49       200
   macro avg       0.49      0.49      0.49       200
weighted avg       0.49      0.49      0.49       200

Confusion Matrix:
 [[47 50]
 [52 51]]


In [4]:
import xgboost as xgb

# Initialize the XGBoost model.
# `use_label_encoder` is intentionally not passed: it is a deprecated (and later
# removed) XGBClassifier argument that only triggers warnings on current releases,
# and the target is already integer-encoded before the split.
# random_state is pinned to the same seed used for the split and the random forest.
xgb_model = xgb.XGBClassifier(eval_metric='logloss', random_state=42)

# Train the XGBoost model
xgb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_xgb = xgb_model.predict(X_test)

# Evaluate the XGBoost model with the same three metrics as the random forest
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("XGBoost Classification Report:\n", classification_report(y_test, y_pred_xgb))
print("XGBoost Confusion Matrix:\n", confusion_matrix(y_test, y_pred_xgb))

XGBoost Accuracy: 0.505
XGBoost Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.49      0.49        97
           1       0.52      0.51      0.52       103

    accuracy                           0.51       200
   macro avg       0.50      0.50      0.50       200
weighted avg       0.51      0.51      0.51       200

XGBoost Confusion Matrix:
 [[48 49]
 [50 53]]
