<a href="https://colab.research.google.com/github/Arasavelli-Sai-Sankar/Machine-Learning/blob/main/Credit_Card_Fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.compose import ColumnTransformer

In [None]:
# Step 1: Load the datasets
fraudtrain_path = "/content/fraudTrain.csv"  # Update the path
fraudtest_path = "/content/fraudTest.csv"  # Update the path
# Load train and test datasets
train_df = pd.read_csv(fraudtrain_path)
test_df = pd.read_csv(fraudtest_path)

In [None]:
# Step 2: Explore the datasets (Optional, for debugging)
print("Train Dataset Shape:", train_df.shape)
print("Test Dataset Shape:", test_df.shape)
print("\nTrain Data Sample:\n", train_df.head())
print("\nTest Data Sample:\n", test_df.head())

Train Dataset Shape: (1296675, 23)
Test Dataset Shape: (555719, 23)

Train Data Sample:
    Unnamed: 0 trans_date_trans_time            cc_num  \
0           0   2019-01-01 00:00:18  2703186189652095   
1           1   2019-01-01 00:00:44      630423337322   
2           2   2019-01-01 00:00:51    38859492057661   
3           3   2019-01-01 00:01:16  3534093764340240   
4           4   2019-01-01 00:03:06   375534208663984   

                             merchant       category     amt      first  \
0          fraud_Rippin, Kub and Mann       misc_net    4.97   Jennifer   
1     fraud_Heller, Gutmann and Zieme    grocery_pos  107.23  Stephanie   
2                fraud_Lind-Buckridge  entertainment  220.11     Edward   
3  fraud_Kutch, Hermiston and Farrell  gas_transport   45.00     Jeremy   
4                 fraud_Keeling-Crist       misc_pos   41.96      Tyler   

      last gender                        street  ...      lat      long  \
0    Banks      F                561 Perry

In [None]:
# Step 3: Preprocessing
# Separate features and labels in train and test datasets
X_train = train_df.drop(columns=['is_fraud','first','last','gender','street','city','state','merchant','zip','job','dob','unix_time','trans_num','category','cc_num','trans_date_trans_time'])  # Drop target column from train set
y_train = train_df['is_fraud']  # Target column for train set

X_test = test_df.drop(columns=['is_fraud','first','last','gender','street','city','state','merchant','zip','job','dob','unix_time','trans_num','category','cc_num','trans_date_trans_time'])  # Drop target column from test set
y_test = test_df['is_fraud']  # Target column for test set

# Remove unnecessary columns (e.g., transaction IDs, if present)
if 'transaction_id' in X_train.columns:
    X_train = X_train.drop(columns=['transaction_id'])
    X_test = X_test.drop(columns=['transaction_id'])

# Standardize features (important for some models like Logistic Regression)
scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Step 4: Train Logistic Regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

In [None]:
# Step 5: Train Decision Tree model
dtree = DecisionTreeClassifier(max_depth=5, random_state=42)
dtree.fit(X_train, y_train)

In [None]:
# Step 6: Train Random Forest model
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)

In [None]:
# Step 7: Evaluate all models
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print(f"\nModel: {model.__class__.__name__}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

print("\n--- Model Evaluation ---")
evaluate_model(log_reg, X_test, y_test)
evaluate_model(dtree, X_test, y_test)
evaluate_model(rf, X_test, y_test)


--- Model Evaluation ---

Model: LogisticRegression
Accuracy: 0.5008340546211305
Confusion Matrix:
 [[277103 276471]
 [   925   1220]]




Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.50      0.67    553574
           1       0.00      0.57      0.01      2145

    accuracy                           0.50    555719
   macro avg       0.50      0.53      0.34    555719
weighted avg       0.99      0.50      0.66    555719


Model: DecisionTreeClassifier
Accuracy: 0.9961401355721147
Confusion Matrix:
 [[553574      0]
 [  2145      0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    553574
           1       0.00      0.00      0.00      2145

    accuracy                           1.00    555719
   macro avg       0.50      0.50      0.50    555719
weighted avg       0.99      1.00      0.99    555719


Model: RandomForestClassifier
Accuracy: 0.9961401355721147
Confusion Matrix:
 [[553574      0]
 [  2145      0]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    553574
           1       0.00      0.00      0.00      2145

    accuracy                           1.00    555719
   macro avg       0.50      0.50      0.50    555719
weighted avg       0.99      1.00      0.99    555719



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Step 8: Predict new transactions
new_transaction = np.array([X_test[0]])  # Example: Using the first test sample
prediction = rf.predict(new_transaction)
print("\nPredicted Class (0=Legitimate, 1=Fraudulent):", prediction[0])


Predicted Class (0=Legitimate, 1=Fraudulent): 0


