<a href="https://colab.research.google.com/github/SoniReddyMaram/CODSOFT/blob/main/Credit_Card_Fraud_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, roc_auc_score


In [2]:
# Load the dataset
df = pd.read_csv('fraudTest.csv')

# Check for missing values
df.info()

df.drop(columns=['Unnamed: 0'], inplace=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555719 entries, 0 to 555718
Data columns (total 23 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Unnamed: 0             555719 non-null  int64  
 1   trans_date_trans_time  555719 non-null  object 
 2   cc_num                 555719 non-null  int64  
 3   merchant               555719 non-null  object 
 4   category               555719 non-null  object 
 5   amt                    555719 non-null  float64
 6   first                  555719 non-null  object 
 7   last                   555719 non-null  object 
 8   gender                 555719 non-null  object 
 9   street                 555719 non-null  object 
 10  city                   555719 non-null  object 
 11  state                  555719 non-null  object 
 12  zip                    555719 non-null  int64  
 13  lat                    555719 non-null  float64
 14  long                   555719 non-nu

In [3]:
df.isnull().sum()

Unnamed: 0,0
trans_date_trans_time,0
cc_num,0
merchant,0
category,0
amt,0
first,0
last,0
gender,0
street,0
city,0


In [4]:
encoder=LabelEncoder()
df['merchant']=encoder.fit_transform(df['merchant'])
df['category']=encoder.fit_transform(df['category'])
df['street']=encoder.fit_transform(df['street'])
df['job']=encoder.fit_transform(df['job'])
df['trans_num']=encoder.fit_transform(df['trans_num'])
df['first']=encoder.fit_transform(df['first'])
df['city']=encoder.fit_transform(df['city'])
df['state']=encoder.fit_transform(df['state'])
df['last']=encoder.fit_transform(df['last'])
df['gender']=encoder.fit_transform(df['gender'])
df['trans_date_trans_time']=encoder.fit_transform(df['trans_date_trans_time'])
df['dob']=encoder.fit_transform(df['dob'])

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555719 entries, 0 to 555718
Data columns (total 22 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   trans_date_trans_time  555719 non-null  int64  
 1   cc_num                 555719 non-null  int64  
 2   merchant               555719 non-null  int64  
 3   category               555719 non-null  int64  
 4   amt                    555719 non-null  float64
 5   first                  555719 non-null  int64  
 6   last                   555719 non-null  int64  
 7   gender                 555719 non-null  int64  
 8   street                 555719 non-null  int64  
 9   city                   555719 non-null  int64  
 10  state                  555719 non-null  int64  
 11  zip                    555719 non-null  int64  
 12  lat                    555719 non-null  float64
 13  long                   555719 non-null  float64
 14  city_pop               555719 non-nu

In [6]:
# Splitting the data into features and target
X = df.drop('is_fraud', axis=1)
y = df['is_fraud']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
from imblearn.over_sampling import SMOTE

# Apply SMOTE to balance the classes
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)


In [8]:
logreg = LogisticRegression()
logreg.fit(X_train_res, y_train_res)
y_pred_logreg = logreg.predict(X_test)

In [9]:
dtree = DecisionTreeClassifier()
dtree.fit(X_train_res, y_train_res)
y_pred_dtree = dtree.predict(X_test)

In [None]:
rf = RandomForestClassifier()
rf.fit(X_train_res, y_train_res)
y_pred_rf = rf.predict(X_test)

In [11]:
# Evaluation Function
def evaluate_model(y_true, y_pred):
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("ROC AUC Score:", roc_auc_score(y_true, y_pred))

# Logistic Regression Evaluation
print("Logistic Regression Evaluation:")
evaluate_model(y_test, y_pred_logreg)

# Decision Tree Evaluation
print("Decision Tree Evaluation:")
evaluate_model(y_test, y_pred_dtree)

# Random Forest Evaluation
print("Random Forest Evaluation:")
evaluate_model(y_test, y_pred_rf)

Logistic Regression Evaluation:
Confusion Matrix:
 [[110718      0]
 [   426      0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00    110718
           1       0.00      0.00      0.00       426

    accuracy                           1.00    111144
   macro avg       0.50      0.50      0.50    111144
weighted avg       0.99      1.00      0.99    111144

Accuracy: 0.9961671345281796
ROC AUC Score: 0.5
Decision Tree Evaluation:
Confusion Matrix:
 [[109709   1009]
 [   119    307]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99    110718
           1       0.23      0.72      0.35       426

    accuracy                           0.99    111144
   macro avg       0.62      0.86      0.67    111144
weighted avg       1.00      0.99      0.99    111144

Accuracy: 0.9898510041027856
ROC AUC Score: 0.8557720171714002
