In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the dataset paths used by the loading cell can be checked.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

import warnings
# Ignore all warnings from here on (no category or module filter is given).
# NOTE(review): this also hides any warnings raised while fitting the models
# below — consider narrowing the filter to the specific categories intended.
warnings.filterwarnings("ignore")

In [None]:
# Load fraudTrain.csv and fraudTest.csv from the Kaggle "fraud-detection"
# input directory into two separate DataFrames.
train=pd.read_csv("/kaggle/input/fraud-detection/fraudTrain.csv")
test=pd.read_csv("/kaggle/input/fraud-detection/fraudTest.csv")

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Preview the first rows of the test frame.
test.head()

In [None]:
# Column names, dtypes and non-null counts of the training frame as loaded.
train.info()

In [None]:
# Summary statistics of the training frame (describe() defaults).
train.describe()

In [None]:
# (rows, columns) of the training frame.
train.shape

In [None]:
# Number of missing values in each column of the training frame.
train.isnull().sum()

In [None]:
# Heatmap drawn over the boolean isnull() mask of the training frame, to
# visualise where missing values occur.
sns.heatmap(train.isnull())

In [None]:
def cleaning_data(clean):
    """Drop unused columns and incomplete rows from ``clean`` in place.

    Parameters
    ----------
    clean : pandas.DataFrame
        Frame to clean. It is modified in place, so callers do not need to
        re-assign the result.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, after cleaning.
    """
    unused_columns = [
        "Unnamed: 0", 'cc_num', 'first', 'last', 'street', 'city', 'state',
        'zip', 'dob', 'trans_num', 'trans_date_trans_time',
    ]
    # errors="ignore" keeps the call safe when the cell is re-run on a frame
    # whose columns were already removed (a plain drop would raise KeyError).
    clean.drop(columns=unused_columns, inplace=True, errors="ignore")
    # dropna() returns a new frame unless inplace=True; the bare call used
    # before discarded its result, so no rows were ever removed.
    clean.dropna(inplace=True)
    return clean

In [None]:
# Clean the training frame. cleaning_data modifies the frame in place, so no
# re-assignment is needed; the returned frame is shown as the cell output.
cleaning_data(train)

In [None]:
# Apply the same in-place cleaning to the test frame.
cleaning_data(test)

In [None]:
# Encode gender as M -> 0, F -> 1 on BOTH frames so train and test share a
# single coding (mapping only train leaves test to be coded separately later).
# The numeric-dtype guard makes the cell safe to re-run: pushing an
# already-encoded 0/1 column through the dict again would turn it into NaN.
gender_codes = {'M': 0, 'F': 1}
for frame in (train, test):
    if not pd.api.types.is_numeric_dtype(frame['gender']):
        frame['gender'] = frame['gender'].map(gender_codes)

In [None]:
# Re-inspect columns and dtypes after the column drop and gender mapping.
train.info()

In [None]:
# Notebook-level LabelEncoder instance (sklearn.preprocessing).
encoder = LabelEncoder()
# Category order per column, remembered across calls so every frame passed to
# encode() receives the same integer code for the same string. "gender" is
# pre-seeded so that M -> 0 and F -> 1, matching the explicit mapping applied
# to the training frame in the earlier gender cell.
_encode_categories = {"gender": ["M", "F"]}


def encode(data):
    """Integer-encode merchant, category, gender and job of ``data`` in place.

    The first frame that presents a column as strings defines that column's
    codes: the position of each value in the sorted list of its unique values,
    which is the numbering ``LabelEncoder.fit_transform`` produces. Later
    frames reuse the stored order instead of being re-fitted, so a given
    string gets the same code in every frame (in this notebook: train first,
    then test). Values not seen when the order was learned, and missing
    values, are encoded as -1.

    Columns that are already numeric are left untouched, which makes the
    function safe to call again on an encoded frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the four columns; modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for column in ("merchant", "category", "gender", "job"):
        values = data[column]
        if pd.api.types.is_numeric_dtype(values):
            # Already encoded (e.g. gender mapped earlier, or a cell re-run).
            continue
        if column not in _encode_categories:
            _encode_categories[column] = sorted(values.dropna().unique())
        codes = pd.Categorical(values, categories=_encode_categories[column]).codes
        # Categorical codes use the smallest fitting int type; widen to int64
        # so the column dtype matches what LabelEncoder produced.
        data[column] = codes.astype("int64")
    return data

In [None]:
# Encode the categorical columns of the training frame (modified in place;
# the returned frame is shown as the cell output).
encode(train)

In [None]:
# Encode the same categorical columns of the test frame.
encode(test)

In [None]:
# Share of each is_fraud class in the training frame, as a pie chart.
# Wedges are ordered by class value and each label is looked up from that
# value, so a label cannot end up on the wrong wedge if the class frequencies
# change (value_counts() alone orders by frequency, not by value).
# Assumes is_fraud is coded 0 = not fraud, 1 = fraud — TODO confirm; any other
# value is labelled with its own text.
exit_counts = train["is_fraud"].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(6, 6))  # single axes: no empty second subplot slot
ax.pie(
    exit_counts,
    labels=[{0: "No", 1: "YES"}.get(value, str(value)) for value in exit_counts.index],
    autopct="%0.0f%%",
)
ax.set_title("is_fraud Counts")
fig.tight_layout()  # Adjust layout to prevent overlapping
plt.show()

In [None]:
# Pairwise correlation matrix of the columns in the training frame.
train.corr()

In [None]:
# Heatmap of the training frame's correlation matrix; each cell is annotated
# with its value to two decimals, using the 'coolwarm' colour map.
sns.heatmap(train.corr(),annot=True,cmap='coolwarm', fmt=".2f")

In [None]:
# Target vector first, then the feature matrix: every column except the
# target. drop() returns a new frame, so train itself is left unchanged.
y = train["is_fraud"]
x = train.drop(columns="is_fraud")

In [None]:
# Hold out 20% of the training frame for model comparison.
# stratify=y keeps the proportion of each is_fraud class the same in both
# parts; with a purely random split a rare class can be over- or
# under-represented in the held-out part, which makes the scores noisy.
# random_state pins the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Candidate classifiers, all with default hyper-parameters.
# random_state is pinned on the two tree-based models because their fitting
# is randomised; without it every run yields slightly different scores.
model1 = LogisticRegression()
model2 = RandomForestClassifier(random_state=42)
model3 = DecisionTreeClassifier(random_state=42)

In [None]:
# Display names for the results table. They are paired with `accuracies` by
# position, so the order here must match the order in which the models are
# evaluated.
columns = ['LogisticRegression', 'RandomForestClassifier' , 'DecisionTreeClassifier']
# Filled by cal(): one accuracy per call, in call order.
accuracies=[]

In [None]:
def cal(model):
    """Fit ``model`` and report its accuracy on the held-out split.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``, fits ``model`` on the first pair and scores its predictions
    on the second. The accuracy is appended to the notebook-level
    ``accuracies`` list and printed below a banner together with the model.

    Note: every call appends one entry, so calling it again for the same
    model (e.g. re-running a cell) adds a second value to ``accuracies``.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    None
    """
    model.fit(x_train, y_train)
    score = accuracy_score(y_test, model.predict(x_test))
    accuracies.append(score)

    # A single print with a newline separator emits the same four lines as
    # separate prints would: banner, model repr, blank line, score.
    print("-----Model Evaluation on Test Data-----", model, "", score, sep="\n")

In [None]:
# Fit model1 (LogisticRegression) and report its accuracy.
cal(model1)

In [None]:
# Fit model2 (RandomForestClassifier) and report its accuracy.
cal(model2)

In [None]:
# Fit model3 (DecisionTreeClassifier) and report its accuracy.
cal(model3)

In [None]:
# Accuracies collected so far, in the order cal() was called.
accuracies

In [None]:
# Results table: each algorithm name is paired with its accuracy by position,
# so `columns` and `accuracies` must have the same length and order.
FinalResult=pd.DataFrame({'Algorithm':columns, 'Accuracy':accuracies})

In [None]:
# Display the results table.
FinalResult

In [None]:
# Accuracy per algorithm, drawn as a line on a wide single-axes figure.
# The Axes object is used directly instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(FinalResult.Algorithm, accuracies, label="Accuracy")
ax.legend()
plt.show()

In [None]:
# Switch to the separate test file: features are every column of `test`
# except the target, and the target is test["is_fraud"].
# NOTE: this rebinds x_test / y_test, which until this cell held the 20%
# split produced by train_test_split; calling cal() after this point would
# score against the test file instead.
x_test = test.drop(columns=["is_fraud"], inplace = False)
y_test = test["is_fraud"]

In [None]:
# Predict labels for the test-file features with model2
# (RandomForestClassifier), which was fitted earlier through cal(model2).
y_pred = model2.predict(x_test)
y_pred

In [None]:
# Accuracy of the predictions against the true labels in test['is_fraud'].
accuracy = accuracy_score(test['is_fraud'],y_pred)
accuracy