# Importing The Libraries

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Importing The Dataset

In [20]:
train_data=pd.read_csv(r"C:\Users\itsso\Downloads\CreditCard Dataset\fraudTrain.csv")
test_data=pd.read_csv(r"C:\Users\itsso\Downloads\CreditCard Dataset\fraudTest.csv")

# Merging the datasets
fd=pd.concat([train_data,test_data])
fd.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,36.0788,-81.1781,3495,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,48.8878,-118.2105,149,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,42.1808,-112.262,4154,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,46.2306,-112.1138,1939,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,38.4207,-79.4629,99,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0


# Data Preprocessing

In [21]:
# Drop unnecessary columns
fd.drop(columns=['Unnamed: 0', 'gender', 'street', 'job', 'unix_time','trans_num','merchant','zip'], inplace=True)
fd.head()

Unnamed: 0,trans_date_trans_time,cc_num,category,amt,first,last,city,state,lat,long,city_pop,dob,merch_lat,merch_long,is_fraud
0,2019-01-01 00:00:18,2703186189652095,misc_net,4.97,Jennifer,Banks,Moravian Falls,NC,36.0788,-81.1781,3495,1988-03-09,36.011293,-82.048315,0
1,2019-01-01 00:00:44,630423337322,grocery_pos,107.23,Stephanie,Gill,Orient,WA,48.8878,-118.2105,149,1978-06-21,49.159047,-118.186462,0
2,2019-01-01 00:00:51,38859492057661,entertainment,220.11,Edward,Sanchez,Malad City,ID,42.1808,-112.262,4154,1962-01-19,43.150704,-112.154481,0
3,2019-01-01 00:01:16,3534093764340240,gas_transport,45.0,Jeremy,White,Boulder,MT,46.2306,-112.1138,1939,1967-01-12,47.034331,-112.561071,0
4,2019-01-01 00:03:06,375534208663984,misc_pos,41.96,Tyler,Garcia,Doe Hill,VA,38.4207,-79.4629,99,1986-03-28,38.674999,-78.632459,0


In [22]:
# Extract datetime feauter
fd['hour'] = pd.to_datetime(fd['trans_date_trans_time']).dt.hour
fd['day'] = pd.to_datetime(fd['trans_date_trans_time']).dt.day
fd['month'] = pd.to_datetime(fd['trans_date_trans_time']).dt.month
fd['year'] = pd.to_datetime(fd['trans_date_trans_time']).dt.year

# Drop the original datetime column
fd.drop('trans_date_trans_time', axis=1, inplace=True)
fd.head()

Unnamed: 0,cc_num,category,amt,first,last,city,state,lat,long,city_pop,dob,merch_lat,merch_long,is_fraud,hour,day,month,year
0,2703186189652095,misc_net,4.97,Jennifer,Banks,Moravian Falls,NC,36.0788,-81.1781,3495,1988-03-09,36.011293,-82.048315,0,0,1,1,2019
1,630423337322,grocery_pos,107.23,Stephanie,Gill,Orient,WA,48.8878,-118.2105,149,1978-06-21,49.159047,-118.186462,0,0,1,1,2019
2,38859492057661,entertainment,220.11,Edward,Sanchez,Malad City,ID,42.1808,-112.262,4154,1962-01-19,43.150704,-112.154481,0,0,1,1,2019
3,3534093764340240,gas_transport,45.0,Jeremy,White,Boulder,MT,46.2306,-112.1138,1939,1967-01-12,47.034331,-112.561071,0,0,1,1,2019
4,375534208663984,misc_pos,41.96,Tyler,Garcia,Doe Hill,VA,38.4207,-79.4629,99,1986-03-28,38.674999,-78.632459,0,0,1,1,2019


In [23]:
# Convert categorical columns to numerical codes
fd['category'] = pd.Categorical(fd['category']).codes
fd['first'] = pd.Categorical(fd['first']).codes
fd['last'] = pd.Categorical(fd['last']).codes
fd['city'] = pd.Categorical(fd['city']).codes
fd['state'] = pd.Categorical(fd['state']).codes
fd['state'] = pd.Categorical(fd['lat']).codes
fd['long'] = pd.Categorical(fd['long']).codes
fd['dob'] = pd.Categorical(fd['dob']).codes
fd['merch_lat'] = pd.Categorical(fd['merch_lat']).codes
fd['merch_long'] = pd.Categorical(fd['merch_long']).codes
fd.head()

Unnamed: 0,cc_num,category,amt,first,last,city,state,lat,long,city_pop,dob,merch_lat,merch_long,is_fraud,hour,day,month,year
0,2703186189652095,8,4.97,164,18,532,295,36.0788,704,3495,791,550600,1223201,0,0,1,1,2019
1,630423337322,4,107.23,312,161,619,978,48.8878,62,149,619,1745263,110910,0,0,1,1,2019
2,38859492057661,0,220.11,116,386,474,747,42.1808,90,4154,309,1451077,169563,0,0,1,1,2019
3,3534093764340240,2,45.0,165,468,84,944,46.2306,93,1939,405,1697797,164676,0,0,1,1,2019
4,375534208663984,9,41.96,339,153,217,405,38.4207,764,99,746,787219,1458121,0,0,1,1,2019


In [24]:
# Scale the numerical columns using StandardScaler
scaler= StandardScaler()
fd[['category','first','last','city','state','long','dob','merch_lat','merch_long']]=scaler.fit_transform(fd[['category','first','last','city','state','long','dob','merch_lat','merch_long']])
fd.head()

Unnamed: 0,cc_num,category,amt,first,last,city,state,lat,long,city_pop,dob,merch_lat,merch_long,is_fraud,hour,day,month,year
0,2703186189652095,0.452162,4.97,-0.183377,-1.715579,0.311413,-0.676149,36.0788,0.766675,3495,0.909287,-0.657937,0.605722,0,0,1,1,2019
1,630423337322,-0.570001,107.23,1.319765,-0.677196,0.642985,1.713517,48.8878,-1.504897,149,0.275196,1.723197,-1.528903,0,0,1,1,2019
2,38859492057661,-1.592163,220.11,-0.670883,0.956622,0.090365,0.905299,42.1808,-1.405826,4154,-0.867643,1.136842,-1.416341,0,0,1,1,2019
3,3534093764340240,-1.081082,45.0,-0.173221,1.552058,-1.395991,1.594559,46.2306,-1.395211,1939,-0.513731,1.62859,-1.42572,0,0,1,1,2019
4,375534208663984,0.707702,41.96,1.593987,-0.735288,-0.889105,-0.291283,38.4207,0.978971,99,0.743391,-0.186321,1.056562,0,0,1,1,2019


In [25]:
# Split the data into features(X) and target(y)
X=fd.drop(['is_fraud'], axis=1)
y=fd['is_fraud']

In [26]:
#Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

# Training and Testing The Models

In [29]:
# Evaluate Logistic Regression Model
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
log_accu=accuracy_score(y_test, y_pred_lr)

# Evaluate Decision Tree Model
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
dt_accu=accuracy_score(y_test, y_pred_lr)

# Evaluate Random Forest Model
rf = RandomForestClassifier(n_estimators=10)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
rf_accu=accuracy_score(y_test, y_pred_lr)

In [30]:
#Compare the accuracy of the models
Accuracy = pd.DataFrame({'LogisticRegression':[log_accu],'RandomForest': [rf_accu],'DecisionTree':[dt_accu]})
Accuracy     

Unnamed: 0,LogisticRegression,RandomForest,DecisionTree
0,0.994979,0.994979,0.994979
