In [67]:
#Importing the dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score

In [3]:
#Loading the dataset to a Pandas DataFrame
credit_df = pd.read_csv('/Users/macminhanh/Downloads/creditcard.csv')

print(credit_df.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [4]:
#Getting information about the dataset
credit_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [5]:
#Checking if there are any missing values in each collum
credit_df.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [6]:
#Checking the distribution of legit transactions and fraud transactions
credit_df['Class'].value_counts()

Class
0    284315
1       492
Name: count, dtype: int64

In [7]:
#As we can see, the dataset is unbalanced, which makes it hard to apply machine learning. Applying machine learning models directly to such a skewed dataset will likely result in a model biased towards predicting transactions as legitimate, simply because they are the overwhelming majority. This could lead to high accuracy in predicting legitimate transactions while failing to adequately identify fraudulent ones, which are typically the focus in fraud detection.

In [None]:
#DATA ANALYSIS

In [8]:
# Separting the data for analysis
legit = credit_df[credit_df.Class == 0 ]
fraud = credit_df[credit_df.Class == 1 ]

In [9]:
print(legit.shape)
print(fraud.shape)

(284315, 31)
(492, 31)


In [13]:
#Statistical measures of the data
legit.Amount.describe()

count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64

In [14]:
fraud.Amount.describe()

count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

In [15]:
#Comparing the values for both transactions
credit_df.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,94838.202258,0.008258,-0.006271,0.012171,-0.00786,0.005453,0.002419,0.009637,-0.000987,0.004467,...,-0.000644,-0.001235,-2.4e-05,7e-05,0.000182,-7.2e-05,-8.9e-05,-0.000295,-0.000131,88.291022
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [16]:
# These features show significant differences in their means between the two classes. For example, V1 has a mean of -4.77 for fraudulent transactions, compared to 0.01 for legitimate ones. This stark contrast highlights that these components capture crucial differences in transaction patterns between frauds and legitimate transactions.
#The differences in averages across many of the features for the two classes of transactions underscore the importance of these features in distinguishing between legitimate and fraudulent activities. Machine learning models can leverage these differences to predict whether a new transaction is likely to be fraudulent.

In [17]:
#DEALING WITH THE UNBALANCED DATASET (under-sampling technique)
#Building a sample dataset from the original dataset, containing similar distribution of normal transactions and fraudulent transaction.

legit_sample = legit.sample(n=492)

In [18]:
#Concatenting two DataFrames
new_df = pd.concat([legit_sample, fraud], axis=0)

In [19]:
new_df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
22060,32009.0,-3.38162,3.610021,0.135225,1.81119,-0.284175,0.428896,0.261811,0.551994,0.740013,...,-0.671385,-0.696388,-0.014595,-0.596174,0.160653,-0.088964,0.353296,-0.472854,5.47,0
49018,43877.0,-0.585841,-3.956089,0.106223,0.202965,-2.144219,1.248379,-0.173974,0.261082,-0.170545,...,0.667984,-0.001106,-0.759348,-0.222817,-0.152185,-0.299782,-0.095343,0.169122,939.6,0
16204,27613.0,-1.280478,0.946271,0.869264,0.755125,-0.295337,0.158131,1.235841,-0.009555,-0.42362,...,0.027565,0.814026,0.192402,0.240439,-0.224926,-0.357396,0.127663,-0.338903,150.3,0
212337,138816.0,-0.796401,-0.505182,1.696613,-1.257768,-0.641663,2.52594,-1.346652,0.872373,1.192281,...,-0.120579,0.903131,0.259966,-0.290484,-1.482635,0.629709,0.450615,0.426489,22.5,0
271290,164507.0,0.09188,0.784483,-0.69235,1.395198,1.298409,-0.67697,0.753286,-0.188138,-0.653536,...,0.036339,0.200205,-0.086185,0.525795,-0.545794,0.712574,0.176394,0.231699,14.36,0


In [20]:
# Now the new DataFrame that has an equal number of legitimate and fraudulent transactions. Each class now has 492 instances, making the dataset balanced

In [21]:
new_df['Class'].value_counts()

Class
0    492
1    492
Name: count, dtype: int64

In [22]:
new_df.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,93727.284553,-0.082078,0.016261,0.069812,-0.048031,0.049636,-0.116466,0.052577,-0.04972,0.022469,...,0.001388,-0.001936,0.006145,0.012964,0.027241,-0.032764,0.012265,0.021026,0.004676,85.942886
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [23]:
# With the new dataset, the mean values are only slightly different, indicating that this new dataset reflects the original dataset pretty well and can be used for machine learning.

In [24]:
#Splitting the new dataset into features and targets

X = new_df.drop(columns='Class', axis=1) #features
Y = new_df['Class'] #targets

print(X)

            Time        V1        V2        V3        V4        V5        V6  \
22060    32009.0 -3.381620  3.610021  0.135225  1.811190 -0.284175  0.428896   
49018    43877.0 -0.585841 -3.956089  0.106223  0.202965 -2.144219  1.248379   
16204    27613.0 -1.280478  0.946271  0.869264  0.755125 -0.295337  0.158131   
212337  138816.0 -0.796401 -0.505182  1.696613 -1.257768 -0.641663  2.525940   
271290  164507.0  0.091880  0.784483 -0.692350  1.395198  1.298409 -0.676970   
...          ...       ...       ...       ...       ...       ...       ...   
279863  169142.0 -1.927883  1.125653 -4.518331  1.749293 -1.566487 -2.010494   
280143  169347.0  1.378559  1.289381 -5.004247  1.411850  0.442581 -1.326536   
280149  169351.0 -0.676143  1.126366 -2.213700  0.468308 -1.120541 -0.003346   
281144  169966.0 -3.113832  0.585864 -5.399730  1.817092 -0.840618 -2.943548   
281674  170348.0  1.991976  0.158476 -2.583441  0.408670  1.151147 -0.096695   

              V7        V8        V9  .

In [25]:
print(Y)

22060     0
49018     0
16204     0
212337    0
271290    0
         ..
279863    1
280143    1
280149    1
281144    1
281674    1
Name: Class, Length: 984, dtype: int64


In [27]:
#Splitting the data into training data and testing data

X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.2, stratify=Y, random_state=2)

In [28]:
print(X.shape, X_train.shape, X_test.shape)

(984, 30) (787, 30) (197, 30)


In [29]:
#Model Training
#Logistic Regression

model = LogisticRegression()

In [30]:
#Training the Logistic Regression Model with Training Data
model.fit(X_train, Y_train)

In [33]:
#Model Evalutaion based on Accuracy Score
#Accuracy on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)

print('Accuracy on Training Data:',training_data_accuracy)

Accuracy on Training Data: 0.9263024142312579


In [34]:
#Accuracy on testing data
X_test_prediction = model.predict(X_test)
testing_data_accuracy = accuracy_score(X_test_prediction, Y_test)

print('Accuracy on Testing Data:', testing_data_accuracy)

Accuracy on Testing Data: 0.9035532994923858


In [55]:
#Model Evaluation based on Precision Score - what proportion of transactions that the model predicted as fraudulent were actually fraudulent.


#Precision on training data
lr_train_precision = precision_score(Y_train, X_train_prediction)
print('Logistic Regression Training Precision:', lr_train_precision)

Logistic Regression Training Precision: 0.9516129032258065


In [57]:
#Precision on testing data
lr_test_precision = precision_score(Y_test, X_test_prediction)
print('Logistic Regression Testing Precision:', lr_test_precision)

Logistic Regression Testing Precision: 0.9438202247191011


In [58]:
#Model Evaluation based on Recall Score  - the ability of the model to correctly identify all actual cases of fraud

#Recall Score on training data
lr_train_recall = recall_score(Y_train, X_train_prediction)
print('Logistic Regression Training Recall:', lr_train_recall)

Logistic Regression Training Recall: 0.8984771573604061


In [59]:
#Recall Score on testing data
lr_test_recall = recall_score(Y_test, X_test_prediction)
print('Logistic Regression Testing Recall:', lr_test_recall)

Logistic Regression Testing Recall: 0.8571428571428571


In [60]:
#Model Evaluation based on F1 Score - overall performance of the machine learning model by balancing precision and recall

#F1 Score on training data
lr_train_f1 = f1_score(Y_train, X_train_prediction)
print('Logistic Regression Training F1 Score:', lr_train_f1)

Logistic Regression Training F1 Score: 0.9242819843342037


In [61]:
#F1 Score on testing data
lr_test_f1 = f1_score(Y_test, X_test_prediction)
print('Logistic Regression Testing F1 Score:', lr_test_f1)

Logistic Regression Testing F1 Score: 0.8983957219251337


In [37]:
#Decision Trees
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=2)

In [38]:
# Training the Decision Tree model
dt_model.fit(X_train, Y_train)

In [41]:
#Accuracy on training data
dt_train_predictions = dt_model.predict(X_train)
dt_train_accuracy = accuracy_score(Y_train, dt_train_predictions)
print('Decision Tree Accuracy on Training Data:', dt_train_accuracy)

Decision Tree Accuracy on Training Data: 1.0


In [None]:
#This result indicates that the Decision Tree model has achieved perfect accuracy on the training dataset, meaning it correctly classified every single instance in the training data.
#While a training accuracy of 1.0 might initially seem excellent, it is often indicative of overfitting

In [45]:
#Accuracy on testing data
dt_test_predictions = dt_model.predict(X_test)
dt_test_accuracy = accuracy_score(Y_test, dt_test_predictions)
print('Decision Tree Accuracy on Testing Data:', dt_test_accuracy)

Decision Tree Accuracy on Testing Data: 0.8883248730964467


In [None]:
#The accuracy on the testing data is considerably lower than on the training data. An accuracy of about 88.83% is still quite high, but the discrepancy suggests that the model does not perform as well on unseen data, reinforcing the suspicion of overfitting.

In [64]:
#Precision Score on traing and testing data

dt_train_precision = precision_score(Y_train, dt_train_predictions)
dt_test_precision = precision_score(Y_test, dt_test_predictions)

print('Decision Tree Training Precision:', dt_train_precision)
print('Decision Tree Testing Precision:', dt_test_precision)

Decision Tree Training Precision: 1.0
Decision Tree Testing Precision: 0.9042553191489362


In [65]:
# Recall Score on training and testing data

dt_train_recall = recall_score(Y_train, dt_train_predictions)
dt_test_recall = recall_score(Y_test, dt_test_predictions)

print('Decision Tree Training Recall:', dt_train_recall)
print('Decision Tree Testing Recall:', dt_test_recall)

Decision Tree Training Recall: 1.0
Decision Tree Testing Recall: 0.8673469387755102


In [66]:
# F1 Score on training and testing data

dt_train_f1 = f1_score(Y_train, dt_train_predictions)
dt_test_f1 = f1_score(Y_test, dt_test_predictions)

print('Decision Tree Training F1 Score:', dt_train_f1)
print('Decision Tree Testing F1 Score:', dt_test_f1)

Decision Tree Training F1 Score: 1.0
Decision Tree Testing F1 Score: 0.8854166666666667


In [62]:
#XGBoost

!pip install xgboost
import xgboost as xgb
xgb_model = xgb.XGBClassifier(random_state=2, use_label_encoder=False, eval_metric='logloss')



In [48]:
# Training the XGBoost model
xgb_model.fit(X_train, Y_train)

In [52]:
#Accuracy on training data
xgb_train_predictions = xgb_model.predict(X_train)
xgb_train_accuracy = accuracy_score(Y_train, xgb_train_predictions)
print('XGBoost Accuracy on Training Data:', xgb_train_accuracy)

XGBoost Accuracy on Training Data: 1.0


In [53]:
#Accuracy on testing data
xgb_test_predictions = xgb_model.predict(X_test)
xgb_test_accuracy = accuracy_score(Y_test, xgb_test_predictions)
print('XGBoost Accuracy on Testing Data:', xgb_test_accuracy)

XGBoost Accuracy on Testing Data: 0.9187817258883249


In [None]:
#The accuracy on the testing data is about 91.88%, which is quite high but noticeably lower than the training accuracy. This disparity typically confirms the suspicion of overfitting but to a lesser extent than what might be seen with simpler models like decision trees without ensemble methods.

In [69]:
# Precision Score on testing and training data
xgb_train_precision = precision_score(Y_train, xgb_train_predictions)
xgb_test_precision = precision_score(Y_test, xgb_test_predictions)

print('XGBoost Training Precision:', xgb_train_precision)
print('XGBoost Testing Precision:', xgb_test_precision)

XGBoost Training Precision: 1.0
XGBoost Testing Precision: 0.9555555555555556


In [70]:
# Recall Score on testing and training data
xgb_train_recall = recall_score(Y_train, xgb_train_predictions)
xgb_test_recall = recall_score(Y_test, xgb_test_predictions)

print('XGBoost Training Recall:', xgb_train_recall)
print('XGBoost Testing Recall:', xgb_test_recall)

XGBoost Training Recall: 1.0
XGBoost Testing Recall: 0.8775510204081632


In [71]:
# F1 Score on testing and training data
xgb_train_f1 = f1_score(Y_train, xgb_train_predictions)
xgb_test_f1 = f1_score(Y_test, xgb_test_predictions)

print('XGBoost Training F1 Score:', xgb_train_f1)
print('XGBoost Testing F1 Score:', xgb_test_f1)

XGBoost Training F1 Score: 1.0
XGBoost Testing F1 Score: 0.9148936170212767
