## Importing dependencies

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Loading the dataset

In [None]:
# Read the transactions CSV (path is relative to the working directory) into a DataFrame.
credit_card_data = pd.read_csv("creditcard.csv")

In [None]:
# Preview the first five rows of the dataset.
credit_card_data.head(n=5)

In [None]:
# Preview the last five rows of the dataset.
credit_card_data.tail(n=5)

## Exploring the dataset

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
credit_card_data.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
credit_card_data.describe()

In [None]:
# Count the missing values in each column.
credit_card_data.isna().sum()

## Distribution of legitimate and fraudulent transactions

In [None]:
# Tally the rows under each Class label, then keep the tally for label 1
# (fraud); it is reused later as the sample size for under-sampling.
val_count = credit_card_data["Class"].value_counts()
num_of_fradulent_transaction = val_count.loc[1]

##### Data is highly imbalanced
0 -> normal transactions (284315)\
1 -> fraudulent transactions (492)

#### Separating the data for analysis

In [None]:
# Split the rows by label: Class 0 is legitimate, Class 1 is fraudulent.
legit_data = credit_card_data.loc[credit_card_data["Class"] == 0]
fraud_data = credit_card_data.loc[credit_card_data["Class"] == 1]

In [None]:
# Summary statistics of the transaction amount for legitimate transactions.
legit_data["Amount"].describe()

In [None]:
# Summary statistics of the transaction amount for fraudulent transactions.
fraud_data["Amount"].describe()

#### Compare the values for both types of transactions

In [None]:
# Per-class mean of every column, to compare legitimate (0) and fraudulent (1) rows.
credit_card_data.groupby('Class').mean()

#### Under-Sampling
Build a sample dataset with a similar distribution of normal and fraudulent transactions by randomly sampling as many normal transactions as there are fraudulent ones.

In [None]:
# Under-sample the majority class: draw as many legitimate rows as there are
# fraudulent ones. The seed is fixed (same value as the train/test split) so
# the balanced dataset, and every metric derived from it, is reproducible
# from one run to the next.
legit_sample = legit_data.sample(n=num_of_fradulent_transaction, random_state=2)

In [None]:
# Stack the sampled legitimate rows on top of all the fraudulent rows.
balanced_dataset = pd.concat(objs=[legit_sample, fraud_data], axis="index")

In [None]:
# First rows of the balanced dataset (these come from the legitimate sample).
balanced_dataset.head()

In [None]:
# Last rows of the balanced dataset (these come from the fraudulent rows).
balanced_dataset.tail()

In [None]:
# Confirm that both classes now have the same number of rows.
balanced_dataset['Class'].value_counts()

In [None]:
# Per-class column means of the balanced data, for comparison with the
# same summary computed on the full dataset.
balanced_dataset.groupby('Class').mean()

#### Splitting the data

In [None]:
# Target: the Class label. Features: every remaining column.
y = balanced_dataset['Class']
X = balanced_dataset.drop(columns='Class')

In [None]:
# Hold out 20% of the balanced data for testing. Stratifying on y keeps the
# class ratio the same in both partitions; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
    stratify=y,
)

#### Model Training

In [None]:
# Raise the iteration cap above the library default (100): the features are
# not scaled anywhere in this notebook, so the default solver may otherwise
# stop before it has converged and return a sub-optimal fit.
model = LogisticRegression(max_iter=1000)

In [None]:
# Fit the classifier on the training partition only.
model.fit(X_train, y_train)

#### Model Evaluation

In [None]:
# Accuracy on the data the model was fitted on.
# accuracy_score is documented as (y_true, y_pred). The value is the same in
# either order for accuracy, but the documented order keeps this call correct
# if the metric is later swapped for an asymmetric one (precision, recall, ...).
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)

In [None]:
# Show the accuracy measured on the training partition.
print(training_data_accuracy)

In [None]:
# Accuracy on the held-out partition the model has not seen during fitting.
# accuracy_score is documented as (y_true, y_pred). The value is the same in
# either order for accuracy, but the documented order keeps this call correct
# if the metric is later swapped for an asymmetric one (precision, recall, ...).
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_test, X_test_prediction)

In [None]:
# Show the accuracy measured on the held-out test partition.
print(test_data_accuracy)