In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Read the raw transactions CSV into a pandas DataFrame.
# NOTE(review): this is an absolute path; the notebook will only run where
# the file exists at exactly this location.
DATA_PATH = '/content/credit_data.csv'
credit_card_data = pd.read_csv(DATA_PATH)

In [None]:
# Peek at the first five records to sanity-check the load
credit_card_data.head(n=5)

In [None]:
# Dataset information: print the DataFrame summary produced by .info()
credit_card_data.info()

In [None]:
# Count the missing entries in every column
credit_card_data.isna().sum()

In [None]:
# Class balance: how many rows carry each value of 'Class'
# (legit vs. fraudulent transactions)
credit_card_data.Class.value_counts()

In [None]:
# Split the frame by label for analysis:
# rows with Class == 0 go to `legit`, rows with Class == 1 go to `fraud`.
legit = credit_card_data.loc[credit_card_data['Class'] == 0]
fraud = credit_card_data.loc[credit_card_data['Class'] == 1]

In [None]:
# (rows, columns) of each subset, one per line: legit first, then fraud
print(legit.shape, fraud.shape, sep='\n')

In [None]:
# Summary statistics of the 'Amount' column for the legit subset
legit['Amount'].describe()

In [None]:
# Summary statistics of the 'Amount' column for the fraud subset
fraud['Amount'].describe()

In [None]:
# Per-class mean of every column, to compare the two transaction groups
credit_card_data.groupby(by='Class').mean()

In [None]:
# Under-sample the legit subset so it has as many rows as the fraud subset.
# - The sample size is derived from `fraud` instead of a hard-coded 492
#   (NOTE(review): 492 was presumably the fraud row count -- confirm).
# - random_state pins the draw so a fresh Restart & Run All reproduces the
#   same subset; without it every run trains and evaluates on different rows.
#   The value 2 matches the seed already used for train_test_split.
legit_sample = legit.sample(n=len(fraud), random_state=2)

In [None]:
# Stack the sampled legit rows on top of the fraud rows (row-wise concat,
# which is pd.concat's default axis) to form the working dataset.
new_dataset = pd.concat([legit_sample, fraud])

In [None]:
# First five rows of the combined dataset
new_dataset.head(n=5)

In [None]:
# Row count per class in the combined dataset
new_dataset.Class.value_counts()

In [None]:
# Per-class column means of the combined dataset
new_dataset.groupby(by='Class').mean()

In [None]:
# Target: the 'Class' label. Features: every other column.
# `columns=` already selects the column axis, so no separate `axis` is needed.
Y = new_dataset['Class']
X = new_dataset.drop(columns=['Class'])

In [None]:
# Show the feature matrix (every column of new_dataset except 'Class')
print(X)

In [None]:
# Show the target labels (the 'Class' column of new_dataset)
print(Y)

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

In [None]:
# Shapes of the full feature matrix, the training split and the test split
print(*(frame.shape for frame in (X, X_train, X_test)))

In [None]:
# Logistic-regression classifier; all settings are defaults except the
# iteration cap. The features are passed in without any scaling, and the
# default cap of 100 iterations can stop the solver before it has converged
# (NOTE(review): confirm on the actual data -- look for a ConvergenceWarning).
# Raising the cap does not change the result when the fit already converges.
model = LogisticRegression(max_iter=1000)

In [None]:
# Fit the logistic-regression model on the training split only
model.fit(X=X_train, y=Y_train)

In [None]:
# Accuracy on the training data.
# accuracy_score is declared as (y_true, y_pred), so the ground-truth labels
# go first and the model's predictions second.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Report the training-set accuracy stored in training_data_accuracy
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# Accuracy on the held-out test data.
# accuracy_score is declared as (y_true, y_pred), so the ground-truth labels
# go first and the model's predictions second.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [None]:
# Report the test-set accuracy stored in test_data_accuracy
print('Accuracy score on Test Data : ', test_data_accuracy)