In [77]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer, scale
from sklearn.utils.multiclass import unique_labels

In [78]:
# Load the transactions, lower-case every column label, then rename the
# resulting 'class' column to 'fraud'.
creditcard = (
    pd.read_csv('creditcard.csv')
    .rename(columns=str.lower)
    .rename(columns={'class': 'fraud'})
)

In [79]:
# Row count per value of the 'fraud' label; missing values, if any, are
# counted as their own bucket.
creditcard['fraud'].value_counts(dropna=False)

0    284315
1       492
Name: fraud, dtype: int64

In [80]:
# Average transaction amount within each value of the 'fraud' label.
creditcard.groupby('fraud')['amount'].mean()

fraud
0     88.291022
1    122.211321
Name: amount, dtype: float64

In [81]:
# Remove the 'time' column so it is not used as a model feature.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because 'time' has already been dropped.
creditcard = creditcard.drop('time', axis=1, errors='ignore')

In [82]:
# Standardize 'amount' with StandardScaler; the scaler expects a 2-D input,
# hence the reshape of the single column to shape (n_rows, 1).
# NOTE(review): the scaler is fitted on every row before the train/test
# split further down, so test-set statistics leak into this feature --
# consider fitting on the training rows only.
scaler = StandardScaler()
amount_as_column = creditcard['amount'].values.reshape(-1, 1)
creditcard['amount'] = scaler.fit_transform(amount_as_column)

In [83]:
# Select the target by name rather than by position, so the features/label
# separation stays correct even if the column order of the CSV changes
# (the original relied on 'fraud' being the last column).
X = creditcard.drop('fraud', axis=1)
y = creditcard['fraud']

# Hold out 33% of the rows for evaluation. Stratifying on y preserves the
# class proportions in both partitions; the fixed random_state makes the
# split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=.33, stratify=y, random_state=1)

In [84]:
# Logistic regression classifier with every hyper-parameter left at the
# library default.
model = LogisticRegression()

In [85]:
# Train on the training partition; the fitted estimator's repr is shown as
# the cell output.
model.fit(Xtrain, ytrain)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [86]:
def pandas_classification_report(y_true, y_pred):
    """Build a per-class classification report as a pandas DataFrame.

    Produces the same metrics as ``sklearn.metrics.classification_report``
    but as a DataFrame, so the numbers can be exported or post-processed.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, in the same order as ``y_true``.

    Returns
    -------
    pandas.DataFrame
        Columns ``precision``, ``recall``, ``f1-score`` and ``support``.
        One row per class, indexed by the actual class label, followed by
        an ``'avg / total'`` row holding the support-weighted averages and
        the total support.
    """
    metric_names = ['precision', 'recall', 'f1-score', 'support']

    # The per-class arrays are ordered like unique_labels(y_true, y_pred).
    # Naming the columns with the real labels keeps the report correct when
    # the classes are not exactly 0..n-1 (e.g. string labels or {1, 2});
    # without it pandas numbers the columns positionally.
    class_labels = unique_labels(y_true, y_pred)
    per_class = precision_recall_fscore_support(
        y_true=y_true,
        y_pred=y_pred)
    class_report_df = pd.DataFrame(
        list(per_class),
        index=metric_names,
        columns=class_labels)

    # Support-weighted averages. The averaged call reports no support, so
    # the last slot is filled with the total number of samples instead.
    avg = list(precision_recall_fscore_support(
        y_true=y_true,
        y_pred=y_pred,
        average='weighted'))
    avg[-1] = class_report_df.loc['support'].sum()

    class_report_df['avg / total'] = avg

    # Transpose so that classes become rows and metrics become columns.
    return class_report_df.T

In [87]:
# The model was already trained on (Xtrain, ytrain) in the fitting cell
# above, so it is not refitted here; repeating the same fit on the same data
# only cost time.
y_pred = model.predict(Xtest)

# And finally: the results
print(classification_report(ytest, y_pred))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00     93825
          1       0.89      0.67      0.76       162

avg / total       1.00      1.00      1.00     93987



In [88]:
# Mean accuracy on the held-out set. With so few positive rows, accuracy
# alone is a weak signal: the printed report above shows only 0.67 recall on
# class 1 despite the near-perfect score here.
model.score(Xtest, ytest)

0.99928713545490333

In [89]:
# Same metrics as the printed report, but held in a DataFrame so they can be
# exported or processed further.
df_class_report = pandas_classification_report(y_true=ytest, y_pred=y_pred)
print(df_class_report)

             precision    recall  f1-score  support
0             0.999435  0.999851  0.999643  93825.0
1             0.886179  0.672840  0.764912    162.0
avg / total   0.999240  0.999287  0.999238  93987.0


In [91]:
# Write the report table to 'class.csv' as comma-separated text (relative
# path, so the file lands in the current working directory).
df_class_report.to_csv('class.csv',  sep=',')

In [97]:
# Pair each predicted label with the scaled 'amount' of the same test row.
# y_pred is the output of model.predict(Xtest); the frame's index comes from
# the Xtest['amount'] Series, so rows keep their original row labels.
df = pd.DataFrame({'Class':y_pred, 'Feature' : Xtest['amount'] })
df.head()

Unnamed: 0,Class,Feature
180970,0,-0.093354
88377,0,0.031706
268802,0,-0.253277
109057,0,-0.349231
213415,0,-0.055892
