In [61]:
#importando as bibliotecas
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [62]:
# Load the transactions dataset from the CSV file in the working directory.
df_credit = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [63]:
# Print the number of rows in the loaded frame.
print(df_credit.shape[0])

284807


In [64]:
# Preview the first five rows of the frame.
df_credit.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [65]:
# Inspect the data type of every column.
df_credit.dtypes

Time      float64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
Class       int64
dtype: object

In [66]:
# Count missing values per column.
df_credit.isna().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [67]:
# Descriptive statistics of Amount for the non-fraud rows (Class == 0).
# Note: despite the df_ prefix, this is a Series holding only the Amount column.
df_nao_fraude = df_credit.loc[df_credit["Class"] == 0, "Amount"]

df_nao_fraude.describe()

count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64

In [68]:
# Descriptive statistics of Amount for the fraud rows (Class == 1).
# Note: despite the df_ prefix, this is a Series holding only the Amount column.
df_fraude = df_credit.loc[df_credit["Class"] == 1, "Amount"]

df_fraude.describe()

count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

In [69]:
# Count how many rows belong to each class (fraud vs. non-fraud).
df_credit["Class"].value_counts()

0    284315
1       492
Name: Class, dtype: int64

In [71]:
# Keep only the fraud rows (Class == 1), with every column.
df_fraude = df_credit.loc[df_credit["Class"] == 1]
df_fraude

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [72]:
# Keep only the non-fraud rows (Class == 0), with every column.
df_nao_fraude = df_credit.loc[df_credit["Class"] == 0]
df_nao_fraude

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [73]:
# Undersampling: draw as many non-fraud rows as there are fraud rows so both
# classes end up the same size. The sample size is derived from df_fraude
# rather than hard-coded, and random_state is fixed so that a fresh
# "Restart & Run All" selects the same rows (and reproduces the same results).
df_nao_fraudes = df_nao_fraude.sample(n=len(df_fraude), random_state=42)
df_nao_fraudes

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
2021,1562.0,-0.755694,0.890059,2.297006,0.669007,-0.578496,-0.047670,0.111612,0.048000,0.481642,...,0.065501,0.227281,-0.139697,0.373683,-0.078819,-0.488131,-0.280432,0.076675,9.99,0
90923,63235.0,-0.062196,-2.285160,-0.190045,1.506287,-0.554500,1.903214,0.060063,0.384575,0.844881,...,0.166241,-0.744687,-0.734458,-1.692755,0.296735,-0.387426,-0.044879,0.115344,651.30,0
92993,64207.0,-0.358567,-0.825828,1.236936,-1.584633,-0.223495,0.232184,-0.655195,0.127449,-2.895535,...,0.005431,0.274225,0.051949,-0.653111,-0.369366,-0.084303,0.164024,0.140513,52.56,0
247529,153605.0,2.034162,-0.219934,-1.212055,0.229131,-0.021588,-0.650617,-0.038320,-0.059248,0.520245,...,-0.270274,-0.740615,0.311825,-0.434858,-0.347984,0.208867,-0.081039,-0.077228,0.99,0
54551,46500.0,-0.931097,-0.943608,1.854455,-2.226019,-0.389762,1.778848,0.534094,0.425034,1.965833,...,0.252356,1.082228,0.234628,-0.890085,0.178260,0.105490,-0.118135,-0.212576,210.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11538,19905.0,1.377575,-0.587682,0.286722,-0.604861,-0.439117,0.472910,-0.890511,0.125100,0.730722,...,-0.157057,-0.128217,-0.094801,-0.859984,0.522132,-0.216533,-0.004389,-0.017386,5.00,0
261693,160142.0,-2.434917,1.945193,-0.280117,-0.397860,-1.094542,-0.204943,-0.795191,1.554444,-0.200519,...,0.020899,-0.353631,0.037572,-0.652486,-0.076477,-0.519212,-0.589365,-0.120345,25.00,0
130776,79429.0,1.007996,0.128932,0.411965,0.989933,0.328113,0.949796,-0.155394,0.343526,-0.058410,...,0.223976,0.849146,0.040917,-0.618387,0.332997,-0.190556,0.092549,0.013230,14.90,0
76917,56802.0,1.325143,-0.058738,-1.236520,-0.535874,2.073069,3.249471,-0.517386,0.775135,-0.046325,...,-0.012067,-0.137602,-0.110652,1.047376,0.680811,0.444496,-0.033062,0.003923,2.31,0


In [74]:
# Stack the undersampled non-fraud rows on top of the fraud rows (row-wise).
df = pd.concat(objs=[df_nao_fraudes, df_fraude])
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
2021,1562.0,-0.755694,0.890059,2.297006,0.669007,-0.578496,-0.047670,0.111612,0.048000,0.481642,...,0.065501,0.227281,-0.139697,0.373683,-0.078819,-0.488131,-0.280432,0.076675,9.99,0
90923,63235.0,-0.062196,-2.285160,-0.190045,1.506287,-0.554500,1.903214,0.060063,0.384575,0.844881,...,0.166241,-0.744687,-0.734458,-1.692755,0.296735,-0.387426,-0.044879,0.115344,651.30,0
92993,64207.0,-0.358567,-0.825828,1.236936,-1.584633,-0.223495,0.232184,-0.655195,0.127449,-2.895535,...,0.005431,0.274225,0.051949,-0.653111,-0.369366,-0.084303,0.164024,0.140513,52.56,0
247529,153605.0,2.034162,-0.219934,-1.212055,0.229131,-0.021588,-0.650617,-0.038320,-0.059248,0.520245,...,-0.270274,-0.740615,0.311825,-0.434858,-0.347984,0.208867,-0.081039,-0.077228,0.99,0
54551,46500.0,-0.931097,-0.943608,1.854455,-2.226019,-0.389762,1.778848,0.534094,0.425034,1.965833,...,0.252356,1.082228,0.234628,-0.890085,0.178260,0.105490,-0.118135,-0.212576,210.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [75]:
# Renumber the rows from 0; the previous row labels are kept as an "index" column.
# Reassigning the result is equivalent to the in-place form here.
df = df.reset_index()
df

Unnamed: 0,index,Time,V1,V2,V3,V4,V5,V6,V7,V8,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,2021,1562.0,-0.755694,0.890059,2.297006,0.669007,-0.578496,-0.047670,0.111612,0.048000,...,0.065501,0.227281,-0.139697,0.373683,-0.078819,-0.488131,-0.280432,0.076675,9.99,0
1,90923,63235.0,-0.062196,-2.285160,-0.190045,1.506287,-0.554500,1.903214,0.060063,0.384575,...,0.166241,-0.744687,-0.734458,-1.692755,0.296735,-0.387426,-0.044879,0.115344,651.30,0
2,92993,64207.0,-0.358567,-0.825828,1.236936,-1.584633,-0.223495,0.232184,-0.655195,0.127449,...,0.005431,0.274225,0.051949,-0.653111,-0.369366,-0.084303,0.164024,0.140513,52.56,0
3,247529,153605.0,2.034162,-0.219934,-1.212055,0.229131,-0.021588,-0.650617,-0.038320,-0.059248,...,-0.270274,-0.740615,0.311825,-0.434858,-0.347984,0.208867,-0.081039,-0.077228,0.99,0
4,54551,46500.0,-0.931097,-0.943608,1.854455,-2.226019,-0.389762,1.778848,0.534094,0.425034,...,0.252356,1.082228,0.234628,-0.890085,0.178260,0.105490,-0.118135,-0.212576,210.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
980,280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
981,280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
982,281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [76]:
# Pick the rows reserved for validation: the first five rows of df
# (non-fraud, since those were concatenated first) and the last five (fraud).
df_val_nao_fraude = df.iloc[:5]
df_val_fraude = df.iloc[-5:]

In [77]:
# Remove the validation rows from the working dataset in one positional slice:
# skip the first five rows (df_val_nao_fraude) and the last five (df_val_fraude).
df = df.iloc[5:-5]

df

Unnamed: 0,index,Time,V1,V2,V3,V4,V5,V6,V7,V8,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
5,130400,79346.0,1.225859,-0.524700,0.760468,-0.902467,-1.135976,-0.350815,-0.791824,0.226381,...,-0.000731,0.134439,-0.069333,-0.016877,0.459366,-0.678295,0.078870,0.018331,1.00,0
6,261825,160196.0,1.928276,-0.222370,-1.185748,0.241687,0.472690,0.339912,-0.085084,0.131156,...,-0.206936,-0.357918,0.431548,-1.039673,-0.533345,0.296885,-0.018721,-0.072248,0.99,0
7,95194,65201.0,1.301032,0.069544,-0.129825,0.258011,0.292424,0.256970,-0.103621,0.077215,...,-0.328878,-0.972894,-0.001384,-1.360381,0.295091,0.213629,-0.022265,0.000537,8.99,0
8,172983,121315.0,-1.858265,-0.455812,-2.841775,-3.733399,1.470957,3.326065,-2.248046,-1.040453,...,-1.047266,1.346686,0.515654,0.704628,-0.756911,-0.196102,0.314380,-0.149077,10.00,0
9,76450,56576.0,-1.772881,-0.576770,2.005393,-0.136205,2.094621,-1.507279,0.010222,-0.085572,...,-0.126209,-0.699722,0.085763,0.033397,0.213502,0.006128,-0.206353,-0.121731,0.89,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
974,274382,165981.0,-5.766879,-8.402154,0.056543,6.950983,9.880564,-5.773192,-5.748879,0.721743,...,0.880395,-0.130436,2.241471,0.665346,-1.890041,-0.120803,0.073269,0.583799,0.00,1
975,274475,166028.0,-0.956390,2.361594,-3.171195,1.970759,0.474761,-1.902598,-0.055178,0.277831,...,0.473211,0.719400,0.122458,-0.255650,-0.619259,-0.484280,0.683535,0.443299,39.90,1
976,275992,166831.0,-2.027135,-1.131890,-1.135194,1.086963,-0.010547,0.423797,3.790880,-1.155595,...,-0.315105,0.575520,0.490842,0.756502,-0.142685,-0.602777,0.508712,-0.091646,634.30,1
977,276071,166883.0,2.091900,-0.757459,-1.192258,-0.755458,-0.620324,-0.322077,-1.082511,0.117200,...,0.288253,0.831939,0.142007,0.592615,-0.196143,-0.136676,0.020182,-0.015470,19.95,1


In [78]:
# Combine both validation subsets into a single frame and renumber its rows;
# reset_index() keeps the previous row labels as an extra column.
df_val_total = pd.concat(objs=[df_val_nao_fraude, df_val_fraude]).reset_index()

# Only the Class column is needed as ground truth for the validation step.
df_val_total_real = df_val_total["Class"]

df_val_total

Unnamed: 0,level_0,index,Time,V1,V2,V3,V4,V5,V6,V7,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,2021,1562.0,-0.755694,0.890059,2.297006,0.669007,-0.578496,-0.04767,0.111612,...,0.065501,0.227281,-0.139697,0.373683,-0.078819,-0.488131,-0.280432,0.076675,9.99,0
1,1,90923,63235.0,-0.062196,-2.28516,-0.190045,1.506287,-0.5545,1.903214,0.060063,...,0.166241,-0.744687,-0.734458,-1.692755,0.296735,-0.387426,-0.044879,0.115344,651.3,0
2,2,92993,64207.0,-0.358567,-0.825828,1.236936,-1.584633,-0.223495,0.232184,-0.655195,...,0.005431,0.274225,0.051949,-0.653111,-0.369366,-0.084303,0.164024,0.140513,52.56,0
3,3,247529,153605.0,2.034162,-0.219934,-1.212055,0.229131,-0.021588,-0.650617,-0.03832,...,-0.270274,-0.740615,0.311825,-0.434858,-0.347984,0.208867,-0.081039,-0.077228,0.99,0
4,4,54551,46500.0,-0.931097,-0.943608,1.854455,-2.226019,-0.389762,1.778848,0.534094,...,0.252356,1.082228,0.234628,-0.890085,0.17826,0.10549,-0.118135,-0.212576,210.0,0
5,979,279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.88285,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.29268,0.147968,390.0,1
6,980,280143,169347.0,1.378559,1.289381,-5.004247,1.41185,0.442581,-1.326536,-1.41317,...,0.370612,0.028234,-0.14564,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
7,981,280149,169351.0,-0.676143,1.126366,-2.2137,0.468308,-1.120541,-0.003346,-2.234739,...,0.751826,0.834108,0.190944,0.03207,-0.739695,0.471111,0.385107,0.194361,77.89,1
8,982,281144,169966.0,-3.113832,0.585864,-5.39973,1.817092,-0.840618,-2.943548,-2.208002,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.2537,245.0,1
9,983,281674,170348.0,1.991976,0.158476,-2.583441,0.40867,1.151147,-0.096695,0.22305,...,-0.16435,-0.295135,-0.072173,-0.450261,0.313267,-0.289617,0.002988,-0.015309,42.53,1


In [79]:
# Drop the columns that will not be fed to the model.
df_val_total = df_val_total.drop(columns=["level_0", "index", "Time", "Class"])
df_val_total

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,-0.755694,0.890059,2.297006,0.669007,-0.578496,-0.04767,0.111612,0.048,0.481642,-0.32766,...,-0.129671,0.065501,0.227281,-0.139697,0.373683,-0.078819,-0.488131,-0.280432,0.076675,9.99
1,-0.062196,-2.28516,-0.190045,1.506287,-0.5545,1.903214,0.060063,0.384575,0.844881,-0.356625,...,1.141099,0.166241,-0.744687,-0.734458,-1.692755,0.296735,-0.387426,-0.044879,0.115344,651.3
2,-0.358567,-0.825828,1.236936,-1.584633,-0.223495,0.232184,-0.655195,0.127449,-2.895535,1.364843,...,0.119534,0.005431,0.274225,0.051949,-0.653111,-0.369366,-0.084303,0.164024,0.140513,52.56
3,2.034162,-0.219934,-1.212055,0.229131,-0.021588,-0.650617,-0.03832,-0.059248,0.520245,0.276677,...,-0.302552,-0.270274,-0.740615,0.311825,-0.434858,-0.347984,0.208867,-0.081039,-0.077228,0.99
4,-0.931097,-0.943608,1.854455,-2.226019,-0.389762,1.778848,0.534094,0.425034,1.965833,-2.206491,...,0.26298,0.252356,1.082228,0.234628,-0.890085,0.17826,0.10549,-0.118135,-0.212576,210.0
5,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.88285,0.697211,-2.064945,-5.587794,...,1.252967,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.29268,0.147968,390.0
6,1.378559,1.289381,-5.004247,1.41185,0.442581,-1.326536,-1.41317,0.248525,-1.127396,-3.232153,...,0.226138,0.370612,0.028234,-0.14564,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76
7,-0.676143,1.126366,-2.2137,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.65225,-3.463891,...,0.247968,0.751826,0.834108,0.190944,0.03207,-0.739695,0.471111,0.385107,0.194361,77.89
8,-3.113832,0.585864,-5.39973,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,-5.245984,...,0.306271,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.2537,245.0
9,1.991976,0.158476,-2.583441,0.40867,1.151147,-0.096695,0.22305,-0.068384,0.577829,-0.888722,...,-0.017652,-0.16435,-0.295135,-0.072173,-0.450261,0.313267,-0.289617,0.002988,-0.015309,42.53


In [80]:
# Count fraud vs. non-fraud rows remaining in the working dataset.
df["Class"].value_counts()

0    487
1    487
Name: Class, dtype: int64

In [81]:
# Separate the features (X) from the label (y).
X = df.drop(columns=["index", "Time", "Class"])
y = df.Class

In [82]:
# Split into training and test data (test_size=0.2, fixed random_state).
# stratify must receive the label vector `y` defined with X; the previous
# `stratify=Y` referenced an undefined name and raised NameError on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [83]:
# Fit a logistic regression on the training split, then predict the test split.
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
pred = lr.predict(X_test)

In [84]:
# Accuracy of the predictions against y_test. Both y_test and pred come from
# the test split, not the training data, so the message is labelled "teste"
# instead of "treinamento".
acc = accuracy_score(y_test, pred)
f'Acurácia do teste: {acc*100:.2f}%'

'Acurácia do treinamento: 95.38%'

In [28]:
# Accuracy on the ten held-out validation rows.
# Validation-specific names are used so this cell does not overwrite `df`
# (the working dataset), `pred` (test predictions) or `acc` (test accuracy);
# overwriting them made earlier cells fail when re-run after this one.
pred_val = lr.predict(df_val_total)
df_resultado_val = pd.DataFrame({'real': df_val_total_real, 'previsao': pred_val})
acc_val = accuracy_score(df_val_total_real, pred_val)

# Side-by-side view of the true label and the predicted label per row.
print(df_resultado_val)

f'Acurácia da validação: {acc_val*100:.2f}%'

   real  previsao
0     0         0
1     0         0
2     0         0
3     0         0
4     0         0
5     1         1
6     1         1
7     1         1
8     1         1
9     1         0


'Acurácia da validação: 90.00%'