In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the credit-card transactions CSV into a DataFrame.
# The file location can be overridden with the CREDITCARD_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original local path is used as the default.
CAMINHO_DADOS = os.environ.get(
    'CREDITCARD_CSV',
    r'C:\Users\jhow_\OneDrive\Documentos\Fraud_Prevention\creditcard_2023.csv',
)

df_credit = pd.read_csv(CAMINHO_DADOS)

In [3]:
# Preview the first five rows of the loaded data
df_credit.head(n=5)

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,...,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1,0
1,1,0.9851,-0.356045,0.558056,-0.429654,0.27714,0.428605,0.406466,-0.133118,0.347452,...,-0.194936,-0.605761,0.079469,-0.577395,0.19009,0.296503,-0.248052,-0.064512,6531.37,0
2,2,-0.260272,-0.949385,1.728538,-0.457986,0.074062,1.419481,0.743511,-0.095576,-0.261297,...,-0.00502,0.702906,0.945045,-1.154666,-0.605564,-0.312895,-0.300258,-0.244718,2513.54,0
3,3,-0.152152,-0.508959,1.74684,-1.090178,0.249486,1.143312,0.518269,-0.06513,-0.205698,...,-0.146927,-0.038212,-0.214048,-1.893131,1.003963,-0.51595,-0.165316,0.048424,5384.44,0
4,4,-0.20682,-0.16528,1.527053,-0.448293,0.106125,0.530549,0.658849,-0.21266,1.049921,...,-0.106984,0.729727,-0.161666,0.312561,-0.414116,1.071126,0.023712,0.419117,14278.97,0


In [4]:
# Count missing values in each column

df_credit.isna().sum()

id        0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [5]:
# Summary statistics of Amount for the non-fraud rows (Class == 0)

df_nao_fraude = df_credit.loc[df_credit['Class'] == 0, 'Amount']

df_nao_fraude.describe()

count    284315.000000
mean      12026.313506
std        6929.500715
min          50.120000
25%        6034.540000
50%       11996.900000
75%       18040.265000
max       24039.930000
Name: Amount, dtype: float64

In [6]:
# Summary statistics of Amount for the fraud rows (Class == 1)

df_fraude = df_credit.loc[df_credit['Class'] == 1, 'Amount']

df_fraude.describe()

count    284315.000000
mean      12057.601763
std        6909.750891
min          50.010000
25%        6074.640000
50%       12062.450000
75%       18033.780000
max       24039.930000
Name: Amount, dtype: float64

In [7]:
# Confirm how many fraud and non-fraud records there are

df_credit['Class'].value_counts()

Class
0    284315
1    284315
Name: count, dtype: int64

In [8]:
# Split the data by class; the validation rows are sliced from these frames
# in a later cell.
# (A stand-alone `.head(10)` call used to sit in the middle of this cell; its
# result was discarded because only a cell's last expression is displayed.)

# Non-fraud rows (Class == 0)
df_validacao_nao_fraude = df_credit[(df_credit['Class']==0)]

# Fraud rows (Class == 1)
df_validacao_fraude = df_credit[(df_credit['Class']==1)]
df_validacao_fraude.tail(10)

Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
568625,568625,-0.833437,0.061886,-0.899794,0.904227,-1.002401,0.481454,-0.370393,0.189694,-0.938153,...,0.167503,0.419731,1.288249,-0.900861,0.560661,-0.006018,3.308968,0.081564,4394.16,1
568626,568626,-0.670459,-0.202896,-0.068129,-0.267328,-0.13366,0.237148,-0.016935,-0.147733,0.483894,...,0.031874,0.388161,-0.154257,-0.846452,-0.153443,1.961398,-1.528642,1.704306,4653.4,1
568627,568627,-0.311997,-0.004095,0.137526,-0.035893,-0.042291,0.121098,-0.070958,-0.019997,-0.122048,...,0.140788,0.536523,-0.2111,-0.448909,0.540073,-0.755836,-0.48754,-0.268741,23572.85,1
568628,568628,0.636871,-0.51697,-0.300889,-0.14448,0.131042,-0.294148,0.580568,-0.207723,0.893527,...,-0.060381,-0.195609,-0.175488,-0.554643,-0.099669,-1.434931,-0.159269,-0.076251,10160.83,1
568629,568629,-0.795144,0.433236,-0.64914,0.374732,-0.244976,-0.603493,-0.347613,-0.340814,0.253971,...,0.534853,-0.291514,0.157303,0.93103,-0.349423,-1.090974,-1.575113,0.722936,21493.92,1


In [9]:
# Modelling frame: every row except the validation rows, i.e. skip the first
# 10 non-fraud rows and the last 10 fraud rows, then stack the two pieces.
nao_fraude_restante = df_validacao_nao_fraude.iloc[10:]
fraude_restante = df_validacao_fraude.iloc[:-10]
df = pd.concat([nao_fraude_restante, fraude_restante], axis=0)

df


Unnamed: 0,id,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
10,10,1.206014,-0.883519,1.021299,-1.410047,-0.341591,0.101491,0.036318,-0.141480,-0.348832,...,-0.106932,0.252151,0.004350,1.065933,0.330100,-0.289691,-0.188490,-0.060815,4741.71,0
11,11,0.409314,-0.245063,0.152566,-0.671316,1.874091,2.139539,0.608736,-0.026862,0.185819,...,-0.083222,0.180021,-0.014540,2.212670,-1.280261,-1.216546,-0.188929,-0.231608,14574.34,0
12,12,1.033525,-0.901634,0.676980,-1.319313,-0.216000,0.024725,0.222965,-0.203210,-0.501905,...,-0.197417,-0.472722,0.062430,0.832732,0.180102,-0.853823,-0.207514,0.002436,21696.89,0
13,13,0.886031,-0.349039,0.960931,0.333001,0.186379,0.664916,0.400542,-0.126042,0.366589,...,-0.118027,0.025738,-0.095727,0.240107,0.833120,0.248660,-0.213190,-0.048630,12157.97,0
14,14,-0.571544,-0.559877,1.609161,0.045297,0.201712,0.916651,0.298916,-0.549311,1.003897,...,0.336079,0.164591,1.068878,0.091185,-0.456405,-0.549904,-0.419742,-0.173067,508.29,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
568615,568615,-0.466652,0.398304,-0.714114,0.513052,-0.206537,-0.360422,0.400757,-0.070831,-0.340726,...,-0.074325,-0.210233,0.146760,-0.796232,0.471923,-0.759137,0.274380,-0.719030,7280.98,1
568616,568616,-0.120366,0.166819,-0.270633,-0.353998,0.468713,-0.321819,0.175881,-0.017534,-0.377283,...,0.066853,0.063864,-0.359510,-0.119500,0.504786,0.431325,0.170375,0.356759,16790.09,1
568617,568617,-0.471231,-1.803470,-0.449579,0.469756,-0.521422,0.314256,0.191152,-0.084771,0.044867,...,0.366286,-0.624392,-1.415688,-0.091027,0.326850,1.056809,0.122931,1.179464,10581.42,1
568618,568618,-0.581417,1.423379,-1.540101,2.065770,-0.609387,-1.535716,-1.290519,0.762154,-1.727404,...,0.605362,0.258693,0.306746,-1.911974,-1.119902,1.321076,2.599372,1.777184,18648.28,1


In [10]:
# Check the fraud / non-fraud counts of the modelling frame

df['Class'].value_counts()

Class
0    284305
1    284305
Name: count, dtype: int64

In [11]:
# Build the validation set from the first 10 non-fraud rows and the last 10
# fraud rows. The slices get their own names instead of overwriting the
# per-class frames, so cells that slice those frames can still be re-run
# after this one.
val_nao_fraude = df_validacao_nao_fraude.head(10)
val_fraude = df_validacao_fraude.tail(10)


# Stack both slices and renumber the rows from 0, discarding the old index

df_val_completo = pd.concat([val_nao_fraude, val_fraude]).reset_index(drop=True)

# True labels, kept aside to compare against the predictions
df_val_total_real = df_val_completo.Class
# Model inputs: every column except the identifier and the label
df_val_total = df_val_completo.drop(['id','Class'], axis=1)
df_val_total

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,-0.260648,-0.469648,2.496266,-0.083724,0.129681,0.732898,0.519014,-0.130006,0.727159,0.637735,...,0.091202,-0.110552,0.217606,-0.134794,0.165959,0.12628,-0.434824,-0.08123,-0.151045,17982.1
1,0.9851,-0.356045,0.558056,-0.429654,0.27714,0.428605,0.406466,-0.133118,0.347452,0.529808,...,-0.233984,-0.194936,-0.605761,0.079469,-0.577395,0.19009,0.296503,-0.248052,-0.064512,6531.37
2,-0.260272,-0.949385,1.728538,-0.457986,0.074062,1.419481,0.743511,-0.095576,-0.261297,0.690708,...,0.361652,-0.00502,0.702906,0.945045,-1.154666,-0.605564,-0.312895,-0.300258,-0.244718,2513.54
3,-0.152152,-0.508959,1.74684,-1.090178,0.249486,1.143312,0.518269,-0.06513,-0.205698,0.575231,...,-0.378223,-0.146927,-0.038212,-0.214048,-1.893131,1.003963,-0.51595,-0.165316,0.048424,5384.44
4,-0.20682,-0.16528,1.527053,-0.448293,0.106125,0.530549,0.658849,-0.21266,1.049921,0.968046,...,0.247237,-0.106984,0.729727,-0.161666,0.312561,-0.414116,1.071126,0.023712,0.419117,14278.97
5,0.025302,-0.140514,1.191138,-0.707979,0.43049,0.458973,0.61105,-0.092629,0.180811,0.451788,...,-0.0764,-0.187739,-0.538518,-0.050465,-0.631553,-0.45648,0.25267,0.066681,0.095812,6901.49
6,1.016482,-0.397181,0.497868,-0.144463,0.331022,0.629243,0.431262,-0.134007,0.796159,0.557015,...,-0.390343,-0.171137,-0.287017,-0.178197,-1.297597,1.182503,-0.604228,-0.198163,-0.087619,18954.45
7,-0.051306,-0.007194,1.139941,-0.87788,0.684668,0.714326,0.892615,-0.908409,0.901938,1.258025,...,-0.324758,0.620676,-0.920426,0.03466,-1.091527,-0.742075,-0.104863,-1.382522,-2.748268,12298.23
8,-0.13068,-0.349547,0.425786,-0.760444,1.702777,2.324816,0.568968,0.0491,0.273118,0.437518,...,-0.109164,-0.132787,-0.2847,-0.227779,2.248754,0.534846,-0.929738,-0.224385,0.24379,22052.9
9,0.058419,-0.093507,1.11727,-0.735172,0.466111,0.332371,0.683425,-0.136674,0.096409,0.453469,...,0.043428,-0.203634,-0.601581,-0.145082,-0.654783,-0.196621,0.226818,0.057119,0.100629,210.35


In [12]:
# Target is the Class column; features are every column except id and Class
Y = df['Class']
X = df.drop(columns=['id','Class'])

In [13]:
# Plain-text dump of the feature matrix followed by the target vector
print(X, Y, sep=' ')

              V1        V2        V3        V4        V5        V6        V7  \
10      1.206014 -0.883519  1.021299 -1.410047 -0.341591  0.101491  0.036318   
11      0.409314 -0.245063  0.152566 -0.671316  1.874091  2.139539  0.608736   
12      1.033525 -0.901634  0.676980 -1.319313 -0.216000  0.024725  0.222965   
13      0.886031 -0.349039  0.960931  0.333001  0.186379  0.664916  0.400542   
14     -0.571544 -0.559877  1.609161  0.045297  0.201712  0.916651  0.298916   
...          ...       ...       ...       ...       ...       ...       ...   
568615 -0.466652  0.398304 -0.714114  0.513052 -0.206537 -0.360422  0.400757   
568616 -0.120366  0.166819 -0.270633 -0.353998  0.468713 -0.321819  0.175881   
568617 -0.471231 -1.803470 -0.449579  0.469756 -0.521422  0.314256  0.191152   
568618 -0.581417  1.423379 -1.540101  2.065770 -0.609387 -1.535716 -1.290519   
568619 -0.443917  0.243549  0.126811  0.400266 -0.326371  0.517670  0.086289   

              V8        V9       V10  .

In [14]:
# Split into training and test sets: 20% held out for testing, stratified on
# the label, with a fixed random_state.

X_treino, X_teste, Y_treino, Y_teste = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=13,
    stratify=Y,
)

In [15]:
# Train a logistic regression on the training split and measure its accuracy
# on the test split.

lr = LogisticRegression(max_iter=1000)
lr.fit(X_treino,Y_treino)
pred = lr.predict(X_teste)
acc = accuracy_score(Y_teste, pred)

# Show the accuracy as a percentage with two decimals. The previous spec '2f'
# only set a minimum field width of 2 and therefore printed six decimals.
f'Acurácia:{acc * 100:.2f}'

'Acurácia:95.955928'

In [16]:
# Predict on the validation rows and put the predictions next to the true
# labels. Dedicated names are used so the modelling frame `df` and the
# test-split predictions `pred` from earlier cells are not overwritten, which
# keeps those cells re-runnable.

pred_validacao = lr.predict(df_val_total)

df_comparacao = pd.DataFrame({'real':df_val_total_real, 'previsão': pred_validacao})

df_comparacao

Unnamed: 0,real,previsão
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
5,0,0
6,0,0
7,0,0
8,0,0
9,0,0
