##  Détection de fraude financière

In [1]:

import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# The CSV location can be overridden through the FRAUD_DATA_PATH environment
# variable, so the notebook is not tied to one machine; the original local
# path is kept as the default so existing runs behave the same.
DATA_PATH = os.environ.get(
    'FRAUD_DATA_PATH',
    'C:/Users/marcel.anee/Downloads/Synthetic_Financial_datasets_log.csv',
)
data = pd.read_csv(DATA_PATH)

In [4]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


####   Structure de l'ensemble de données
step : Représente une unité de temps dans le monde réel, 1 pas équivalant à 1 heure. La simulation totale s'étend sur 744 étapes, soit l'équivalent de 31 jours (744 h ÷ 24 h).

type : les types de transactions incluent CASH_IN, CASH_OUT, DEBIT, PAYMENT et TRANSFER.

amount : le montant de la transaction dans la devise locale.

nameOrig : Le client qui initie la transaction.

oldbalanceOrg : Le solde initial avant la transaction.

newbalanceOrig : Le nouveau solde après la transaction.

nameDest : client destinataire de la transaction.

oldbalanceDest : le solde initial du destinataire avant la transaction. Non applicable pour les clients identifiés par « M » (commerçants).

newbalanceDest : le nouveau solde du destinataire après la transaction. Ne s'applique pas aux clients identifiés par « M » (commerçants).

isFraud : identifie les transactions effectuées par des agents frauduleux visant à épuiser les comptes clients par le biais de transferts et de retraits.

isFlaggedFraud : signale les transferts non autorisés à grande échelle entre comptes, toute transaction dépassant 200 000 étant considérée comme illégale.

In [5]:
# Summarise the raw frame: row count, column dtypes and memory footprint.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6362620 entries, 0 to 6362619
Data columns (total 11 columns):
 #   Column          Dtype  
---  ------          -----  
 0   step            int64  
 1   type            object 
 2   amount          float64
 3   nameOrig        object 
 4   oldbalanceOrg   float64
 5   newbalanceOrig  float64
 6   nameDest        object 
 7   oldbalanceDest  float64
 8   newbalanceDest  float64
 9   isFraud         int64  
 10  isFlaggedFraud  int64  
dtypes: float64(5), int64(3), object(3)
memory usage: 534.0+ MB


In [6]:
# Work on an independent (deep) copy so the raw `data` frame stays untouched.
df = data.copy(deep=True)

In [7]:
# Summary statistics of the numeric columns (quartiles spelled out explicitly).
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
count,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0,6362620.0
mean,243.3972,179861.9,833883.1,855113.7,1100702.0,1224996.0,0.00129082,2.514687e-06
std,142.332,603858.2,2888243.0,2924049.0,3399180.0,3674129.0,0.0359048,0.001585775
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,156.0,13389.57,0.0,0.0,0.0,0.0,0.0,0.0
50%,239.0,74871.94,14208.0,0.0,132705.7,214661.4,0.0,0.0
75%,335.0,208721.5,107315.2,144258.4,943036.7,1111909.0,0.0,0.0
max,743.0,92445520.0,59585040.0,49585040.0,356015900.0,356179300.0,1.0,1.0


In [39]:
# Amount above which a transaction is kept in `df0`. This is the same
# 200 000 figure that the dataset description quotes for `isFlaggedFraud`.
LARGE_AMOUNT_THRESHOLD = 200_000

# Transactions whose amount strictly exceeds the threshold.
df0 = df[df['amount'] > LARGE_AMOUNT_THRESHOLD]
df0

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
15,1,CASH_OUT,229133.94,C905080434,15325.00,0.0,C476402209,5083.00,51513.44,0,0
19,1,TRANSFER,215310.30,C1670993182,705.00,0.0,C1100439041,22425.00,0.00,0,0
24,1,TRANSFER,311685.89,C1984094095,10835.00,0.0,C932583850,6267.00,2719172.89,0,0
82,1,TRANSFER,224606.64,C873175411,0.00,0.0,C766572210,354678.92,0.00,0,0
84,1,TRANSFER,379856.23,C1449772539,0.00,0.0,C1590550415,900180.00,19169204.93,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.0,C776919290,0.00,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.0,C1881841831,0.00,0.00,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.0,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.0,C2080388513,0.00,0.00,1,0


In [19]:
# Number of large-amount transactions per transaction type.
df0.groupby('type')['amount'].agg('count')

type
CASH_IN     475868
CASH_OUT    788559
DEBIT           27
PAYMENT          6
TRANSFER    409110
Name: amount, dtype: int64

In [27]:
# Total amount of large-amount transactions per transaction type.
df0.groupby('type')['amount'].agg('sum')

type
CASH_IN     1.486015e+11
CASH_OUT    2.558416e+11
DEBIT       8.158093e+06
PAYMENT     1.326107e+06
TRANSFER    4.729285e+11
Name: amount, dtype: float64

In [24]:
# Rows that the dataset itself flags as fraud (isFlaggedFraud == 1).
# NOTE(review): `df1` is reassigned to the isFraud subset in a later cell,
# so this binding only holds until that cell runs.
df1 = df.loc[df['isFlaggedFraud'].eq(1)]
df1

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2736446,212,TRANSFER,4953893.08,C728984460,4953893.08,4953893.08,C639921569,0.0,0.0,1,1
3247297,250,TRANSFER,1343002.08,C1100582606,1343002.08,1343002.08,C1147517658,0.0,0.0,1,1
3760288,279,TRANSFER,536624.41,C1035541766,536624.41,536624.41,C1100697970,0.0,0.0,1,1
5563713,387,TRANSFER,4892193.09,C908544136,4892193.09,4892193.09,C891140444,0.0,0.0,1,1
5996407,425,TRANSFER,10000000.0,C689608084,19585040.37,19585040.37,C1392803603,0.0,0.0,1,1
5996409,425,TRANSFER,9585040.37,C452586515,19585040.37,19585040.37,C1109166882,0.0,0.0,1,1
6168499,554,TRANSFER,3576297.1,C193696150,3576297.1,3576297.1,C484597480,0.0,0.0,1,1
6205439,586,TRANSFER,353874.22,C1684585475,353874.22,353874.22,C1770418982,0.0,0.0,1,1
6266413,617,TRANSFER,2542664.27,C786455622,2542664.27,2542664.27,C661958277,0.0,0.0,1,1
6281482,646,TRANSFER,10000000.0,C19004745,10399045.08,10399045.08,C1806199534,0.0,0.0,1,1


In [34]:
# Rows labelled as actual fraud (isFraud == 1); rebinds `df1`.
df1 = df.loc[df['isFraud'].eq(1)]
df1

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
2,1,TRANSFER,181.00,C1305486145,181.00,0.0,C553264065,0.00,0.00,1,0
3,1,CASH_OUT,181.00,C840083671,181.00,0.0,C38997010,21182.00,0.00,1,0
251,1,TRANSFER,2806.00,C1420196421,2806.00,0.0,C972765878,0.00,0.00,1,0
252,1,CASH_OUT,2806.00,C2101527076,2806.00,0.0,C1007251739,26202.00,0.00,1,0
680,1,TRANSFER,20128.00,C137533655,20128.00,0.0,C1848415041,0.00,0.00,1,0
...,...,...,...,...,...,...,...,...,...,...,...
6362615,743,CASH_OUT,339682.13,C786484425,339682.13,0.0,C776919290,0.00,339682.13,1,0
6362616,743,TRANSFER,6311409.28,C1529008245,6311409.28,0.0,C1881841831,0.00,0.00,1,0
6362617,743,CASH_OUT,6311409.28,C1162922333,6311409.28,0.0,C1365125890,68488.84,6379898.11,1,0
6362618,743,TRANSFER,850002.52,C1685995037,850002.52,0.0,C2080388513,0.00,0.00,1,0


In [35]:
# Per-type totals for `df1`: amount and origin balances before/after.
# Several columns must be selected with a list: the tuple form
# (['a', 'b', 'c'] without the inner brackets) is deprecated and raises
# in pandas >= 2.0.
df1.groupby('type')[['amount', 'oldbalanceOrg', 'newbalanceOrig']].sum()

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,amount,oldbalanceOrg,newbalanceOrig
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CASH_OUT,5989202000.0,5984125000.0,298767.6
TRANSFER,6067213000.0,7564595000.0,1579822000.0


In [32]:
# Share of all transactions that are in `df1`, broken down by type.
df1.groupby('type')['amount'].count().div(len(df))

type
CASH_OUT    0.000647
TRANSFER    0.000644
Name: amount, dtype: float64

In [41]:
# Distinct transaction types, in order of first appearance.
pd.unique(df['type'])

array(['PAYMENT', 'TRANSFER', 'CASH_OUT', 'DEBIT', 'CASH_IN'],
      dtype=object)

In [46]:
# Fraction of the total transacted amount contributed by each type.
df_tra = df.groupby('type')['amount'].sum().div(df['amount'].sum())
df_tra

type
CASH_IN     0.206544
CASH_OUT    0.344648
DEBIT       0.000199
PAYMENT     0.024549
TRANSFER    0.424061
Name: amount, dtype: float64

CASH_IN : Dépôt d'argent (ou Entrée d'argent)

CASH_OUT : Retrait d'argent (ou Sortie d'argent)

DEBIT : Débit (ou Prélèvement)

PAYMENT : Paiement

TRANSFER : Transfert








In [49]:
# Balance movement on each side of the transaction:
#   orgDiff  = amount that left the origin account (old - new)
#   destDiff = amount that arrived on the destination account (new - old)
df['orgDiff'] = df['oldbalanceOrg'].sub(df['newbalanceOrig'])
df['destDiff'] = df['newbalanceDest'].sub(df['oldbalanceDest'])

In [51]:
# Per-type totals of the balance movements next to the declared amount.
# Several columns must be selected with a list: the tuple form is deprecated
# and raises in pandas >= 2.0.
df.groupby('type')[['orgDiff', 'destDiff', 'amount']].sum()

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,orgDiff,destDiff,amount
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CASH_IN,-236360300000.0,-169052300000.0,236367400000.0
CASH_OUT,63879760000.0,433108800000.0,394413000000.0
DEBIT,144418900.0,823158400.0,227199200.0
PAYMENT,13724250000.0,0.0,28093370000.0
TRANSFER,23529900000.0,525960500000.0,485292000000.0


In [52]:
# Side-by-side view of the balance movements and the declared amount.
df.loc[:, ['orgDiff', 'destDiff', 'amount']]

Unnamed: 0,orgDiff,destDiff,amount
0,9839.64,0.00,9839.64
1,1864.28,0.00,1864.28
2,181.00,0.00,181.00
3,181.00,-21182.00,181.00
4,11668.14,0.00,11668.14
...,...,...,...
6362615,339682.13,339682.13,339682.13
6362616,6311409.28,0.00,6311409.28
6362617,6311409.28,6311409.27,6311409.28
6362618,850002.52,0.00,850002.52
