In [1]:
import pandas as pd
import multiprocessing
from multiprocessing import Pool

In [2]:
# Read the labelled transactions CSV into the working dataframe `df`.

DATABASE_PATH = 'data/model_fraud_resume.csv'
df = pd.read_csv(filepath_or_buffer=DATABASE_PATH)

In [3]:
# Keep an independent snapshot of the data as loaded, so later results can be
# compared against the untouched frame.

df_original = df.copy(deep=True)

In [4]:
# ENRICHMENT
# deltaOrigen = |saldoInicialOrigen - monto|; it is 0 when the transaction amount
# equals the origin account's initial balance.

df['deltaOrigen'] = df['saldoInicialOrigen'].sub(df['monto']).abs()

In [5]:
# ALGORITHM FOR RETIROS
# Rule: a 'Retiro' is flagged when a 'Transferencia' of exactly the same amount
# exists in the same time unit or in one of the immediately preceding ones
# (3 time units in total by default).

def check_for_retiro(row, df: pd.DataFrame, window: int = 3):
    """Flag a withdrawal that matches a recent transfer of the same amount.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pd.Series`` yielded by ``iterrows``)
        The transaction being evaluated. Must provide ``'tipoTransaccion'``;
        for a 'Retiro' it must also provide ``'unidadTiempo'`` and ``'monto'``.
    df : pd.DataFrame
        Candidate transactions with the columns ``'tipoTransaccion'``,
        ``'unidadTiempo'`` and ``'monto'``. It is only read, never modified.
    window : int, default 3
        Number of time units inspected, counting the row's own time unit and
        going backwards (``tiempo``, ``tiempo - 1``, ..., ``tiempo - window + 1``).
        A value of 0 or less inspects nothing, so the result is always 0.

    Returns
    -------
    int
        1 if ``row`` is a 'Retiro' and at least one 'Transferencia' in ``df``
        has the same ``'monto'`` within the window, otherwise 0.
    """
    if row['tipoTransaccion'] != 'Retiro':
        return 0

    tiempo = row['unidadTiempo']
    tiempos = df['unidadTiempo']

    # Exact equality against each time unit in the window (not a range test),
    # so only transfers stamped with one of those precise values qualify.
    en_ventana = False
    for offset in range(window):
        en_ventana = en_ventana | (tiempos == tiempo - offset)

    coincide = (
        (df['tipoTransaccion'] == 'Transferencia')
        & (df['monto'] == row['monto'])
        & en_ventana
    )
    return int(coincide.any())

        

In [6]:
# ALGORITHM FOR TRANSFERENCIAS
# Rule: a 'Transferencia' is flagged when its deltaOrigen is 0.

def check_for_transferencia(row, tolerance: float = 0.0):
    """Flag a transfer whose ``deltaOrigen`` is zero.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pd.Series`` yielded by ``iterrows``)
        The transaction being evaluated. Must provide ``'tipoTransaccion'``;
        for a 'Transferencia' it must also provide a numeric ``'deltaOrigen'``.
    tolerance : float, default 0.0
        Largest absolute ``deltaOrigen`` still treated as zero. The default
        keeps the exact-zero comparison; pass a small positive value to absorb
        floating-point rounding in the balances.

    Returns
    -------
    int
        1 if ``row`` is a 'Transferencia' with ``abs(deltaOrigen) <= tolerance``,
        otherwise 0 (a NaN ``deltaOrigen`` is never flagged).

    Notes
    -----
    A stricter variant that additionally required the destination's initial
    and final balances and the origin's final balance to be 0 was sketched
    previously but is not applied.
    """
    if row['tipoTransaccion'] != 'Transferencia':
        return 0
    return int(abs(row['deltaOrigen']) <= tolerance)

In [7]:
# NEW COLUMN WITH THE RESULTS OF THE ALGORITHMS
# Every row starts at 0 (not flagged); the rule loops further down write the
# flag returned by each check for the 'Transferencia' and 'Retiro' rows.

df['marca_fraude_proyectada'] = 0

In [8]:
# Per-type subsets of `df`: one with the 'Retiro' rows, one with the 'Transferencia' rows.
df_retiro = df.loc[df['tipoTransaccion'].eq('Retiro')]
df_transferencia = df.loc[df['tipoTransaccion'].eq('Transferencia')]

In [9]:
# Apply the 'Transferencia' rule to every transfer row, store the resulting
# flag in `df` and count how many rows were flagged.

fraudes_transferencia = 0
for etiqueta, fila in df_transferencia.iterrows():
    # check_for_transferencia returns an int flag (0 or 1), not a float.
    marca_transferencia: int = check_for_transferencia(fila)
    df.at[etiqueta, 'marca_fraude_proyectada'] = marca_transferencia
    fraudes_transferencia += marca_transferencia

# Transfers whose projected flag disagrees with the labelled `marca_fraude`.
es_transferencia = df['tipoTransaccion'] == 'Transferencia'
difiere_transferencia = df['marca_fraude'] != df['marca_fraude_proyectada']
df_discrepancias_transferencia = df[difiere_transferencia & es_transferencia]

print('number of frauds projected for Transferencias:', fraudes_transferencia)
print('number of discrepancies for Transferencias:', len(df_discrepancias_transferencia))

number of frauds projected for Transferencias: 32
number of discrepancies for Transferencias: 1


In [10]:
# Apply the 'Retiro' rule to every withdrawal row, matching each one against
# the transfer subset, and store the resulting flag in `df`.

fraudes_retiro = 0
for etiqueta, fila in df_retiro.iterrows():
    marca_retiro = check_for_retiro(fila, df_transferencia)
    df.at[etiqueta, 'marca_fraude_proyectada'] = marca_retiro
    fraudes_retiro += marca_retiro

# Withdrawals whose projected flag disagrees with the labelled `marca_fraude`.
es_retiro = df['tipoTransaccion'] == 'Retiro'
difiere_retiro = df['marca_fraude'] != df['marca_fraude_proyectada']
df_discrepancias_retiro = df[difiere_retiro & es_retiro]

print('number of frauds projected for Retiros:', fraudes_retiro)
print('number of discrepancies for Retiros:', len(df_discrepancias_retiro))

number of frauds projected for Retiros: 33
number of discrepancies for Retiros: 2
