<a href="https://colab.research.google.com/github/MatiasSiles/MastercardSalesOptimization/blob/main/Fraud_Detection_Transactions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Modelo multivariado de detección temprana de anomalías y fraudes financieros sobre series temporales, que usa Deep Learning (LSTM/Transformer) y métodos bayesianos para estimar el riesgo dinámico de las transacciones Mastercard a nivel global.

Detección en tiempo real de fraudes invisibles de bajo monto, usando anomalías multivariadas.

In [61]:
import sqlite3
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

In [62]:
# Quietly (-q) extract the countries archive into the current working directory.
# NOTE(review): the map code later reads /content/ne_110m_admin_0_countries.shp,
# so this presumably relies on the notebook running from /content - confirm.
!unzip -q /content/ne_110m_admin_0_countries.zip

In [63]:
# Load every table of the SQLite database into its own DataFrame.
connection_db = sqlite3.connect("mastercard.db")

# try/finally guarantees the connection is released even when one of the
# reads fails (for example, when a table is missing from the database).
try:
  df_customers = pd.read_sql("SELECT * FROM customers", connection_db)
  df_cards = pd.read_sql("SELECT * FROM cards", connection_db)
  df_merchants = pd.read_sql("SELECT * FROM merchants", connection_db)
  df_transactions = pd.read_sql("SELECT * FROM transactions", connection_db)
  df_fraud_labels = pd.read_sql("SELECT * FROM fraud_labels", connection_db)
finally:
  connection_db.close()

In [91]:
# @title
class fraud_analyzer():
  """Exploratory plots and summaries over the customer, card, merchant,
  transaction and fraud-label tables.

  Every table is passed as a keyword-only argument and defaults to ``None``;
  each method only touches the tables it needs.

  NOTE: the nested helper classes (``check_customers_cards`` and ``Merchants``)
  look up the module-level name ``fraud_analyzer`` when they are called. The
  notebook rebinds that name to an instance right after this class definition,
  so the helpers read ``.cards`` / ``.merchants`` from that instance. Their
  methods are static so they keep working when called straight from the class,
  e.g. ``fraud_analyzer.Merchants.most_dangerous_merchants()``.
  """

  def __init__(self, * ,customers=None, transactions=None, cards=None, fraud_labels=None, merchants=None):
    self.customers = customers
    self.transactions = transactions
    self.cards = cards
    self.fraud_labels = fraud_labels
    self.merchants = merchants

  def Customers(self):
    """Draw a 3x2 grid of customer and transaction overview plots.

    Uses ``self.customers``, ``self.cards`` and ``self.transactions``.
    Side effect: ``self.transactions["timestamp"]`` is converted to datetime
    in place (kept from the original implementation).
    """
    fig, axes = plt.subplots(3,2, figsize=(16,8))
    fig.suptitle("Customers Analysis")

    # One row per transaction, tagged with the customer that owns the card.
    # Built once and shared by plots 2, 3 and 4.
    customer_transactions = pd.merge(self.cards, self.transactions, on="card_id")[["customer_id", "transaction_id", "amount"]]

    # plot1: age against income
    axes[0,0].scatter(self.customers["age"], self.customers["income"], alpha=0.4)
    axes[0,0].set_xlabel("Age")
    axes[0,0].set_ylabel("Income")

    # plot2: customer income against each transaction amount
    income_vs_amount = pd.merge(customer_transactions, self.customers, on="customer_id")[["income", "amount"]]

    axes[0,1].scatter(income_vs_amount["income"], income_vs_amount["amount"], alpha=0.2)
    axes[0,1].set_xlabel("Income")
    axes[0,1].set_ylabel("Transaction Amount")

    # plot3: how many transactions fall in each customer-id range
    axes[1,0].hist(customer_transactions["customer_id"], bins=50)
    axes[1,0].set_xlabel("Customer ID")
    axes[1,0].set_ylabel("Transaction Frequency")

    # plot4: mean transaction amount per customer
    mean_amount = customer_transactions.groupby("customer_id")["amount"].mean()

    axes[1,1].scatter(mean_amount.index, mean_amount, alpha=0.3)
    axes[1,1].set_xlabel("Customer ID")
    axes[1,1].set_ylabel("Average Transaction Amount")

    # plot5: transactions per hour of day
    self.transactions["timestamp"] = pd.to_datetime(self.transactions["timestamp"])
    timestamps = self.transactions["timestamp"]

    # One bin per hour (edges 0..24); the default 10 bins would spread the
    # 24 hours unevenly across the bars.
    axes[2,0].hist(timestamps.dt.hour, bins=range(25))
    axes[2,0].set_xlabel("Hour")
    axes[2,0].set_ylabel("Transaction Frequency")

    # plot6: transactions over time
    axes[2,1].hist(timestamps, bins=80)
    axes[2,1].set_xlabel("Date")
    axes[2,1].set_ylabel("Transaction Frequency")

    fig.tight_layout()

  def transaction_customer_map(self, shapefile_path="/content/ne_110m_admin_0_countries.shp"):
    """Plot a world map shaded by the number of customers per country.

    Args:
      shapefile_path: countries shapefile to read; it must expose an
        ``ISO_A2`` column, which is joined against ``self.customers["country"]``.
    """
    world_map = gpd.read_file(shapefile_path)

    # Name the columns explicitly: what reset_index() calls them after
    # value_counts() depends on the pandas version.
    countries = self.customers["country"].value_counts().reset_index()
    countries.columns = ["ISO_A2", "count"]
    world_map = pd.merge(world_map, countries, on="ISO_A2")

    # No ``color=`` here: when both ``column`` and ``color`` are given the
    # uniform colour wins and the per-country shading is lost.
    world_map.plot(column="count", cmap="OrRd", legend=True, figsize=(20,8))

  def _fraud_stats(self):
    """Return ``(value counts of is_fraud, percentage of rows flagged as fraud)``.

    The percentage is 0.0 when the table is empty or holds no fraud rows.
    """
    counts = self.fraud_labels["is_fraud"].value_counts()
    frauds_total = int(counts[counts.index == 1].sum())
    total_rows = len(self.fraud_labels)
    fraud_percent = (frauds_total * 100) / total_rows if total_rows else 0.0
    return counts, fraud_percent

  def fraud_distribution(self):
    """Bar-plot the ``is_fraud`` class counts and print them with the fraud percentage."""
    data, fraud_porcent = self._fraud_stats()

    # Nothing to draw for an empty table.
    if not data.empty:
      data.plot(kind="bar")

    print(data)
    print(f"\nFraud Porcent: {fraud_porcent}")

  class check_customers_cards():
    """Card-level views; reads ``cards`` from the module-level ``fraud_analyzer`` instance."""

    def __init__(self, cards):
      # ``cards`` is accepted only for backward compatibility; the static
      # methods below read the table from the module-level instance.
      pass

    @staticmethod
    def status():
      """Bar-plot how many cards are in each ``status``."""
      fraud_analyzer.cards["status"].value_counts().plot(kind="bar")
      plt.xlabel("")
      plt.ylabel("Number of Customers")

    @staticmethod
    def card_types():
      """Bar-plot how many cards there are of each ``card_type``."""
      fraud_analyzer.cards["card_type"].value_counts().plot(kind="bar")
      plt.ylabel("Number of Customers")

    @staticmethod
    def seeker():
      """Interactively print the cards of a customer id until the user enters 0."""
      cards = fraud_analyzer.cards

      while True:

        answer = input("Enter the client id or 0 to exit: ")
        print()

        # Ask again instead of crashing when the input is not a number.
        try:
          prompt = int(answer)
        except ValueError:
          print("Please enter a numeric client id")
          print("\n\n")
          continue

        if prompt == 0:
          break

        matches = cards[cards["customer_id"] == prompt]
        if matches.empty:
          print("Customer not found")
        else:
          print(matches)
        print("\n\n")

  class Merchants():
    """Merchant-level views; reads ``merchants`` from the module-level ``fraud_analyzer`` instance."""

    def __init__(self, merchants):
      # ``merchants`` is accepted only for backward compatibility; the static
      # methods below read the table from the module-level instance.
      pass

    @staticmethod
    def sold_categories_merchants():
      """Bar-plot the number of merchants per ``category``."""
      fraud_analyzer.merchants["category"].value_counts().plot(kind="bar")

    @staticmethod
    def most_dangerous_merchants():
      """Plot the ``risk_score`` histogram and print the ten highest-scoring merchants."""
      fraud_analyzer.merchants["risk_score"].plot(kind="hist") # risk_score indicates how many historical frauds each merchant has
      plt.xlabel("Risk Score")
      plt.title("Distribution Risks Scores by Merchants")

      print("\nTop 10 most dangerous merchants:")
      print(fraud_analyzer.merchants.sort_values(by="risk_score", ascending=False).head(10))

# Rebind the name ``fraud_analyzer`` from the class to an instance loaded with
# every table; from here on the class is only reachable via ``type(fraud_analyzer)``.
fraud_analyzer = fraud_analyzer(
  customers=df_customers,
  transactions=df_transactions,
  cards=df_cards,
  fraud_labels=df_fraud_labels,
  merchants=df_merchants,
)

In [None]:
class models():
  """Models trained on a feature matrix ``X`` and a label vector ``y``.

  Both arguments are keyword-only and are stored unchanged on the instance.
  """

  def __init__(self, *, X, y):
    self.X = X
    self.y = y

  def logistic_regression(self):
    """Fit a logistic regression and return its predictions on the held-out split.

    The data is split 80/20 with ``random_state=42``; the model is trained on
    the 80% part and the predictions for the 20% part are returned.
    """
    # Local import: only ``sklearn.model_selection`` is imported at file level.
    from sklearn.linear_model import LogisticRegression

    X_train, X_test, y_train, _ = train_test_split(self.X, self.y, test_size=0.2, random_state=42)

    model = LogisticRegression()
    model.fit(X_train, y_train)
    return model.predict(X_test)

  def comparision_trees_models(self):
    """Placeholder for the tree-based model comparison; does nothing yet."""

  def neural_networks(self):
    """Placeholder for the neural-network models; does nothing yet."""