# 📂 Parte 1 Análise Estática (Spark SQL + PostgreSQL + S3)

### 1 - Realize a leitura da tabela apostas do PostgreSQL e transforme a coluna timestamp corretamente.

In [1]:
from pyspark.sql.functions import to_timestamp
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Leitura PostgreSQL") \
    .config("spark.jars.packages", "org.postgresql:postgresql:42.2.27,org.postgresql:postgresql:42.2.27,org.apache.hadoop:hadoop-aws:3.3.2,com.amazonaws:aws-java-sdk-bundle:1.11.1026,org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0") \
    .getOrCreate()

url = "jdbc:postgresql://localhost:5432/betalert"
properties = {
    "user": "admin",
    "password": "admin",
    "driver": "org.postgresql.Driver"
}

table = "apostas"

df = spark.read.jdbc(url=url, table=table, properties=properties)

# convertendo a data timestamp de string para timestamp
df = df.withColumn("timestamp", to_timestamp("timestamp", "yyyy-MM-dd HH:mm:ss"))

df.printSchema()
df.show(10)
spark.stop()


:: loading settings :: url = jar:file:/home/mateus/.local/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/mateus/.ivy2/cache
The jars for the packages stored in: /home/mateus/.ivy2/jars
org.postgresql#postgresql added as a dependency
org.postgresql#postgresql added as a dependency
org.apache.hadoop#hadoop-aws added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-81ac969b-70ab-4991-b8d8-87c512a61a2b;1.0
	confs: [default]
	found org.postgresql#postgresql;42.2.27 in central
	found org.checkerframework#checker-qual;3.5.0 in central
	found org.apache.hadoop#hadoop-aws;3.3.2 in central
	found com.amazonaws#aws-java-sdk-bundle;1.11.1026 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.5.0 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.5.0 in central
	found org.apache.kafka#kafka-clients;3.4.1 in central
	fou

root
 |-- aposta_id: string (nullable = true)
 |-- apostador_id: string (nullable = true)
 |-- jogo_id: string (nullable = true)
 |-- valor: decimal(38,18) (nullable = true)
 |-- odd: decimal(38,18) (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- resultado: string (nullable = true)



                                                                                

+---------+------------+-------+--------------------+--------------------+-------------------+---------+
|aposta_id|apostador_id|jogo_id|               valor|                 odd|          timestamp|resultado|
+---------+------------+-------+--------------------+--------------------+-------------------+---------+
| b61f4f08|         u79| jogo31|1813.570000000000...|4.490000000000000000|2025-01-05 15:42:00|   perdeu|
| 88c44f52|         u23| jogo33|830.6000000000000...|2.870000000000000000|2025-01-05 09:54:00| pendente|
| 4c32051c|         u72| jogo72|1829.920000000000...|2.090000000000000000|2025-01-03 23:34:00| pendente|
| 1baa492c|         u70| jogo67|1614.200000000000...|4.210000000000000000|2025-01-04 16:50:00|   perdeu|
| 2820b7bb|         u40| jogo50|989.7300000000000...|2.980000000000000000|2025-01-02 20:37:00|   perdeu|
| 9f953950|         u10| jogo66|1786.550000000000...|3.300000000000000000|2025-01-04 13:58:00|   perdeu|
| 1549ba26|         u95| jogo42|683.4800000000000...|3.

### 2 - Realize a leitura da tabela transacoes_financeiras e normalize o nome da coluna de valor.

In [2]:
from pyspark.sql.types import DecimalType
from pyspark.sql import SparkSession
from pyspark.sql.functions import round, col

spark = SparkSession.builder \
    .appName("Leitura PostgreSQL") \
    .config("spark.jars.packages", "org.postgresql:postgresql:42.2.27") \
    .getOrCreate()

url = "jdbc:postgresql://localhost:5432/betalert"
properties = {
    "user": "admin",
    "password": "admin",
    "driver": "org.postgresql.Driver"
}

table = "transacoes_financeiras"

df = spark.read.jdbc(url=url, table=table, properties=properties)

# convertendo a coluna para manter só dois valores decimais, mas sem arredondamento para manter o valor real
df = df.withColumn("valor", col("valor").cast(DecimalType(10, 2)))

df.printSchema()
df.show(10)
spark.stop()


root
 |-- id: integer (nullable = true)
 |-- apostador_id: string (nullable = true)
 |-- valor: decimal(10,2) (nullable = true)
 |-- tipo: string (nullable = true)
 |-- data: timestamp (nullable = true)



                                                                                

+---+------------+--------+--------+-------------------+
| id|apostador_id|   valor|    tipo|               data|
+---+------------+--------+--------+-------------------+
|  1|         u69|14890.81|deposito|2025-01-01 10:00:00|
|  2|         u94| 5616.48|deposito|2025-01-01 10:01:00|
|  3|         u95|16376.42|   saque|2025-01-01 10:02:00|
|  4|         u11|15335.80|deposito|2025-01-01 10:03:00|
|  5|         u88|19559.30|   saque|2025-01-01 10:04:00|
|  6|         u32| 4797.95|deposito|2025-01-01 10:05:00|
|  7|         u14| 1494.86|   saque|2025-01-01 10:06:00|
|  8|         u23|15609.39|deposito|2025-01-01 10:07:00|
|  9|          u8|14916.72|deposito|2025-01-01 10:08:00|
| 10|         u92|12104.17|   saque|2025-01-01 10:09:00|
+---+------------+--------+--------+-------------------+
only showing top 10 rows



### 3 - Faça o join entre apostas e o arquivo apostadores.csv do S3 para incluir o país e dados extras.

In [3]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Leitura PostgreSQL") \
    .config("spark.hadoop.fs.s3a.access.key", "admin") \
    .config("spark.hadoop.fs.s3a.secret.key", "admin123") \
    .config("spark.hadoop.fs.s3a.endpoint", "http://localhost:9000") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") \
    .getOrCreate()

url = "jdbc:postgresql://localhost:5432/betalert"
properties = {
    "user": "admin",
    "password": "admin",
    "driver": "org.postgresql.Driver"
}

# Em tese, o join faz uma combinação de cada saque por aposta, então seria necessário só comparar
# as diferenças entre saque e depósito.

transactions = spark.read.jdbc(url=url, table="transacoes_financeiras", properties=properties)
bets = spark.read.jdbc(url=url, table="apostas", properties=properties)

apostadores = spark.read.csv("s3a://betalogs/apostadores.csv", header=True, inferSchema=True)

# Renomeando as colunas para facilitar os joins

bets = bets.withColumnRenamed("valor", "bet_valor")
transactions = transactions.withColumnRenamed("valor", "transaction_valor")

bets = bets.withColumnRenamed("pais", "pais_bet")
transactions = transactions.withColumnRenamed("pais", "pais_transacao")

# bets -> transactions

bets_transactions = transactions.join(
    bets, transactions.apostador_id == bets.apostador_id, "inner"
).drop(bets["apostador_id"])

# bets -> transactions -> better

resultado_final = bets_transactions.join(
    apostadores, bets_transactions.apostador_id == apostadores.id, "inner"
).drop(apostadores["id"])

resultado_final.show(10)


25/06/02 22:26:02 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties


+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
| id|apostador_id|   transaction_valor|    tipo|               data|aposta_id|jogo_id|           bet_valor|                 odd|          timestamp|resultado|      nome|               email|pais|
+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
|411|         u42|12961.78000000000...|deposito|2025-01-01 16:50:00| be7645ac|  jogo2|1776.830000000000...|4.700000000000000000|2025-01-03 11:29:00| pendente|Jogador 42|jogador42@exemplo...|  ES|
|403|         u42|3849.670000000000...|deposito|2025-01-01 16:42:00| be7645ac|  jogo2|1776.830000000000...|4.700000000000000000|2025-01-03 11:29:00| pendente|Jogador 42|jogador42@exemplo...|  ES|
|277|         u42|13

# 🔍 Parte 2 Detecção de Padrões

Observando se uma aposta é Flash por meio da subtração de duas time stamps em unix (segundos desde 1970)

In [4]:
from pyspark.sql.functions import unix_timestamp, abs

deposits = resultado_final.where(resultado_final.tipo == "deposito")

# depositos onde a diferença entre data e timestamp é menor que 10 segundos
flash_deposits = deposits.where(
    abs(unix_timestamp("data") - unix_timestamp("timestamp")) < 10
)

flash_deposits.show(20)

+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
| id|apostador_id|   transaction_valor|    tipo|               data|aposta_id|jogo_id|           bet_valor|                 odd|          timestamp|resultado|      nome|               email|pais|
+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
|505|         u73|15000.00000000000...|deposito|2025-01-11 20:04:00| db280beb|  jogo3|15000.00000000000...|24.54000000000000...|2025-01-11 20:04:07|   ganhou|Jogador 73|jogador73@exemplo...|  ES|
|510|         u66|15000.00000000000...|deposito|2025-01-11 20:09:00| 0c7defda|  jogo6|15000.00000000000...|12.23000000000000...|2025-01-11 20:09:09|   ganhou|Jogador 66|jogador66@exemplo...|  AR|
|173|         u19|85

Exiba apostas-relâmpago com valor acima de R$500.

In [5]:
gt_500_flash_deposits = flash_deposits.where(flash_deposits.bet_valor > 500)
gt_500_flash_deposits.show(20)

+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
| id|apostador_id|   transaction_valor|    tipo|               data|aposta_id|jogo_id|           bet_valor|                 odd|          timestamp|resultado|      nome|               email|pais|
+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
|505|         u73|15000.00000000000...|deposito|2025-01-11 20:04:00| db280beb|  jogo3|15000.00000000000...|24.54000000000000...|2025-01-11 20:04:07|   ganhou|Jogador 73|jogador73@exemplo...|  ES|
|510|         u66|15000.00000000000...|deposito|2025-01-11 20:09:00| 0c7defda|  jogo6|15000.00000000000...|12.23000000000000...|2025-01-11 20:09:09|   ganhou|Jogador 66|jogador66@exemplo...|  AR|
|173|         u19|85

Exiba apostas-relâmpago com valor acima de R$10.000.

In [6]:
gt_10000_flash_deposits = flash_deposits.where(flash_deposits.bet_valor > 10000)
gt_10000_flash_deposits.show()

+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
| id|apostador_id|   transaction_valor|    tipo|               data|aposta_id|jogo_id|           bet_valor|                 odd|          timestamp|resultado|      nome|               email|pais|
+---+------------+--------------------+--------+-------------------+---------+-------+--------------------+--------------------+-------------------+---------+----------+--------------------+----+
|505|         u73|15000.00000000000...|deposito|2025-01-11 20:04:00| db280beb|  jogo3|15000.00000000000...|24.54000000000000...|2025-01-11 20:04:07|   ganhou|Jogador 73|jogador73@exemplo...|  ES|
|510|         u66|15000.00000000000...|deposito|2025-01-11 20:09:00| 0c7defda|  jogo6|15000.00000000000...|12.23000000000000...|2025-01-11 20:09:09|   ganhou|Jogador 66|jogador66@exemplo...|  AR|
|508|         u99|15

Detecte jogadores que realizaram 10 ou mais apostas em um mesmo jogo.

In [7]:
from pyspark.sql.functions import count

heavy_bettors = bets.groupBy("apostador_id", "jogo_id") \
    .agg(count("*").alias("num_apostas")) \
    .where(col("num_apostas") >= 10)

heavy_bettors.show()

+------------+-------+-----------+
|apostador_id|jogo_id|num_apostas|
+------------+-------+-----------+
|         u88| jogo77|         13|
+------------+-------+-----------+



Exiba o total e a média de valores apostados por país.

In [8]:
from pyspark.sql.functions import sum, avg

bets_by_country = resultado_final.groupBy("pais").agg(
    sum("bet_valor").alias("total_apostado"),
    avg("bet_valor").alias("media_apostada")
)

bets_by_country.show()

spark.stop()


                                                                                

+----+--------------------+--------------------+
|pais|      total_apostado|      media_apostada|
+----+--------------------+--------------------+
|  PT|3343853.040000000...|1029.511403940886...|
|  BR|5107855.500000000...|1078.744561774023...|
|  ES|7238634.930000000...|1082.169970100164...|
|  FR|6773318.370000000...|1025.017913135593...|
|  AR|4756412.960000000...|1100.512022211938...|
+----+--------------------+--------------------+



# 📡 Parte 3 Streaming em Tempo Real (Kafka + Spark Structured Streaming)


Modifiquei pesadamente o docker compose para conseguir usar o jupyter notebook no projeto. Extrai os scripts em python da imagem, e removi a network do docker compose, mapeando todas as portas na minha máquina, já que não foi possível consumir o kafka na rede virtual do docker

In [25]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, from_json, when, avg, count
from pyspark.sql.types import StructType, StringType, TimestampType, FloatType

spark = SparkSession.builder \
    .appName("KafkaStreamingApp") \
    .config("spark.jars.packages", "org.postgresql:postgresql:42.2.27,org.apache.hadoop:hadoop-aws:3.3.2,com.amazonaws:aws-java-sdk-bundle:1.11.1026,org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0") \
    .config("spark.hadoop.fs.s3a.access.key", "admin") \
    .config("spark.hadoop.fs.s3a.secret.key", "admin123") \
    .config("spark.hadoop.fs.s3a.endpoint", "http://localhost:9000") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "false") \
    .getOrCreate()

# Removendo o log level para limpar o streaming
spark.sparkContext.setLogLevel("WARN")

kafka_stream = spark.readStream \
    .format("kafka") \
    .option("kafka.bootstrap.servers", "localhost:9092,localhost:9093") \
    .option("subscribe", "stream_apostas") \
    .load()

# Leitura do CSV com apostadores na S3
bettors_df = spark.read.csv("s3a://betalogs/apostadores.csv", header=True, inferSchema=True)

# Schema da mensagem Kafka
message_schema = StructType() \
    .add("bet_id", StringType()) \
    .add("bettor_id", StringType()) \
    .add("game_id", StringType()) \
    .add("amount", FloatType()) \
    .add("odd", FloatType()) \
    .add("timestamp", TimestampType())

# Parse do JSON + watermark (antes do join)
parsed_stream = kafka_stream.selectExpr("CAST(value AS STRING) as json_str") \
    .select(from_json(col("json_str"), message_schema).alias("data")) \
    .select("data.*") \
    .withWatermark("timestamp", "10 minutes")

# Join com dados estáticos (apostadores)
joined_df = parsed_stream.join(bettors_df, bettors_df.id == parsed_stream.bettor_id, "inner")

# Coluna de suspeita
enriched_df = joined_df.withColumn(
    "suspicious",
    when((col("amount") > 12000) & (col("odd") > 15), True).otherwise(False)
)

# Agregações por país (modo update)
aggregated_by_country = enriched_df.groupBy("pais").agg(
    avg("amount").alias("avg_amount"),
    count(when(col("suspicious") == True, "suspicious")).alias("suspicious_bets"),
    count("*").alias("total_bets")
)

# Contagem de apostas suspeitas por apostador (modo update)
suspicious_by_bettor = enriched_df.groupBy("bettor_id").agg(
    count(when(col("suspicious") == True, "suspicious")).alias("suspicious_bets")
)

query_country = aggregated_by_country.writeStream \
    .format("console") \
    .outputMode("update") \
    .trigger(processingTime="1 minute") \
    .option("truncate", False) \
    .start()


query_country.awaitTermination()

25/06/02 22:59:33 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-2e1a2f6c-cddf-4d1f-9b4f-0d033339c045. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
25/06/02 22:59:33 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
25/06/02 22:59:33 WARN AdminClientConfig: These configurations '[key.deserializer, value.deserializer, enable.auto.commit, max.poll.records, auto.offset.reset]' were supplied but are not used yet.
                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 34
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+----+-----------+----------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id  |nome       |email                 |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+----+-----------+----------------------+----+--------+
|9ed420dd |u30         |jogo76 |1458.48 |9.73 |2025-06-03 01:59:00.564999|u30 |Jogador 30 |jogador30@exemplo.com |BR  |false   |
|d72c5d1d |u57         |jogo55 |2412.9  |2.74 |2025-06-03 01:59:00.571436|u57 |Jogador 57 |jogador57@exemplo

                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 35
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+----+-----------+----------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id  |nome       |email                 |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+----+-----------+----------------------+----+--------+
|1ba5d9bd |u48         |jogo7  |1632.67|5.66|2025-06-03 02:00:02.8681  |u48 |Jogador 48 |jogador48@exemplo.com |BR  |false   |
|5ec60a04 |u72         |jogo39 |1374.2 |7.21|2025-06-03 02:00:02.868657|u72 |Jogador 72 |jogador72@exemplo.com |ES  

                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 36
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|e66c16ea |u80         |jogo93 |2074.72|8.01|2025-06-03 02:01:00.722174|u80|Jogador 80|jogador80@exemplo.com|FR  |false   |
|4ef4c607 |u40         |jogo90 |3902.73|6.7 |2025-06-03 02:01:00.722763|u40|Jogador 40|jogador40@exemplo.com|FR  |false   |
|3c6

                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 37
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+----+-----------+----------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id  |nome       |email                 |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+----+-----------+----------------------+----+--------+
|a2661ec7 |u51         |jogo79 |3431.33|3.67|2025-06-03 02:02:00.64982 |u51 |Jogador 51 |jogador51@exemplo.com |BR  |false   |
|104f1a99 |u100        |jogo69 |295.23 |4.88|2025-06-03 02:02:02.298898|u100|Jogador 100|jogador100@exemplo.com|BR  

                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 38
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|b92744b5 |u75         |jogo77 |4153.71 |2.91 |2025-06-03 02:03:00.600609|u75|Jogador 75|jogador75@exemplo.com|FR  |false   |
|2eaae7db |u53         |jogo59 |184.56  |3.18 |2025-06-03 02:03:00.614651|u53|Jogador 53|jogador53@exemplo.com|FR  |fals

                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 39
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|bb0edc3d |u3          |jogo67 |19575.16|18.94|2025-06-03 02:04:01.374004|u3 |Jogador 3 |jogador3@exemplo.com |PT  |true    |
|a4bdfde9 |u34         |jogo59 |2042.97 |4.64 |2025-06-03 02:04:01.39406 |u34|Jogador 34|jogador34@exemplo.com|BR  |fals

                                                                                

-------------------------------------------
Batch: 6
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 40
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|299f130c |u75         |jogo39 |4427.52|2.33|2025-06-03 02:05:01.843275|u75|Jogador 75|jogador75@exemplo.com|FR  |false   |
|ee47726b |u55         |jogo43 |2124.98|9.31|2025-06-03 02:05:01.843721|u55|Jogador 55|jogador55@exemplo.com|BR  |false   |
|af4

                                                                                

-------------------------------------------
Batch: 7
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 41
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|53b06f92 |u32         |jogo2  |377.18 |2.87|2025-06-03 02:06:00.946929|u32|Jogador 32|jogador32@exemplo.com|FR  |false   |
|99372249 |u51         |jogo4  |4907.09|1.42|2025-06-03 02:06:00.958099|u51|Jogador 51|jogador51@exemplo.com|BR  |false   |
|bd4

                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 42
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|a82465df |u12         |jogo69 |2898.75|8.13|2025-06-03 02:07:02.138037|u12|Jogador 12|jogador12@exemplo.com|FR  |false   |
|d55e0f7a |u37         |jogo98 |957.18 |2.33|2025-06-03 02:07:02.157956|u37|Jogador 37|jogador37@exemplo.com|BR  |false   |
|be9

                                                                                

-------------------------------------------
Batch: 9
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 43
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|6b1a402e |u2          |jogo56 |1355.35 |7.48 |2025-06-03 02:08:00.346539|u2 |Jogador 2 |jogador2@exemplo.com |FR  |false   |
|38dd609b |u81         |jogo100|4254.86 |7.52 |2025-06-03 02:08:03.068659|u81|Jogador 81|jogador81@exemplo.com|ES  |fals

                                                                                

-------------------------------------------
Batch: 10
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 44
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|0ceda44b |u76         |jogo37 |1871.32|1.9 |2025-06-03 02:09:01.381664|u76|Jogador 76|jogador76@exemplo.com|AR  |false   |
|df2ee4e6 |u35         |jogo48 |1612.18|7.86|2025-06-03 02:09:01.386559|u35|Jogador 35|jogador35@exemplo.com|FR  |false   |
|62

                                                                                

-------------------------------------------
Batch: 11
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 45
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|23e9383a |u75         |jogo79 |2632.15 |5.06 |2025-06-03 02:10:01.082947|u75|Jogador 75|jogador75@exemplo.com|FR  |false   |
|a1298316 |u59         |jogo3  |2275.49 |8.74 |2025-06-03 02:10:01.089853|u59|Jogador 59|jogador59@exemplo.com|FR  |fal

                                                                                

-------------------------------------------
Batch: 12
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 46
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|614c4c84 |u91         |jogo93 |2278.01 |8.54 |2025-06-03 02:11:02.46065 |u91|Jogador 91|jogador91@exemplo.com|FR  |false   |
|cdfdf632 |u77         |jogo62 |246.91  |9.15 |2025-06-03 02:11:02.487505|u77|Jogador 77|jogador77@exemplo.com|PT  |fal

                                                                                

-------------------------------------------
Batch: 13
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 47
-------------------------------------------
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor   |odd  |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+--------+-----+--------------------------+---+----------+---------------------+----+--------+
|64cf020b |u25         |jogo24 |1595.11 |1.74 |2025-06-03 02:12:01.991491|u25|Jogador 25|jogador25@exemplo.com|AR  |false   |
|7baa6f76 |u34         |jogo70 |4860.74 |2.87 |2025-06-03 02:12:01.99229 |u34|Jogador 34|jogador34@exemplo.com|BR  |fal

                                                                                

-------------------------------------------
Batch: 14
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 48
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|437a0350 |u84         |jogo29 |4450.88|6.44|2025-06-03 02:13:00.51099 |u84|Jogador 84|jogador84@exemplo.com|AR  |false   |
|0f378df8 |u31         |jogo46 |951.83 |9.29|2025-06-03 02:13:00.513159|u31|Jogador 31|jogador31@exemplo.com|AR  |false   |
|fe

                                                                                

-------------------------------------------
Batch: 15
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+

-------------------------------------------
Batch: 49
-------------------------------------------
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|aposta_id|apostador_id|jogo_id|valor  |odd |timestamp                 |id |nome      |email                |pais|suspeita|
+---------+------------+-------+-------+----+--------------------------+---+----------+---------------------+----+--------+
|d5500ae2 |u66         |jogo72 |2914.36|6.51|2025-06-03 02:14:00.866977|u66|Jogador 66|jogador66@exemplo.com|AR  |false   |
|d3e850b2 |u46         |jogo69 |1844.81|4.41|2025-06-03 02:14:00.872498|u46|Jogador 46|jogador46@exemplo.com|ES  |false   |
|a7

                                                                                

-------------------------------------------
Batch: 16
-------------------------------------------
+----+----------+---------------+----------+
|pais|avg_amount|suspicious_bets|total_bets|
+----+----------+---------------+----------+
+----+----------+---------------+----------+



ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/home/mateus/.local/lib/python3.10/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/home/mateus/.local/lib/python3.10/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/usr/lib/python3.10/socket.py", line 705, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


KeyboardInterrupt: 

repositório no github: https://github.com/MateusGurgel/Infnet-Works/tree/main/creation_of_big_data_solutions_with_spark/tp-3