In [0]:
!pip install python-dotenv

Collecting python-dotenv
  Obtaining dependency information for python-dotenv from https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl.metadata
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
from pyspark.sql.types import StructType, IntegerType, StringType
from pyspark.sql.functions import lit
import pandas as pd
import requests
import time
import threading
from datetime import datetime
from dotenv import load_dotenv
import os

In [0]:
# Load variables from a .env file (if one is found) into the process environment,
# then read the Twitch API credentials from it.
load_dotenv()
CLIENT_ID = os.getenv("CLIENT_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Fail fast: os.getenv returns None for an unset variable, and these values are
# interpolated into the request headers further down. Stopping here gives a clear
# message instead of a confusing failure at request time.
_missing_credentials = [
    var_name
    for var_name, var_value in (("CLIENT_ID", CLIENT_ID), ("ACCESS_TOKEN", ACCESS_TOKEN))
    if not var_value
]
if _missing_credentials:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_credentials)
        + ". Define them in the environment or in a .env file."
    )

In [0]:
# Function to fetch data from Twitch
def get_twitch_data(timeout=10):
    """Fetch the Twitch "top games" listing as a pandas DataFrame.

    Sends a GET request to the Helix ``games/top`` endpoint, authenticated with
    the module-level ``CLIENT_ID`` and ``ACCESS_TOKEN``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely, which would freeze the
            caller (here, the background ingestion loop).

    Returns:
        pandas.DataFrame built from the ``data`` list of the JSON response.
        The frame is empty when the response carries no ``data`` key or an
        empty list.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status (for
            example an expired token). Previously such responses were
            silently turned into an empty DataFrame.
        requests.RequestException: Network failure or timeout.
    """
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {ACCESS_TOKEN}"
    }
    url = 'https://api.twitch.tv/helix/games/top'
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface authentication / rate-limit / server errors instead of hiding them.
    response.raise_for_status()
    data = response.json().get('data', [])
    return pd.DataFrame(data)

# Define the Spark schema: four string columns, added one field at a time.
schema = StructType()
for field_name in ("id", "name", "box_art_url", "igdb_id"):
    # StructType.add appends the field to the existing schema object.
    schema.add(field_name, StringType())

# Create the raw Delta table (only if it does not exist yet) with the schema
# columns plus the ingestion_time column that is added at write time.
create_raw_table_sql = """
    CREATE TABLE IF NOT EXISTS delta.`/mnt/raw/twitch/streams` (
        id STRING,
        name STRING,
        box_art_url STRING,
        igdb_id STRING,
        ingestion_time STRING
    ) USING DELTA
"""
spark.sql(create_raw_table_sql)

# Continuous ingestion (streaming-like)
def stream_twitch_data(interval_seconds=60, stop_event=None):
    """Poll Twitch periodically and append each snapshot to the Delta tables.

    Every cycle fetches the top-games listing via ``get_twitch_data``, stamps
    it with the current time, appends all rows to the raw table and appends
    the rows whose ``name`` is not in the non-game category list to the
    trusted table.

    Args:
        interval_seconds: Pause between polling cycles. Defaults to 60, the
            interval the loop has always used.
        stop_event: Optional ``threading.Event``-like object. Setting it ends
            the loop, and also cuts the current pause short. When omitted the
            loop runs until the process ends, as before.

    Returns:
        None, once ``stop_event`` has been set.
    """
    import logging  # local import: this function is the only user

    log = logging.getLogger("twitch_ingestion")

    raw_path = "/mnt/raw/twitch/streams"
    trusted_path = "/mnt/trusted/twitch/streams"
    # Twitch categories that are not games; hoisted out of the loop because
    # the list never changes between cycles.
    not_games = ["IRL", "Just Chatting", "Music", "Kings League", "Special Events", "Art", "ASMR"]

    if stop_event is None:
        # Never set by anyone, so the loop keeps its original run-forever behavior.
        stop_event = threading.Event()

    while not stop_event.is_set():
        try:
            pdf = get_twitch_data()
            if not pdf.empty:
                # createDataFrame with an explicit schema matches pandas columns
                # by position, so align them by name first. This guards against
                # the API reordering or adding fields.
                pdf = pdf.reindex(columns=schema.fieldNames())
                spark_df = spark.createDataFrame(pdf, schema=schema)
                spark_df = spark_df.withColumn("ingestion_time", lit(datetime.now().isoformat()))
                spark_df.write.mode("append").format("delta").save(raw_path)

                filtered_spark_df = spark_df.filter(~spark_df['name'].isin(not_games))
                filtered_spark_df.write.mode("append").format("delta").save(trusted_path)
        except Exception:
            # An uncaught exception would end the thread and stop ingestion
            # without anyone noticing. Log it and try again next cycle.
            log.exception("Twitch ingestion cycle failed; retrying in %s s", interval_seconds)
        # Unlike time.sleep, Event.wait returns as soon as the event is set.
        stop_event.wait(interval_seconds)

# Start the ingestion loop as a background thread.
# Re-running this cell must not start a second loop: two loops would each append
# their own copy of every snapshot. threading.enumerate() lists only threads that
# are currently alive, so a name match means the loop is already running.
_INGESTION_THREAD_NAME = "twitch-ingestion"
if not any(t.name == _INGESTION_THREAD_NAME for t in threading.enumerate()):
    threading.Thread(target=stream_twitch_data, name=_INGESTION_THREAD_NAME).start()

# Open the raw Delta table as a streaming source.
df_stream = (
    spark.readStream
    .format("delta")
    .load("/mnt/raw/twitch/streams")
)

# Send newly appended rows to the console sink. The started StreamingQuery is
# the last expression of the cell, so the notebook shows it.
(
    df_stream.writeStream
    .outputMode("append")
    .format("console")
    .start()
)

<pyspark.sql.streaming.query.StreamingQuery at 0x797ef01e66d0>

In [0]:
# Batch-read the trusted Delta table and render it in the notebook.
trusted_games_df = spark.read.format("delta").load("/mnt/trusted/twitch/streams")
display(trusted_games_df)

id,name,box_art_url,igdb_id,ingestion_time
32982,Grand Theft Auto V,https://static-cdn.jtvnw.net/ttv-boxart/32982_IGDB-{width}x{height}.jpg,1020.0,2025-06-13T02:02:13.935930
460630,Tom Clancy's Rainbow Six Siege X,https://static-cdn.jtvnw.net/ttv-boxart/460630-{width}x{height}.jpg,7360.0,2025-06-13T02:02:13.935930
1826300051,Mario Kart World,https://static-cdn.jtvnw.net/ttv-boxart/1826300051_IGDB-{width}x{height}.jpg,338067.0,2025-06-13T02:02:13.935930
33214,Fortnite,https://static-cdn.jtvnw.net/ttv-boxart/33214-{width}x{height}.jpg,1905.0,2025-06-13T02:02:13.935930
21779,League of Legends,https://static-cdn.jtvnw.net/ttv-boxart/21779-{width}x{height}.jpg,115.0,2025-06-13T02:02:13.935930
32399,Counter-Strike,https://static-cdn.jtvnw.net/ttv-boxart/32399-{width}x{height}.jpg,,2025-06-13T02:02:13.935930
1264310518,Marvel Rivals,https://static-cdn.jtvnw.net/ttv-boxart/1264310518_IGDB-{width}x{height}.jpg,294041.0,2025-06-13T02:02:13.935930
1987896617,Dune: Awakening,https://static-cdn.jtvnw.net/ttv-boxart/1987896617_IGDB-{width}x{height}.jpg,214394.0,2025-06-13T02:02:13.935930
512710,Call of Duty: Warzone,https://static-cdn.jtvnw.net/ttv-boxart/512710-{width}x{height}.jpg,131800.0,2025-06-13T02:02:13.935930
516575,VALORANT,https://static-cdn.jtvnw.net/ttv-boxart/516575-{width}x{height}.jpg,126459.0,2025-06-13T02:02:13.935930
