In [1]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from pyiceberg.catalog import load_catalog
from dotenv import load_dotenv
import os
import datetime
import requests

# Pull API credentials and storage settings from the .env file / process environment.
load_dotenv()

# NOTE(review): "CLINET" looks like a typo for "CLIENT", but the name has to match
# the key actually defined in the environment, so it is left untouched -- confirm
# before renaming.
client_id = os.getenv("STREAMS_CHARTS_CLINET_ID")
token = os.getenv("STREAMS_CHARTS_TOKEN")
s3_bucket = os.getenv("S3_BUCKET")
s3_prefix = os.getenv("S3_PREFIX")
glue_database = os.getenv("GLUE_DATABASE")
glue_table = os.getenv("GLUE_TABLE")

url = "https://streamscharts.com/api/jazz/channels?platform=twitch&time=7-days"
headers = {"Client-ID": client_id, "Token": token}
# A timeout keeps the cell from hanging indefinitely if the API stops responding.
response = requests.get(url, headers=headers, timeout=30)

# Raise rather than call exit(), so a failed request surfaces as a normal
# traceback in this cell and no later cell runs against missing data.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")

data = response.json()
df = pd.DataFrame(data['data'])

# Read the current date once so every derived value refers to the same day,
# even if the cell happens to run across midnight.
today = datetime.date.today()

# Monday..Sunday bounds of the week containing `today` (weekday() is 0 on Monday).
# NOTE(review): the request asks for "time=7-days" while the rows are labelled
# with the current calendar week -- confirm the two windows are meant to match.
start_date = today - datetime.timedelta(days=today.weekday())
end_date = start_date + datetime.timedelta(days=6)

# Take year and week from the same ISO calendar tuple. Using start_date.year
# would mislabel weeks that straddle New Year: Monday 2024-12-30 is ISO week 1
# of 2025, not week 1 of 2024.
iso_calendar = start_date.isocalendar()
year = iso_calendar[0]
week_number = iso_calendar[1]

# Stamp every row with the week it is being recorded under.
df['start_date'] = start_date
df['end_date'] = end_date
df['year'] = year
df['week_number'] = week_number

In [2]:
# Show the DataFrame as the cell's output to inspect the fetched rows and the added week columns.
df

Unnamed: 0,platform,channel_name,channel_display_name,channel_id,hours_watched,peak_viewers,average_viewers,airtime_in_m,followers_gain,live_views,last_streamed_game,avatar_url,channel_country,stream_language,partnership_status,channel_type,start_date,end_date,year,week_number
0,twitch,kaicenat,KaiCenat,641972806,19385765,272552,115622,10060,409115,,Just Chatting,https://static-cdn.jtvnw.net/jtv_user_pictures...,US,en,partner,Male,2024-11-18,2024-11-24,2024,47
1,twitch,auronplay,auronplay,459331509,3236050,277925,133446,1455,70559,20305995,Minecraft,https://static-cdn.jtvnw.net/jtv_user_pictures...,ES,es,partner,Male,2024-11-18,2024-11-24,2024,47
2,twitch,rubius,Rubius,39276140,1993621,226606,62301,1920,54826,14246963,Minecraft,https://static-cdn.jtvnw.net/jtv_user_pictures...,ES,es,partner,Male,2024-11-18,2024-11-24,2024,47
3,twitch,eliasn97,eliasn97,238813810,1699649,86661,36228,2815,4043,650500,Special Events,https://static-cdn.jtvnw.net/jtv_user_pictures...,DE,de,partner,Male,2024-11-18,2024-11-24,2024,47
4,twitch,rainbow6,Rainbow6,65171890,1581562,66571,30222,3140,15451,3284661,Tom Clancy's Rainbow Six Siege,https://static-cdn.jtvnw.net/jtv_user_pictures...,,en,partner,Organization,2024-11-18,2024-11-24,2024,47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,twitch,gronkh,GRONKH,12875057,347556,41457,21610,965,1558,,Just Chatting,https://static-cdn.jtvnw.net/jtv_user_pictures...,DE,de,partner,Male,2024-11-18,2024-11-24,2024,47
96,twitch,quin69,Quin69,56649026,347405,10974,6903,3020,1105,227264,Magicraft,https://static-cdn.jtvnw.net/jtv_user_pictures...,NZ,en,partner,Male,2024-11-18,2024-11-24,2024,47
97,twitch,plaqueboymax,plaqueboymax,672238954,345320,22208,12153,1705,10202,,Just Chatting,https://static-cdn.jtvnw.net/jtv_user_pictures...,US,en,partner,Male,2024-11-18,2024-11-24,2024,47
98,twitch,mira,mira,79294007,344654,3434,2057,10060,2164,,Just Chatting,https://static-cdn.jtvnw.net/jtv_user_pictures...,RU,en,partner,Female,2024-11-18,2024-11-24,2024,47


In [3]:
from pyiceberg.catalog import load_catalog
from dotenv import load_dotenv
import os

# Read configuration from the .env file / process environment.
load_dotenv()

client_id = os.getenv("STREAMS_CHARTS_CLINET_ID")
token = os.getenv("STREAMS_CHARTS_TOKEN")
s3_bucket = os.getenv("S3_BUCKET")
s3_prefix = os.getenv("S3_PREFIX")
glue_database = os.getenv("GLUE_DATABASE")
glue_table = os.getenv("GLUE_TABLE")

# Fail fast when the table coordinates are missing. Otherwise the f-string below
# would silently build the identifier "None.None" and the error would only show
# up later as an unrelated table lookup failure.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("GLUE_DATABASE", glue_database),
        ("GLUE_TABLE", glue_table),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Load Iceberg catalog and table
catalog = load_catalog(
    "glue",
    **{
        "type": "GLUE",
        "glue.region": "eu-west-2",
    }
)
# The table identifier has the form "<database>.<table>".
table = catalog.load_table(f"{glue_database}.{glue_table}")