In [1]:
import datetime
import os
import time

import pandas as pd
import requests
import sqlalchemy

In [2]:
# Twitch API credentials are read from the environment so that secrets never
# live in the notebook or its version-control history. Export
# TWITCH_CLIENT_ID and TWITCH_CLIENT_SECRET before starting the kernel.
# NOTE: any credential that was previously hard-coded here should be treated
# as compromised and rotated in the Twitch developer console.
CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID")
CLIENT_SECRET = os.environ.get("TWITCH_CLIENT_SECRET")

if not CLIENT_ID or not CLIENT_SECRET:
    # Warn right where the configuration is read; the token request that
    # follows cannot succeed without both values.
    print(
        "Missing Twitch credentials: set the TWITCH_CLIENT_ID and "
        "TWITCH_CLIENT_SECRET environment variables."
    )

# Function to get OAuth Token
def get_twitch_token():
    """Request an app access token using the OAuth client-credentials grant.

    Reads the module-level CLIENT_ID and CLIENT_SECRET.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the request raises a network error, times out, or the server answers
        with a status other than 200.
    """
    url = "https://id.twitch.tv/oauth2/token"
    payload = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials"
    }

    try:
        # Send the secret in the form-encoded body instead of the query
        # string so it is not embedded in the request URL. The timeout keeps
        # a stalled connection from blocking the notebook indefinitely.
        response = requests.post(url, data=payload, timeout=10)
    except requests.RequestException as exc:
        print("‚ùå Error getting token:", exc)
        return None

    if response.status_code != 200:
        print("‚ùå Error getting token:", response.status_code, response.text)
        return None

    return response.json().get("access_token")

# Fetch token
TOKEN = get_twitch_token()
if not TOKEN:
    print("‚ùå Failed to authenticate with Twitch API.")
    # Raise instead of calling exit(): exit() is a helper meant for the
    # interactive shell, whereas an exception fails this cell visibly and
    # stops a "Run All" before any later cell uses a missing token.
    raise RuntimeError("Failed to authenticate with Twitch API.")

# Define API Headers (sent with every Helix request made below)
HEADERS = {
    "Client-ID": CLIENT_ID,
    "Authorization": f"Bearer {TOKEN}"
}

print("‚úÖ Twitch API Authentication Successful!")

‚úÖ Twitch API Authentication Successful!


In [3]:
# Function to get top games dynamically
def get_top_games(limit=30):
    """Fetch the names of the games currently at the top of Twitch.

    Args:
        limit: Number of games to request; passed to the API as ``first``.

    Returns:
        A list of game names in the order the API returned them, or an empty
        list when the request fails (network error, timeout, or non-200
        status).
    """
    url = "https://api.twitch.tv/helix/games/top"
    params = {"first": limit}

    try:
        # The timeout prevents a stalled connection from hanging the cell.
        response = requests.get(url, headers=HEADERS, params=params, timeout=10)
    except requests.RequestException as exc:
        print("‚ùå Error fetching top games:", exc)
        return []

    if response.status_code != 200:
        print("‚ùå Error fetching top games:", response.status_code, response.text)
        return []

    games = response.json().get("data", [])
    return [game["name"] for game in games]

# Get top 30 games dynamically
top_games = get_top_games()
print(f"üéÆ Top {len(top_games)} Games Fetched from Twitch API!")

# Manually selected diverse games
manual_games = [
    "Stardew Valley", "Hollow Knight", "Dark Souls 3", "Terraria", 
    "The Sims 4", "Cities: Skylines", "Final Fantasy XIV", "Tekken 8",
    "Street Fighter 6", "Age of Empires IV", "Elden Ring", "Rocket League",
    "Dead by Daylight", "Palworld", "Enshrouded", "Cyberpunk 2077",
    "Resident Evil 4", "The Legend of Zelda: Breath of the Wild",
    "Super Smash Bros. Ultimate", "Call of Duty: Warzone"
]

# Combine top games with manually selected ones. dict.fromkeys drops repeated
# names while keeping first-seen order: a manual pick that is also in the top
# list would otherwise be fetched twice, duplicating its streamers and
# inflating every per-game total computed from the collected rows.
games_list = list(dict.fromkeys(top_games + manual_games))
print(f"üéÆ Total Games to Analyze: {len(games_list)}")


üéÆ Top 30 Games Fetched from Twitch API!
üéÆ Total Games to Analyze: 50


In [4]:
# Function to fetch Game ID from Twitch API
def get_game_id(game_name):
    """Look up the Twitch game ID for an exact game name.

    Args:
        game_name: Name of the game, passed to the API as ``name``.

    Returns:
        The ``id`` of the first matching game, or ``None`` when the request
        fails (network error, timeout, non-200 status) or no game matches.
    """
    url = "https://api.twitch.tv/helix/games"
    params = {"name": game_name}

    try:
        # The timeout prevents a stalled connection from hanging the cell.
        response = requests.get(url, headers=HEADERS, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"‚ùå Error fetching Game ID for {game_name}: {exc}")
        return None

    if response.status_code != 200:
        print(f"‚ùå Error fetching Game ID for {game_name}: {response.status_code}")
        return None

    matches = response.json().get("data")
    if matches:
        return matches[0]["id"]

    print(f"‚ùå Game '{game_name}' not found!")
    return None

In [5]:
def _stream_to_record(stream, game_name):
    """Flatten one stream object from the API response into a row dict."""
    tags = stream.get("tags")
    return {
        "streamer_name": stream["user_name"],
        "game": game_name,
        "viewers": stream["viewer_count"],
        "start_time": stream["started_at"],
        "user_login": stream["user_login"],
        "stream_id": stream["id"],
        "language": stream["language"],
        "title": stream["title"],
        # tags may be absent or null in the response; store "" in that case
        "tags": ", ".join(tags) if isinstance(tags, list) else ""
    }


# Function to fetch all small streamers playing a specific game (with pagination)
def get_small_streamers(game_name):
    """Collect every live stream of a game that has 5 to 100 viewers.

    Resolves the game name to an ID with get_game_id, then pages through the
    streams endpoint 100 results at a time until no pagination cursor is
    returned.

    Args:
        game_name: Name of the game to look up.

    Returns:
        A list of row dicts (see _stream_to_record). The list is empty when
        the game cannot be resolved, and holds only the rows gathered so far
        when a request fails part-way through.
    """
    print(f"üîç Fetching Small Streamers for: {game_name}")

    game_id = get_game_id(game_name)
    if not game_id:
        return []

    url = "https://api.twitch.tv/helix/streams"
    params = {
        "game_id": game_id,
        "first": 100  # page size requested from the API
    }

    streamers = []
    # Live viewer counts shift while paging, so the same stream can show up
    # on more than one page; track IDs to keep each stream only once.
    seen_stream_ids = set()
    max_rate_limit_retries = 3
    retries_left = max_rate_limit_retries

    while True:
        try:
            # The timeout prevents a stalled connection from hanging the loop.
            response = requests.get(url, headers=HEADERS, params=params, timeout=10)
        except requests.RequestException as exc:
            print("‚ùå Error fetching streamers:", exc)
            break

        if response.status_code == 429 and retries_left > 0:
            # Rate limited: wait until the reset time advertised in the
            # Ratelimit-Reset header (a Unix timestamp), then retry this page
            # instead of silently returning a truncated result.
            try:
                wait_seconds = float(response.headers.get("Ratelimit-Reset")) - time.time()
            except (TypeError, ValueError):
                wait_seconds = 1.0
            time.sleep(min(max(wait_seconds, 1.0), 60.0))
            retries_left -= 1
            continue

        if response.status_code != 200:
            print("‚ùå Error fetching streamers:", response.status_code, response.text)
            break

        retries_left = max_rate_limit_retries
        data = response.json()

        for stream in data.get("data", []):
            if not 5 <= stream.get("viewer_count", 0) <= 100:
                continue  # outside the small-streamer viewer range
            if stream["id"] in seen_stream_ids:
                continue
            seen_stream_ids.add(stream["id"])
            streamers.append(_stream_to_record(stream, game_name))

        cursor = data.get("pagination", {}).get("cursor")
        if not cursor:
            break
        params["after"] = cursor  # request the next page

    print(f"‚úÖ {len(streamers)} Small Streamers Found for {game_name}")
    return streamers


In [6]:
# Function to calculate stream duration
def calculate_stream_duration(start_time, now=None):
    """Return how long a stream has been running, in whole minutes.

    Args:
        start_time: UTC timestamp string in the form "%Y-%m-%dT%H:%M:%SZ",
            e.g. "2025-03-20T16:10:23Z".
        now: Optional datetime to measure against. Defaults to the current
            UTC time; a naive datetime is interpreted as UTC.

    Returns:
        The elapsed time in minutes, floored, as a float.

    Raises:
        ValueError: If start_time does not match the expected format.
    """
    utc = datetime.timezone.utc
    # The trailing "Z" marks UTC, so attach that zone explicitly after parsing.
    started_at = datetime.datetime.strptime(
        start_time, "%Y-%m-%dT%H:%M:%SZ"
    ).replace(tzinfo=utc)

    if now is None:
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        now = datetime.datetime.now(utc)
    elif now.tzinfo is None:
        now = now.replace(tzinfo=utc)

    return (now - started_at).total_seconds() // 60  # Convert to minutes



In [7]:
all_streamers = []

for game in games_list:
    small_streamers = get_small_streamers(game)

    # Compute stream duration before adding to dataset
    for streamer in small_streamers:
        streamer["duration"] = calculate_stream_duration(streamer["start_time"])

    all_streamers.extend(small_streamers)

print(f"\n‚úÖ Total Streamers Collected: {len(all_streamers)}")

# An empty collection would yield a DataFrame without a "game" column and the
# groupby below would fail with an opaque KeyError; fail with a clear message.
if not all_streamers:
    raise RuntimeError("No streamers were collected; cannot compute game-level metrics.")

# Compute Game-Level Metrics.
# These aggregates cover only the rows collected above (the streams returned
# by get_small_streamers), not every stream of the game on Twitch.
df_streamers = pd.DataFrame(all_streamers)

# Compute total viewers per game
game_viewers = df_streamers.groupby("game")["viewers"].sum().reset_index()
game_viewers.columns = ["game", "total_game_viewers"]

# Compute total active streams per game
game_streams = df_streamers.groupby("game")["stream_id"].count().reset_index()
game_streams.columns = ["game", "active_streams"]

# Merge with main dataframe (left joins keep every streamer row, in order)
df_streamers = df_streamers.merge(game_viewers, on="game", how="left")
df_streamers = df_streamers.merge(game_streams, on="game", how="left")

# Compute average viewers per game
df_streamers["avg_viewers_per_game"] = df_streamers["total_game_viewers"] / df_streamers["active_streams"]


üîç Fetching Small Streamers for: Just Chatting
‚úÖ 2366 Small Streamers Found for Just Chatting
üîç Fetching Small Streamers for: League of Legends


  current_time = datetime.datetime.utcnow()


‚úÖ 701 Small Streamers Found for League of Legends
üîç Fetching Small Streamers for: Counter-Strike
‚úÖ 655 Small Streamers Found for Counter-Strike
üîç Fetching Small Streamers for: Grand Theft Auto V
‚úÖ 1122 Small Streamers Found for Grand Theft Auto V
üîç Fetching Small Streamers for: Assassin's Creed Shadows
‚úÖ 949 Small Streamers Found for Assassin's Creed Shadows
üîç Fetching Small Streamers for: Fortnite
‚úÖ 1145 Small Streamers Found for Fortnite
üîç Fetching Small Streamers for: VALORANT
‚úÖ 1100 Small Streamers Found for VALORANT
üîç Fetching Small Streamers for: World of Warcraft
‚úÖ 792 Small Streamers Found for World of Warcraft
üîç Fetching Small Streamers for: inZOI
‚úÖ 169 Small Streamers Found for inZOI
üîç Fetching Small Streamers for: Dota 2
‚úÖ 368 Small Streamers Found for Dota 2
üîç Fetching Small Streamers for: Marvel Rivals
‚úÖ 466 Small Streamers Found for Marvel Rivals
üîç Fetching Small Streamers for: Overwatch 2
‚úÖ 391 Small Streamers Found for

In [8]:
# Preview: render the first ten rows of the collected dataset inline
from IPython.display import display

display(df_streamers.iloc[:10])

# Persist the complete table (row index omitted) so it can be reviewed
# outside the notebook
df_streamers.to_csv(
    "twitch_small_streamers.csv",
    index=False,
)

print("\n‚úÖ Data saved as 'twitch_small_streamers.csv'. You can open it to check the full dataset.")


Unnamed: 0,streamer_name,game,viewers,start_time,user_login,stream_id,language,title,tags,duration,total_game_viewers,active_streams,avg_viewers_per_game
0,taybisox,Just Chatting,72,2025-03-20T16:10:23Z,taybisox,319354193149,ru,"–î–û–ë–†–´–ô –ê–£–ö –ù–ê –§–ò–õ–¨–ú/ANIME (–±–µ–∑ –ø—Ä–∞–≤–∏–ª, 1 –ø—Ä–∞–≤–∏...","–†—É—Å—Å–∫–∏–π, –¥–µ–≤—É—à–∫–∞, –æ–±—â–µ–Ω–∏–µ, girl, Cosplay, –∞–Ω–∏–º...",195.0,55380,2366,23.406593
1,Â∞èÁÜôËÇâÁµ≤,Just Chatting,99,2025-03-20T17:47:15Z,sophroseeeeee,315130774264,zh,3/21 ÊïëÂëΩ ÂúçÂ∑æÊâπÁôº,"‰∏≠Êñá, Â•∂Ëå∂ÂçÉÊùØ‰∏çÈÜâ, È¶ôÊ∞¥Êî∂ËóèÂÆ∂, kpop, Âπ¥Â∫¶ÁÜ¨Â§ú‰∫ûËªç, ÂÑÄÂºèÊÑü",98.0,55380,2366,23.406593
2,Felikah,Just Chatting,97,2025-03-20T18:47:30Z,felikah,318151777788,de,üî¥ PUMMEL PARTY ACTION üî¥ ELDEN RING üî¥ GODRICK D...,"Deutsch, gaming, GHG, 7tv",38.0,55380,2366,23.406593
3,GetOnMyLvLTV,Just Chatting,96,2025-03-19T12:06:37Z,getonmylvltv,317412970361,de,24.07.2020 - Numero Uno | 24/7 alte Streams & ...,Deutsch,1879.0,55380,2366,23.406593
4,LaserLent,Just Chatting,100,2025-03-20T17:31:13Z,laserlent,318146482172,es,"DALAS se VUELVE TOTALMETE LOCO, FRANK CUESTA E...","Espa√±ol, videojuegos, videojuegos, actualidad",114.0,55380,2366,23.406593
5,kitnip_,Just Chatting,100,2025-03-20T17:16:08Z,kitnip_,317471611385,en,first ever IRL strim | !kofi !socials,"Girl, English, Swedish, Chatty",129.0,55380,2366,23.406593
6,niconing,Just Chatting,100,2025-03-20T16:27:49Z,niconing,318143839868,ko,[replay/VOD] pretty nicoü§ç üíó (eng/tw),ÌïúÍµ≠Ïñ¥,177.0,55380,2366,23.406593
7,bioyundan,Just Chatting,100,2025-03-20T04:17:47Z,bioyundan,317448700921,tr,Videolarla championstv! 7/24 bant yayƒ±n...,"t√ºrk√ße, T√ºrk√ße",907.0,55380,2366,23.406593
8,Wudjer,Just Chatting,100,2025-03-20T12:38:51Z,wudjer,317461084153,ru,üî¥ –°–º–æ—Ç—Ä–∏–º –∫–∞–∫–æ–π-—Ç–æ —Ñ–∏–ª—å–º . –ù–∞—á–∞–ª–æ –≤ 20:00 ‚Üí –†–æ...,"–†—É—Å—Å–∫–∏–π, DropsEnable, Pvp, –ö—Ä–∞—Å–∏–≤—ã–π–º—É–∂—á–∏–Ω–∞, –≥–µ...",406.0,55380,2366,23.406593
9,TuikassZ,Just Chatting,100,2025-03-20T14:31:03Z,tuikassz,314524179444,pt,LIVE BOA COLA,Portugu√™s,294.0,55380,2366,23.406593



‚úÖ Data saved as 'twitch_small_streamers.csv'. You can open it to check the full dataset.
