# Data collection

As my goal is to learn data science, I want to use only data provided directly by Riot Games, the developers of League of Legends. I will utilize their API to extract data, ensuring that the data is as raw and unprocessed as possible.

Notes: 
-   Riot's API key for amateur developers is only valid for 24 hours after generation. Please remember to refresh your key regularly and store it in the `.env` file to ensure the project's code functions correctly.
-   The rate limit is 20 requests per second and 50 requests per minute.

## First API call and size estimate

Since the request rate is limited by frequency and size, a good starting point for this project's data collection would be to estimate the volume of data needed. Refer to the [Scope Notebook](scope.ipynb) for more details about the project's scope.

It will also be a good time to get familiar with the API.

## Accessing a player by rank

In [None]:
from requests import request
import json
import os

# Request parameters for the first exploratory call.
API_KEY = os.getenv("RIOT_API_KEY")
QUEUE = "RANKED_SOLO_5x5"
REGION = "euw1"
TIER = "SILVER"                    # BRONZE, SILVER, GOLD, PLATINUM, DIAMOND, EMERALD, MASTER, GRANDMASTER, CHALLENGER
DIVISION = "II"                      # I, II, III, IV
PAGE = "1"


# Fetch one page of league entries for the queue / tier / division above.
# PAGE is interpolated into the query string so that changing the constant
# really changes the page that is requested.
response = request(
    method="GET",
    url=f"https://{REGION}.api.riotgames.com/lol/league/v4/entries/{QUEUE}/{TIER}/{DIVISION}?page={PAGE}&api_key={API_KEY}",
    headers={
        "Accept": "application/json",
        "Content-Type": "application/json",
    },
)

print(f"Status code :{response.status_code}")
if response.status_code == 200:
    data = response.json()
    print(f"Total entries: {len(data)}")
    if data:
        print(f"First entry: {json.dumps(data[0], indent=4)}")
    else:
        # An empty page has no first entry to show; indexing it would raise.
        print("No entries returned for this page.")
else:
    print(f"Error: {response.status_code}")
    print(response.text)
    # Handle the error as needed

Status code :200
Total entries: 205
First entry: {
    "leagueId": "03fd6a29-e7d2-4075-96c6-123883759094",
    "queueType": "RANKED_SOLO_5x5",
    "tier": "SILVER",
    "rank": "II",
    "summonerId": "0uAIbD4U42x-9EmE4EB0pYmZK7aTxZ43dlf81m05Xstj4KLD",
    "puuid": "iE-VLPKxBxIALe5-17JYEvjOcPN_3UrAsMkLKcPugoNgW_utyd7J03fUn-l5e1c3hUFECqzUV4-VfA",
    "leaguePoints": 51,
    "wins": 4,
    "losses": 1,
    "veteran": false,
    "inactive": false,
    "freshBlood": false,
    "hotStreak": false
}


## Starting the data collection

I want to base my analysis on active players (50+ games) below Master tier. I will start by collecting the IDs of Diamond players, then work my way down through the ranks to Bronze. My reasoning is that higher-elo players are more likely to have played a larger number of games, which will help me gather my initial insights more quickly while continuing to collect data.

### Creating the ranked_players DB

Here we collect the rank data of each player, from Diamond down to Bronze.

In [None]:
from sqlite3 import connect
from time import sleep, time
from requests import request
import json
import os

# Example of one entry of the API response we want to store in the database;
# the ranked_players table created below has one column per field.
# The four flags are real booleans, which is what response.json() yields for
# the JSON true/false values returned by the API.

response_example = {
    "leagueId": "9d07f3b6-3331-49a3-b9ab-1f35f34273cf",
    "queueType": "RANKED_SOLO_5x5",
    "tier": "EMERALD",
    "rank": "I",
    "summonerId": "AnsLv6D7MGZrLZ_GrNPJBPaKZugw30KMPomE9Go3-cMK9P4a",
    "puuid": "ZrwXnHvkdDuUA3oKEgnte9tGWgAcr9MJvv1tX_50VgDGf7FXT0vEcCh51pD5_dtdOxvjwU0HZr2tjA",
    "leaguePoints": 7,
    "wins": 17,
    "losses": 12,
    "veteran": False,
    "inactive": False,
    "freshBlood": False,
    "hotStreak": False,
}

def create_table(db_path):
    """Create the ``ranked_players`` table if it does not exist yet.

    Args:
        db_path: Path of the SQLite database file; ``connect`` creates the
            file when it is missing.

    The table has one column per field of a league entry, with
    ``summoner_id`` as primary key. Calling this function again on an
    existing database is a no-op thanks to ``IF NOT EXISTS``.
    """
    conn = connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("""
        CREATE TABLE IF NOT EXISTS ranked_players (
            league_id TEXT,
            queue_type TEXT,
            tier TEXT,
            rank TEXT,
            summoner_id TEXT PRIMARY KEY,
            puuid TEXT,
            league_points INTEGER,
            wins INTEGER,
            losses INTEGER,
            veteran TEXT,
            inactive TEXT,
            fresh_blood TEXT,
            hot_streak TEXT
        )
    """)
        conn.commit()
    finally:
        # Release the file handle even when the statement or commit fails.
        conn.close()

def insert_data(db_path, data):
    """Insert or replace league entries in the ``ranked_players`` table.

    Args:
        db_path: Path of the SQLite database file.
        data: Iterable of dicts using the API's camelCase keys
            (``leagueId``, ``queueType``, ...). A key missing from an entry
            is stored as NULL because it is read with ``dict.get``.

    Rows are keyed by ``summoner_id``: an entry whose summoner already exists
    replaces the stored row. All rows are committed together at the end; if
    anything fails before the commit, nothing from this call is committed.
    """
    # API keys in the same order as the columns of the INSERT statement.
    api_keys = (
        "leagueId", "queueType", "tier", "rank", "summonerId", "puuid",
        "leaguePoints", "wins", "losses", "veteran", "inactive",
        "freshBlood", "hotStreak",
    )
    conn = connect(db_path)
    try:
        rows = (tuple(entry.get(key) for key in api_keys) for entry in data)
        conn.executemany(
            """
                INSERT OR REPLACE INTO ranked_players (
                    league_id, queue_type, tier, rank, summoner_id, puuid,
                    league_points, wins, losses, veteran, inactive,
                    fresh_blood, hot_streak
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            rows,
        )
        conn.commit()
    finally:
        conn.close()

def fetch_and_store_data(api_key, db_path,tier, division, page):
    """Download one page of league entries and store it in the database.

    Args:
        api_key: Riot API key, sent as the ``api_key`` query parameter.
        db_path: Path of the SQLite database passed on to ``insert_data``.
        tier: Tier segment of the URL (e.g. ``"DIAMOND"``).
        division: Division segment of the URL (``"I"`` .. ``"IV"``).
        page: Page number to request (str or int; it is only formatted into
            the URL).

    Returns:
        The number of entries in the page. ``0`` means the page was empty,
        which callers use as the end-of-division signal.

    Raises:
        RuntimeError: If the API answers with a status other than 200. The
            original code fell through to ``len(data)`` with ``data`` unbound
            in that case; failing loudly also stops callers from treating an
            error as "no more pages" and silently skipping data.
    """
    response = request(
        method="GET",
        url=f"https://{REGION}.api.riotgames.com/lol/league/v4/entries/{QUEUE}/{tier}/{division}?page={page}&api_key={api_key}",
        headers={
            "Accept": "application/json",
            "Content-Type": "application/json",
        },
    )

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        raise RuntimeError(
            f"Riot API request for {tier} {division} page {page} "
            f"failed with status {response.status_code}"
        )

    data = response.json()
    insert_data(db_path, data)
    return len(data)

def fetch_and_store_division(tier, division, page="1"):
    """Fetch every page of one tier/division, starting at ``page``.

    Pages are requested one after another until ``fetch_and_store_data``
    reports an empty page. Reads the module-level ``API_KEY`` and ``db_path``.

    Args:
        tier: Tier to fetch (e.g. ``"DIAMOND"``).
        division: Division to fetch (``"I"`` .. ``"IV"``).
        page: First page to request, as a str or an int.
    """
    page_number = int(page)
    while True:
        request_started = time()
        stored = fetch_and_store_data(API_KEY, db_path, tier, division, str(page_number))

        # To avoid hitting the API rate limit, sleep for the rest of the
        # 1.21 second window. This is done after *every* request, including
        # the final empty page, so that the first request of the next
        # division is spaced out as well.
        sleep_duration = 1.21 - (time() - request_started)
        if sleep_duration > 0:
            sleep(sleep_duration)

        if stored <= 0:
            break
        page_number += 1

    print(f"All data for {tier} {division} fetched and stored in the database.")


def fetch_and_store_all_divisions(tier, division="I", page="1"):
    """Fetch all divisions of ``tier`` from ``division`` down to IV.

    Args:
        tier: Tier to fetch (e.g. ``"DIAMOND"``).
        division: Division to start from; earlier divisions are skipped.
        page: First page to request for the *starting* division only. Every
            following division starts again at page 1, otherwise a resumed
            run would skip the first pages of the remaining divisions.

    Raises:
        ValueError: If ``division`` is not one of I, II, III, IV.
    """
    divisions = ["I", "II", "III", "IV"]
    start = divisions.index(division)
    for current_division in divisions[start:]:
        print(f"Fetching data for {tier} {current_division}...")
        fetch_and_store_division(tier, current_division, page=page)
        page = "1"  # The resume page only applies to the first division

def fetch_and_store_all_tiers(tier="DIAMOND", division="I", page="1"):
    """Fetch all tiers from ``tier`` down to the end of the tier list.

    Args:
        tier: Tier to start from; higher tiers are skipped.
        division: Division to start from inside the starting tier.
        page: Page to start from inside the starting tier/division.

    ``division`` and ``page`` describe a resume point, so they apply only to
    the first tier processed; every following tier starts at division I,
    page 1.

    Raises:
        ValueError: If ``tier`` is not in the tier list below.
    """
    # MASTER, GRANDMASTER and CHALLENGER are excluded: they are apex tiers
    # and have a very small number of players.
    # NOTE(review): the original comment said IRON is excluded too, but the
    # list contains it, so IRON is fetched. Confirm which one is intended.
    tiers = ["DIAMOND", "EMERALD", "PLATINUM", "GOLD", "SILVER", "BRONZE", "IRON"]
    start = tiers.index(tier)
    for current_tier in tiers[start:]:
        print(f"Fetching data for {current_tier}...")
        fetch_and_store_all_divisions(current_tier, division=division, page=page)
        division = "I"  # Reset division to I for the next tier
        page = "1"  # Reset page as well, or later tiers would skip pages

def get_last_entry(db_path):
    """Return the row with the highest ROWID in ``ranked_players``.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        The row as a tuple in table column order, or ``None`` when the table
        is empty.
    """
    conn = connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM ranked_players ORDER BY ROWID DESC LIMIT 1")
        return cursor.fetchone()
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()

def get_last_entry_page(db_path, tier, division, page_size=205):
    """Estimate the page at which fetching of a tier/division should resume.

    Since there are at most ``page_size`` entries per page, the number of
    rows already stored gives the last page that was reached. That page is
    fetched again on resume to get the latest data and avoid missing entries.

    Args:
        db_path: Path of the SQLite database file.
        tier: Value matched against the ``tier`` column.
        division: Value matched against the ``rank`` column.
        page_size: Maximum number of entries per API page (default 205).

    Returns:
        ``stored_rows // page_size + 1`` as an int, i.e. 1 for an empty
        tier/division.
    """
    conn = connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(
            "SELECT COUNT(*) FROM ranked_players WHERE tier = ? AND rank = ?",
            (tier, division),
        )
        count = cursor.fetchone()[0]
    finally:
        conn.close()
    return count // page_size + 1


# Default starting point of the collection; overwritten below when the
# database already contains data from a previous run.
API_KEY = os.getenv("RIOT_API_KEY")
TIER = "DIAMOND"
DIVISION = "I"
PAGE = "1"
REGION = "euw1"
QUEUE = "RANKED_SOLO_5x5"


if __name__ == "__main__":
    # Fail fast with a clear message instead of sending requests that the
    # API can only reject.
    if not API_KEY:
        raise RuntimeError("The RIOT_API_KEY environment variable is not set.")

    # db_path is deliberately a module-level name: fetch_and_store_division
    # reads it as a global.
    db_path = "../data/ranked_players.db"
    create_table(db_path)

    # Resume from the most recently inserted row, if there is one.
    last_entry = get_last_entry(db_path)
    if last_entry:
        print(f"Last entry in the database: {json.dumps(last_entry, indent=4)}")
        print(f"Fetching data starting from tier : {last_entry[2]} and division : {last_entry[3]}...")
        # Columns 2 and 3 of ranked_players are tier and rank (division).
        TIER = last_entry[2]
        DIVISION = last_entry[3]

        # Keep PAGE a string, like its default value above.
        PAGE = str(get_last_entry_page(db_path, TIER, DIVISION))
        print(f"Last entry page: {PAGE}")
    else:
        print("No entries found in the database.")


    fetch_and_store_all_tiers(TIER, DIVISION, PAGE)

    print("All data fetched and stored in the database.")