In [1]:
import pandas as pd
import os
import requests
import boto3
from io import BytesIO

**EXTRACTION**

GET SUMMONER PUUID BY SUMMONER NAME AND TAGLINE

In [2]:
# The Riot API key is read from the environment so it is never stored in the notebook.
api_key = os.environ.get("ETL-LOL_API")
if not api_key:
    # Fail fast: requests omits None-valued params, so the calls below would
    # otherwise be sent without any key.
    raise RuntimeError("Environment variable 'ETL-LOL_API' is not set or is empty")

summ_name = "TATIAN"
tag_line = "LAS"

# Query-string parameters sent with every Riot API request in this notebook.
params = {
    'api_key': api_key
}

# Riot API hosts keyed by region name.
servers = {
    'AMERICAS': 'americas.api.riotgames.com',
    'ASIA': 'asia.api.riotgames.com',
    'EUROPE': 'europe.api.riotgames.com',
    'SEA': 'sea.api.riotgames.com'
}

endpoint = f"https://{servers['AMERICAS']}/riot/account/v1/accounts/by-riot-id/{summ_name}/{tag_line}"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(endpoint, params=params, timeout=30)
# Raise on 4xx/5xx instead of failing later with a KeyError on 'puuid'.
res.raise_for_status()
data = res.json()

puuid = data['puuid']

data

{'puuid': 'rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2PZHM8e-C6NFI9ejEKb-p2aKQkB9p6JJ0w',
 'gameName': 'TATIAN',
 'tagLine': 'LAS'}

GET SUMMONER DATA BY PUUID

In [3]:
# Look up the summoner record for the PUUID obtained in the previous step.
endpoint = f"https://la2.api.riotgames.com/lol/summoner/v4/summoners/by-puuid/{puuid}"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(endpoint, params=params, timeout=30)
# Raise on 4xx/5xx instead of failing below with a KeyError on 'id'.
res.raise_for_status()
data = res.json()

# Summoner id, used later to query the ranking endpoint.
summ_id = data['id']

data

{'id': 'lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw',
 'accountId': '3SsoFKJ2HArAyEROZF7-cn0zHxd1m-rxRYxt9MScpr1oaME',
 'puuid': 'rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2PZHM8e-C6NFI9ejEKb-p2aKQkB9p6JJ0w',
 'profileIconId': 5675,
 'revisionDate': 1731213102000,
 'summonerLevel': 362}

GET CHAMPIONS LIST

In [4]:
# Static champion catalogue from Data Dragon (no API key is sent with this request).
champions_endpoint = "https://ddragon.leagueoflegends.com/cdn/14.22.1/data/en_US/champion.json"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(champions_endpoint, timeout=30)
# Raise on 4xx/5xx instead of building a DataFrame from an error response.
res.raise_for_status()
data = res.json()

# data['data'] is keyed by champion; transpose so that each champion becomes a row.
df_champions = pd.DataFrame(data['data']).T

df_champions.head()

Unnamed: 0,version,id,key,name,title,blurb,info,image,tags,partype,stats
Aatrox,14.22.1,Aatrox,266,Aatrox,the Darkin Blade,Once honored defenders of Shurima against the ...,"{'attack': 8, 'defense': 4, 'magic': 3, 'diffi...","{'full': 'Aatrox.png', 'sprite': 'champion0.pn...",[Fighter],Blood Well,"{'hp': 650, 'hpperlevel': 114, 'mp': 0, 'mpper..."
Ahri,14.22.1,Ahri,103,Ahri,the Nine-Tailed Fox,Innately connected to the magic of the spirit ...,"{'attack': 3, 'defense': 4, 'magic': 8, 'diffi...","{'full': 'Ahri.png', 'sprite': 'champion0.png'...","[Mage, Assassin]",Mana,"{'hp': 590, 'hpperlevel': 104, 'mp': 418, 'mpp..."
Akali,14.22.1,Akali,84,Akali,the Rogue Assassin,Abandoning the Kinkou Order and her title of t...,"{'attack': 5, 'defense': 3, 'magic': 8, 'diffi...","{'full': 'Akali.png', 'sprite': 'champion0.png...",[Assassin],Energy,"{'hp': 600, 'hpperlevel': 119, 'mp': 200, 'mpp..."
Akshan,14.22.1,Akshan,166,Akshan,the Rogue Sentinel,"Raising an eyebrow in the face of danger, Aksh...","{'attack': 0, 'defense': 0, 'magic': 0, 'diffi...","{'full': 'Akshan.png', 'sprite': 'champion0.pn...","[Marksman, Assassin]",Mana,"{'hp': 630, 'hpperlevel': 107, 'mp': 350, 'mpp..."
Alistar,14.22.1,Alistar,12,Alistar,the Minotaur,Always a mighty warrior with a fearsome reputa...,"{'attack': 6, 'defense': 9, 'magic': 5, 'diffi...","{'full': 'Alistar.png', 'sprite': 'champion0.p...","[Tank, Support]",Mana,"{'hp': 685, 'hpperlevel': 120, 'mp': 350, 'mpp..."


GET MASTERY CHAMPIONS BY SUMMONER PUUID

In [14]:
# Champion mastery entries for the summoner identified by `puuid`.
endpoint = f"https://la2.api.riotgames.com/lol/champion-mastery/v4/champion-masteries/by-puuid/{puuid}"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(endpoint, params=params, timeout=30)
# Raise on 4xx/5xx; otherwise an error payload would be normalized into a
# DataFrame and later uploaded as if it were real data.
res.raise_for_status()
data = res.json()

# Flatten nested objects into dotted column names.
df_champions_mastery = pd.json_normalize(data)

df_champions_mastery.head()


Unnamed: 0,puuid,championId,championLevel,championPoints,lastPlayTime,championPointsSinceLastLevel,championPointsUntilNextLevel,markRequiredForNextLevel,tokensEarned,championSeasonMilestone,nextSeasonMilestone.requireGradeCounts.B-,nextSeasonMilestone.requireGradeCounts.C-,nextSeasonMilestone.rewardMarks,nextSeasonMilestone.bonus,nextSeasonMilestone.rewardConfig.rewardValue,nextSeasonMilestone.rewardConfig.rewardType,nextSeasonMilestone.rewardConfig.maximumReward,nextSeasonMilestone.totalGamesRequires,milestoneGrades,nextSeasonMilestone.requireGradeCounts.A-
0,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,64,32,372982,1717130755000,55382,-44382,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,
1,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,412,15,177802,1713936792000,47202,-36202,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,
2,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,222,13,153737,1705072769000,45137,-34137,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,
3,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,54,9,103913,1716999477000,39313,-28313,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,
4,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,22,10,102471,1719094494000,26871,-15871,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,


GET MATCHES LIST:
1- FIRST, I GET THE LIST OF MATCH IDs.
2- THEN, FROM THOSE IDs, I GET EACH MATCH'S DATA.

In [15]:
# Step 1: fetch the list of match ids for this player.
endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(endpoint, params=params, timeout=30)
# Raise on 4xx/5xx; otherwise the loop below would iterate over the keys of an error payload.
res.raise_for_status()
matches_id = res.json()

# Step 2: fetch each match. Raw payloads go into their own list so that
# `df_matches` only ever refers to a DataFrame.
matches_raw = []
for matchId in matches_id:
    endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/{matchId}"

    res = requests.get(endpoint, params=params, timeout=30)
    # Stop at the first failed request (e.g. rate limiting) rather than
    # mixing error payloads in with real matches.
    res.raise_for_status()
    matches_raw.append(res.json())

# Flatten nested objects into dotted column names.
df_matches = pd.json_normalize(matches_raw)

df_matches.head()

Unnamed: 0,metadata.dataVersion,metadata.matchId,metadata.participants,info.endOfGameResult,info.gameCreation,info.gameDuration,info.gameEndTimestamp,info.gameId,info.gameMode,info.gameName,info.gameStartTimestamp,info.gameType,info.gameVersion,info.mapId,info.participants,info.platformId,info.queueId,info.teams,info.tournamentCode
0,2,LA2_1460246484,[lmY9N2GCcOkNkiEn2FCEcUSbMx0njx137z_JYJ70BjGq5...,GameComplete,1731212021831,971,1731213098902,1460246484,CLASSIC,teambuilder-match-1460246484,1731212127539,MATCHED_GAME,14.22.633.1362,11,"[{'allInPings': 0, 'assistMePings': 1, 'assist...",LA2,420,"[{'bans': [{'championId': 910, 'pickTurn': 1},...",
1,2,LA2_1460072291,[rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2...,GameComplete,1731171961031,1551,1731173602569,1460072291,CLASSIC,teambuilder-match-1460072291,1731172051390,MATCHED_GAME,14.22.633.1362,11,"[{'allInPings': 0, 'assistMePings': 0, 'assist...",LA2,440,"[{'bans': [{'championId': 131, 'pickTurn': 1},...",
2,2,LA2_1460065368,[ZVJ2AwGOYNvpzMKVQ05zvFJ262i_mMWwLAOrLzVhrm9Q_...,GameComplete,1731170142054,1192,1731171456274,1460065368,ARAM,teambuilder-match-1460065368,1731170263577,MATCHED_GAME,14.22.633.1362,12,"[{'allInPings': 0, 'assistMePings': 0, 'assist...",LA2,450,"[{'bans': [], 'objectives': {'baron': {'first'...",
3,2,LA2_1459845889,[89dgszfFNxrGqy53Xo3-6-bdqKkQrmZxpkM3DroSMNrCh...,GameComplete,1731093971500,1965,1731096091643,1459845889,CLASSIC,teambuilder-match-1459845889,1731094126456,MATCHED_GAME,14.22.633.1362,11,"[{'allInPings': 0, 'assistMePings': 0, 'assist...",LA2,440,"[{'bans': [{'championId': 25, 'pickTurn': 1}, ...",
4,2,LA2_1459459621,[mwemyilHHb8cGLbG01UEbzxza1ZS_rmaWTzJVevAXzP91...,GameComplete,1730940992347,1451,1730942534176,1459459621,CLASSIC,teambuilder-match-1459459621,1730941083175,MATCHED_GAME,14.22.633.1362,11,"[{'allInPings': 7, 'assistMePings': 1, 'assist...",LA2,440,"[{'bans': [{'championId': 117, 'pickTurn': 1},...",


GET MATCHES TIMELINE

In [16]:
# Fetch the timeline of every match id collected in `matches_id`.
data = []
for matchId in matches_id:
    endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/{matchId}/timeline"

    # timeout: requests waits indefinitely by default, which would hang the notebook.
    res = requests.get(endpoint, params=params, timeout=30)
    # Stop at the first failed request (e.g. rate limiting) rather than
    # mixing error payloads in with real timelines.
    res.raise_for_status()
    data.append(res.json())

# Flatten nested objects into dotted column names.
df_matches_timeline = pd.json_normalize(data)
df_matches_timeline.head()

Unnamed: 0,metadata.dataVersion,metadata.matchId,metadata.participants,info.endOfGameResult,info.frameInterval,info.frames,info.gameId,info.participants
0,2,LA2_1460246484,[lmY9N2GCcOkNkiEn2FCEcUSbMx0njx137z_JYJ70BjGq5...,GameComplete,60000,"[{'events': [{'realTimestamp': 1731212127400, ...",1460246484,"[{'participantId': 1, 'puuid': 'lmY9N2GCcOkNki..."
1,2,LA2_1460072291,[rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2...,GameComplete,60000,"[{'events': [{'realTimestamp': 1731172051251, ...",1460072291,"[{'participantId': 1, 'puuid': 'rybLSc_KZF6-IQ..."
2,2,LA2_1460065368,[ZVJ2AwGOYNvpzMKVQ05zvFJ262i_mMWwLAOrLzVhrm9Q_...,GameComplete,60000,"[{'events': [{'realTimestamp': 1731170263511, ...",1460065368,"[{'participantId': 1, 'puuid': 'ZVJ2AwGOYNvpzM..."
3,2,LA2_1459845889,[89dgszfFNxrGqy53Xo3-6-bdqKkQrmZxpkM3DroSMNrCh...,GameComplete,60000,"[{'events': [{'realTimestamp': 1731094126314, ...",1459845889,"[{'participantId': 1, 'puuid': '89dgszfFNxrGqy..."
4,2,LA2_1459459621,[mwemyilHHb8cGLbG01UEbzxza1ZS_rmaWTzJVevAXzP91...,GameComplete,60000,"[{'events': [{'realTimestamp': 1730941082954, ...",1459459621,"[{'participantId': 1, 'puuid': 'mwemyilHHb8cGL..."


GET SUMMONER RANKING BY SUMMONER ID

In [17]:
# League (ranking) entries for the summoner id obtained earlier.
endpoint = f"https://la2.api.riotgames.com/lol/league/v4/entries/by-summoner/{summ_id}"

# timeout: requests waits indefinitely by default, which would hang the notebook.
res = requests.get(endpoint, params=params, timeout=30)
# Raise on 4xx/5xx; otherwise an error payload would be normalized into a
# DataFrame and later uploaded as if it were real data.
res.raise_for_status()

data = res.json()

# One row per entry returned by the endpoint.
df_ranking = pd.json_normalize(data)

df_ranking

Unnamed: 0,leagueId,queueType,tier,rank,summonerId,leaguePoints,wins,losses,veteran,inactive,freshBlood,hotStreak
0,6d220f29-c66d-4999-8bee-fc750c55fb2e,RANKED_SOLO_5x5,PLATINUM,III,lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw,60,6,3,False,False,False,False
1,67df7375-00af-4a4f-9282-761ec6d3ca50,RANKED_FLEX_SR,EMERALD,IV,lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw,71,4,7,False,False,False,False


**LOAD RAW DATA**

LOADING RAW DATA TO AWS S3

In [18]:
# Each DataFrame is serialized into an in-memory BytesIO buffer so that no CSV
# file has to be written to disk. If the files were on disk, `upload_file`
# would be used instead of `put_object`.

# Single source of truth for the target bucket (used for creation and uploads).
BUCKET_NAME = 'mylolapibucket1'

s3_client = boto3.client('s3', region_name='us-east-1')

# Create the bucket that receives the raw data.
s3_client.create_bucket(ACL='private', Bucket=BUCKET_NAME)

def upload_dataframe_to_s3(dataframe:pd.DataFrame, object_name:str, bucket_name:str) -> None:
    """Upload a DataFrame to S3 as a CSV object without touching the local disk.

    Uses the module-level ``s3_client``.

    Args:
        dataframe: Frame to serialize; its index is not written.
        object_name: Key of the object to create in the bucket.
        bucket_name: Name of the destination bucket.
    """
    csv_buffer = BytesIO()
    dataframe.to_csv(csv_buffer, index=False)

    s3_client.put_object(Bucket=bucket_name, Key=object_name, Body=csv_buffer.getvalue())

# Object key -> DataFrame; uploaded in this order.
raw_frames = {
    "df_champions_mastery.csv": df_champions_mastery,
    "df_champions.csv": df_champions,
    "df_matches.csv": df_matches,
    "df_matches_timeline.csv": df_matches_timeline,
    "df_ranking.csv": df_ranking,
}

for object_name, frame in raw_frames.items():
    upload_dataframe_to_s3(frame, object_name, BUCKET_NAME)

**TRANSFORMATION**

In [7]:
from pyspark.sql import SparkSession, SQLContext
from pyspark.sql.types import IntegerType, StringType, StructField, StructType
from pyspark.sql.functions import col, lit

# Reuse the active Spark session if there is one, otherwise start a new one.
spark_session = SparkSession.builder.appName("LOL_API_MODERN_DATAWAREHOUSE").getOrCreate()

# SQLContext takes a SparkContext as its first argument, so pass the session's
# context and hand over the session itself explicitly as the second argument.
# NOTE(review): SQLContext is deprecated in Spark 3.x in favour of SparkSession;
# kept here in case later cells rely on `sql_context`.
sql_context = SQLContext(spark_session.sparkContext, spark_session)

# Convert the pandas champions frame into a Spark DataFrame (schema is inferred).
df_champions_spark = spark_session.createDataFrame(df_champions)

df_champions_spark




DataFrame[version: string, id: string, key: string, name: string, title: string, blurb: string, info: map<string,bigint>, image: map<string,string>, tags: array<string>, partype: string, stats: map<string,bigint>]