In [1]:
import pandas as pd
import os
import requests
import boto3
from utils import *
import datetime

**EXTRACTION**

GET SUMMONER PUUID BY SUMMONER NAME AND TAGLINE

In [2]:
# Read the Riot API key from the environment and fail early with a clear
# message if it is missing, instead of sending an unauthenticated request.
api_key = os.environ.get("ETL-LOL_API")
if not api_key:
    raise RuntimeError("Environment variable 'ETL-LOL_API' is not set; cannot call the Riot API.")

# Riot ID (game name + tag line) of the summoner to extract.
summ_name = "TATIAN"
tag_line = "LAS"

# Query-string parameters shared by every Riot API request in this notebook.
params = {
    'api_key': api_key
}

# Regional routing hosts keyed by region name.
servers = {
    'AMERICAS': 'americas.api.riotgames.com',
    'ASIA': 'asia.api.riotgames.com',
    'EUROPE': 'europe.api.riotgames.com',
    'SEA': 'sea.api.riotgames.com'
}

# Build the account lookup URL from the routing table instead of repeating the host.
endpoint = f"https://{servers['AMERICAS']}/riot/account/v1/accounts/by-riot-id/{summ_name}/{tag_line}"

# A timeout keeps the cell from hanging forever; raise_for_status surfaces
# HTTP errors here rather than as a KeyError on 'puuid' below.
res = requests.get(endpoint, params=params, timeout=10)
res.raise_for_status()
data = res.json()

# The PUUID is the key used by the following extraction cells.
puuid = data['puuid']

df_summoner = pd.json_normalize(data)

df_summoner

Unnamed: 0,puuid,gameName,tagLine
0,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,TATIAN,LAS


GET SUMMONER DATA BY PUUID

In [3]:
# Fetch the summoner profile for the PUUID resolved in the previous cell.
endpoint = f"https://la2.api.riotgames.com/lol/summoner/v4/summoners/by-puuid/{puuid}"

# Time out instead of hanging, and stop on HTTP errors before reading fields.
res = requests.get(endpoint, params=params, timeout=10)
res.raise_for_status()
data = res.json()

# The summoner id is needed later for the ranking lookup.
summ_id = data['id']

# 'puuid' is already a column of df_summoner, so drop it from the new payload.
data.pop('puuid')

summoner_details = pd.json_normalize(data).drop(columns=['profileIconId'])

# Drop any columns left over from a previous run of this cell before
# concatenating, so re-running the cell does not create duplicate columns.
df_summoner = pd.concat(
    [df_summoner.drop(columns=summoner_details.columns, errors='ignore'), summoner_details],
    axis=1,
)

df_summoner

Unnamed: 0,puuid,gameName,tagLine,id,accountId,revisionDate,summonerLevel
0,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,TATIAN,LAS,lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw,3SsoFKJ2HArAyEROZF7-cn0zHxd1m-rxRYxt9MScpr1oaME,1731213102000,362


GET CHAMPIONS LIST

In [4]:
# Data Dragon patch version to download; change it here to target another patch.
DDRAGON_VERSION = "14.22.1"

champions_endpoint = f"https://ddragon.leagueoflegends.com/cdn/{DDRAGON_VERSION}/data/en_US/champion.json"

# Time out instead of hanging, and stop on HTTP errors before parsing the body.
res = requests.get(champions_endpoint, timeout=10)
res.raise_for_status()
data = res.json()

# data['data'] maps champion name -> attributes; transpose so that each
# champion becomes a row and each attribute a column.
df_champions = pd.DataFrame(data['data']).T

df_champions.head(1)

Unnamed: 0,version,id,key,name,title,blurb,info,image,tags,partype,stats
Aatrox,14.22.1,Aatrox,266,Aatrox,the Darkin Blade,Once honored defenders of Shurima against the ...,"{'attack': 8, 'defense': 4, 'magic': 3, 'diffi...","{'full': 'Aatrox.png', 'sprite': 'champion0.pn...",[Fighter],Blood Well,"{'hp': 650, 'hpperlevel': 114, 'mp': 0, 'mpper..."


GET MASTERY CHAMPIONS BY SUMMONER PUUID

In [5]:
# Fetch the champion mastery entries for the summoner's PUUID.
endpoint = f"https://la2.api.riotgames.com/lol/champion-mastery/v4/champion-masteries/by-puuid/{puuid}"

# Time out instead of hanging, and stop on HTTP errors so an error payload
# is never normalized into the mastery table.
res = requests.get(endpoint, params=params, timeout=10)
res.raise_for_status()
data = res.json()

# Nested objects are flattened into dotted column names by json_normalize.
df_champions_mastery = pd.json_normalize(data)

df_champions_mastery.head(1)


Unnamed: 0,puuid,championId,championLevel,championPoints,lastPlayTime,championPointsSinceLastLevel,championPointsUntilNextLevel,markRequiredForNextLevel,tokensEarned,championSeasonMilestone,nextSeasonMilestone.requireGradeCounts.B-,nextSeasonMilestone.requireGradeCounts.C-,nextSeasonMilestone.rewardMarks,nextSeasonMilestone.bonus,nextSeasonMilestone.rewardConfig.rewardValue,nextSeasonMilestone.rewardConfig.rewardType,nextSeasonMilestone.rewardConfig.maximumReward,nextSeasonMilestone.totalGamesRequires,milestoneGrades,nextSeasonMilestone.requireGradeCounts.A-
0,rybLSc_KZF6-IQ8HCLMML90_kvd00YLW5DE0ruoXGgZF2P...,64,32,372982,1717130755000,55382,-44382,2,0,0,1.0,4,1,False,5f4333db-e90d-4705-903b-08dbf5e61006,HEXTECH_CHEST,6.0,5,,


GET THE LAST 20 MATCHES:
1- FIRST, GET THE MATCH IDs.
2- THEN, USE THOSE IDs TO GET EACH MATCH'S DATA.

In [6]:
# Step 1: list the match ids for the summoner. No paging parameters are sent,
# so the API's default page size applies (the heading above says 20 — TODO confirm).
endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids"

res = requests.get(endpoint, params=params, timeout=10)
res.raise_for_status()
matches_id = res.json()

# Step 2: download the full payload of every listed match.
matches = []
for matchId in matches_id:
    endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/{matchId}"

    res = requests.get(endpoint, params=params, timeout=10)
    # Stop on HTTP errors (e.g. rate limiting) so an error body is never
    # appended to the list as if it were a match.
    res.raise_for_status()
    matches.append(res.json())

df_matches = pd.json_normalize(matches)

df_matches.head(1)

Unnamed: 0,metadata.dataVersion,metadata.matchId,metadata.participants,info.endOfGameResult,info.gameCreation,info.gameDuration,info.gameEndTimestamp,info.gameId,info.gameMode,info.gameName,info.gameStartTimestamp,info.gameType,info.gameVersion,info.mapId,info.participants,info.platformId,info.queueId,info.teams,info.tournamentCode
0,2,LA2_1460246484,[lmY9N2GCcOkNkiEn2FCEcUSbMx0njx137z_JYJ70BjGq5...,GameComplete,1731212021831,971,1731213098902,1460246484,CLASSIC,teambuilder-match-1460246484,1731212127539,MATCHED_GAME,14.22.633.1362,11,"[{'allInPings': 0, 'assistMePings': 1, 'assist...",LA2,420,"[{'bans': [{'championId': 910, 'pickTurn': 1},...",


GET MATCHES TIMELINE

In [7]:
# Download the timeline of every match id collected in the previous cell.
data = []
for matchId in matches_id:
    endpoint = f"https://americas.api.riotgames.com/lol/match/v5/matches/{matchId}/timeline"

    res = requests.get(endpoint, params=params, timeout=10)
    # Stop on HTTP errors (e.g. rate limiting) so an error body is never
    # stored as if it were a timeline.
    res.raise_for_status()
    data.append(res.json())

df_matches_timeline = pd.json_normalize(data)
df_matches_timeline.head(1)

Unnamed: 0,metadata.dataVersion,metadata.matchId,metadata.participants,info.endOfGameResult,info.frameInterval,info.frames,info.gameId,info.participants
0,2,LA2_1460246484,[lmY9N2GCcOkNkiEn2FCEcUSbMx0njx137z_JYJ70BjGq5...,GameComplete,60000,"[{'events': [{'realTimestamp': 1731212127400, ...",1460246484,"[{'participantId': 1, 'puuid': 'lmY9N2GCcOkNki..."


GET SUMMONER RANKING BY SUMMONER ID

In [8]:
# Fetch the ranked league entries for the summoner id obtained earlier.
endpoint = f"https://la2.api.riotgames.com/lol/league/v4/entries/by-summoner/{summ_id}"

# Time out instead of hanging, and stop on HTTP errors so an error payload
# is never normalized into the ranking table.
res = requests.get(endpoint, params=params, timeout=10)
res.raise_for_status()

data = res.json()

# One row per entry returned by the endpoint.
df_ranking = pd.json_normalize(data)

df_ranking

Unnamed: 0,leagueId,queueType,tier,rank,summonerId,leaguePoints,wins,losses,veteran,inactive,freshBlood,hotStreak
0,6d220f29-c66d-4999-8bee-fc750c55fb2e,RANKED_SOLO_5x5,PLATINUM,III,lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw,60,6,3,False,False,False,False
1,67df7375-00af-4a4f-9282-761ec6d3ca50,RANKED_FLEX_SR,EMERALD,IV,lJDYoZSk-rtpKy1UnGV65cDlknAvnLOq8uvnGRFFCjIwHw,71,4,7,False,False,False,False


**LOAD RAW DATA TO DATA LAKE (S3)**

CREATING S3 CLIENT

In [12]:
# S3 client reused by the bucket-creation, upload and download cells below.
s3_client = boto3.client(service_name='s3', region_name='us-east-1')

CREATING BUCKET

In [18]:
# Create the private bucket used as the raw data lake; the service response
# is shown as the cell output.
bucket_response = s3_client.create_bucket(Bucket='mylolapibucket1', ACL='private')
bucket_response

{'ResponseMetadata': {'RequestId': 'C8PJYRDTTAQWJGSE',
  'HostId': 'i4eX0rp7iLnYXdhw9g7wTieWQJoBb39a9pshims7Pzc+0JKdrs21S8LFTdl9ivOCyAq5ChLCI/0O4/2nX+mjsQ==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'i4eX0rp7iLnYXdhw9g7wTieWQJoBb39a9pshims7Pzc+0JKdrs21S8LFTdl9ivOCyAq5ChLCI/0O4/2nX+mjsQ==',
   'x-amz-request-id': 'C8PJYRDTTAQWJGSE',
   'date': 'Tue, 19 Nov 2024 20:18:55 GMT',
   'location': '/mylolapibucket1',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Location': '/mylolapibucket1'}

LOADING DATA

In [19]:
# Date-partitioned prefix (YYYY-MM-DD) under which today's raw extracts are stored.
# isoformat() yields the same text as strftime("%Y-%m-%d") and avoids reusing
# double quotes inside a double-quoted f-string, which only parses on Python 3.12+.
s3_path = f"lol_data/{datetime.date.today().isoformat()}"

# Raw DataFrames to upload, keyed by the CSV file name (without extension).
raw_frames = {
    "summoner": df_summoner,
    "ranking": df_ranking,
    "champions": df_champions,
    "champions_mastery": df_champions_mastery,
    "matches": df_matches,
    "matches_timeline": df_matches_timeline,
}

# Upload every frame as <s3_path>/<name>.csv to the data lake bucket.
for table_name, frame in raw_frames.items():
    upload_dataframe_to_s3(s3_client, frame, f"{s3_path}/{table_name}.csv", "mylolapibucket1")

**TRANSFORMATION**

In [161]:
import findspark
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, from_unixtime, explode, from_json, concat_ws
from pyspark.sql.types import IntegerType, FloatType, StringType, StructField, StructType, MapType, ArrayType

CREATING SPARK SESSION

In [None]:
# Let findspark set up the local Spark installation before a session is requested.
findspark.init()

# Reuse an already running session if there is one; otherwise start a new one
# under this application name.
session_builder = SparkSession.builder.appName("LOL_API_MODERN_DATAWAREHOUSE")
spark_session = session_builder.getOrCreate()

GETTING CSV FROM DATA LAKE (S3)

In [20]:
# Reading straight from S3 through the s3a:// connector failed with a
# Py4JJavaError, so each file is fetched through the S3 helper instead:
#df_champions_spark = spark_session.read.csv(f's3a://mylolapibucket1/{s3_path}/df_champions.csv')

def _read_lake_csv(file_name):
    """Fetch one CSV of today's extract from the bucket and load it with Spark.

    The value returned by download_files_from_s3 is handed unchanged to
    spark_session.read.csv; the first CSV row is used as the header.
    """
    fetched = download_files_from_s3(s3_client, 'mylolapibucket1', f"{s3_path}/{file_name}")
    return spark_session.read.csv(fetched, header=True)


df_summoner_spark = _read_lake_csv("summoner.csv")
df_ranking_spark = _read_lake_csv("ranking.csv")
df_champions_spark = _read_lake_csv("champions.csv")
df_champions_mastery_spark = _read_lake_csv("champions_mastery.csv")
df_matches_spark = _read_lake_csv("matches.csv")
df_matches_timeline_spark = _read_lake_csv("matches_timeline.csv")

SUMMONER TRANSFORM

In [167]:
# Build the summoner table in a single chain:
#   1. attach each ranking entry to the summoner row (summoner id == ranking summonerId),
#   2. turn the epoch-milliseconds revisionDate into a calendar date,
#   3. drop the ranking-side summonerId, which duplicates the summoner id,
#   4. rename the remaining 'id' column to 'summonerId'.
summoner_matches_ranking = df_summoner_spark['id'] == df_ranking_spark['summonerId']
revision_as_date = from_unixtime(col("revisionDate") / 1000).cast("date")

df_summoner_spark = (
    df_summoner_spark
    .join(df_ranking_spark, on=summoner_matches_ranking)
    .withColumn("revisionDate", revision_as_date)
    .drop("summonerId")
    .withColumnRenamed("id", "summonerId")
)

df_summoner_spark.show()

+--------------------+--------+-------+--------------------+--------------------+------------+-------------+--------------------+---------------+--------+----+------------+----+------+-------+--------+----------+---------+
|               puuid|gameName|tagLine|          summonerId|           accountId|revisionDate|summonerLevel|            leagueId|      queueType|    tier|rank|leaguePoints|wins|losses|veteran|inactive|freshBlood|hotStreak|
+--------------------+--------+-------+--------------------+--------------------+------------+-------------+--------------------+---------------+--------+----+------------+----+------+-------+--------+----------+---------+
|rybLSc_KZF6-IQ8HC...|  TATIAN|    LAS|lJDYoZSk-rtpKy1Un...|3SsoFKJ2HArAyEROZ...|  2024-11-10|          362|6d220f29-c66d-499...|RANKED_SOLO_5x5|PLATINUM| III|          60|   6|     3|  False|   False|     False|    False|
|rybLSc_KZF6-IQ8HC...|  TATIAN|    LAS|lJDYoZSk-rtpKy1Un...|3SsoFKJ2HArAyEROZ...|  2024-11-10|          362|

CHAMPIONS TRANSFORM

In [None]:
# Convert the 'info', 'stats' and 'tags' columns from their JSON-like text
# form into typed Spark columns (map of ints, map of floats, array of strings).
# FloatType must be imported from pyspark.sql.types together with the other types.
df_champions_spark = df_champions_spark.withColumn("info", from_json(col("info"), schema=MapType(StringType(), IntegerType())))
df_champions_spark = df_champions_spark.withColumn("stats", from_json(col("stats"), schema=MapType(StringType(), FloatType())))
df_champions_spark = df_champions_spark.withColumn("tags", from_json(col("tags"), schema=ArrayType(StringType())))

# Flatten the tag array into a single comma-separated string.
df_champions_spark = df_champions_spark.withColumn("tags", concat_ws(", ", col("tags")))

# Keys of the 'info' map that become top-level columns of the same name.
info_fields = ["attack", "defense", "magic", "difficulty"]

# (key in the 'stats' map, output column name), in output column order.
stats_fields = [
    ("hp", "hp"),
    ("hpperlevel", "hpPerLevel"),
    ("mp", "mp"),
    ("mpperlevel", "mpPerLevel"),
    ("movespeed", "moveSpeed"),
    ("armor", "armor"),
    ("armorperlevel", "armorPerLevel"),
    ("spellblock", "spellBlock"),
    ("spellblockperlevel", "spellBlockPerLevel"),
    ("attackrange", "attackRange"),
    ("hpregen", "hpRegen"),
    ("hpregenperlevel", "hpRegenPerLevel"),
    ("mpregen", "mpRegen"),
    ("mpregenperlevel", "mpRegenPerLevel"),
    ("crit", "crit"),
    ("critperlevel", "critPerLevel"),
    ("attackdamage", "attackDamage"),
    ("attackdamageperlevel", "attackDamagePerLevel"),
    ("attackspeedperlevel", "attackSpeedPerLevel"),
    ("attackspeed", "attackSpeed"),
]

# Expand the 'info' map into one column per key, keeping all existing columns.
df_champions_spark = df_champions_spark.select(
    "*",
    *[col(f"info.{field}").alias(field) for field in info_fields],
)

# Expand the 'stats' map into its own table.
# NOTE(review): this table carries no champion identifier column, so it can
# only be matched back to df_champions_spark by row position — confirm intent.
df_champions_stats = df_champions_spark.select(
    *[col(f"stats.{source}").alias(target) for source, target in stats_fields]
)

# The nested source columns are no longer needed once they have been expanded.
df_champions_spark = df_champions_spark.drop("info", "image", "stats")

df_champions_spark.show()