In [46]:
import polars as pl
from polars_reverse_geocode import find_closest_country
import json

In [47]:
# Collect the true location of every round from finished live challenges.
# The events file is parsed line by line: one JSON document per line.
answers = []
# JSON text is UTF-8; pin the encoding so parsing does not depend on the
# platform's locale default.
with open("./geoguessr_events.json", "r", encoding="utf-8") as f:
    for line in f:
        row = json.loads(line)
        if row["code"] != "LiveChallengeFinished":
            continue
        game_id = row["gameId"]
        # `start=1` yields the 1-based round number directly. The loop variable
        # is deliberately not called `round`, which would shadow the built-in
        # for every later cell in the kernel.
        for round_number, round_state in enumerate(
            row["liveChallenge"]["state"]["rounds"], start=1
        ):
            try:
                coordinates = round_state["answer"]["coordinateAnswerPayload"]["coordinate"]
            except TypeError:
                # Some level of the answer payload is not subscriptable
                # (presumably None when the round has no coordinate answer);
                # such rounds are skipped.
                continue
            answers.append(
                {
                    "gameId": game_id,
                    "roundNumber": round_number,
                    "lat": coordinates["lat"],
                    "lng": coordinates["lng"],
                }
            )
# Build the frame once, after the file handle has been released.
answers_df = pl.DataFrame(answers)
answers_df.head()

gameId,roundNumber,lat,lng
str,i64,f64,f64
"""d30d3d04-d8d1-44cb-ab9e-294433…",1,-78.065498,164.144104
"""d30d3d04-d8d1-44cb-ab9e-294433…",2,13.854082,100.843712
"""d30d3d04-d8d1-44cb-ab9e-294433…",3,13.557908,144.854996
"""d30d3d04-d8d1-44cb-ab9e-294433…",4,38.005451,23.947958
"""d30d3d04-d8d1-44cb-ab9e-294433…",5,34.247738,36.002708


In [48]:
# Tag each answer with a country code derived from its coordinates.
answers_df = answers_df.with_columns(country_code=find_closest_country("lat", "lng"))
answers_df.head(5)

gameId,roundNumber,lat,lng,country_code
str,i64,f64,f64,str
"""d30d3d04-d8d1-44cb-ab9e-294433…",1,-78.065498,164.144104,"""AQ"""
"""d30d3d04-d8d1-44cb-ab9e-294433…",2,13.854082,100.843712,"""TH"""
"""d30d3d04-d8d1-44cb-ab9e-294433…",3,13.557908,144.854996,"""GU"""
"""d30d3d04-d8d1-44cb-ab9e-294433…",4,38.005451,23.947958,"""GR"""
"""d30d3d04-d8d1-44cb-ab9e-294433…",5,34.247738,36.002708,"""LB"""


In [49]:
# Flatten every leaderboard update into one record per submitted guess.
records = []
# JSON text is UTF-8; pin the encoding so parsing (including player names)
# does not depend on the platform's locale default.
with open("./geoguessr_events.json", "r", encoding="utf-8") as f:
    for line in f:
        row = json.loads(line)
        if row["code"] != "LiveChallengeLeaderboardUpdate":
            continue
        round_board = row["liveChallenge"]["leaderboards"]["round"]
        guesses = round_board["guesses"]
        entries = round_board["entries"]
        # Event-level fields (everything except the nested payload) are copied
        # onto each guess record.
        common_values = {
            key: value for key, value in row.items() if key != "liveChallenge"
        }
        for i, guess in enumerate(guesses):
            # Empty/None slots carry no guess data.
            if not guess:
                continue
            # Event-level fields win over guess fields on a key collision.
            record = {**guess, **common_values}
            # `guesses` and `entries` are indexed in parallel: entry i names
            # the player of guess i.
            record["player"] = entries[i]["name"]
            records.append(record)
guesses_df = pl.DataFrame(records)
# A (player, round, game) triple can appear in several records; keep the last
# one seen. `maintain_order=True` makes the row order deterministic, so
# `head()` and any exported CSV are reproducible across runs.
guesses_df = guesses_df.unique(
    subset=["player", "roundNumber", "gameId"], keep="last", maintain_order=True
)
guesses_df.head()

roundNumber,lat,lng,distance,time,score,wasCorrect,gameId,playerName,code,playerId,payload,timestamp,lobby,countryGuess,coordinateGuess,battleRoyaleGameState,battleRoyalePlayer,duel,bullseye,player
i64,f64,f64,f64,i64,i64,bool,str,str,str,null,null,str,null,null,null,null,null,null,null,str
2,24.925957,84.90989,801284.133393,25,2922,False,"""e167e423-b11b-4397-a1a3-fcd00f…","""Demaga Chill""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-08T08:42:38.9661186Z""",,,,,,,,"""Demaga Chill"""
3,41.331447,19.494662,1144800.0,17,2321,False,"""c40e6798-271f-4e4e-80bc-2e9af7…","""San4i""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-16T15:08:55.7319049Z""",,,,,,,,"""San4i"""
5,35.852773,128.567611,356080.926705,27,3938,False,"""d8dbceba-638b-40fc-8718-9c4348…","""Ishenko01y""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-15T09:41:20.5245749Z""",,,,,,,,"""Ishenko01y"""
5,20.711627,-103.32937,7767200.0,39,27,False,"""ea8d34ee-d2c9-4283-834e-f0438f…","""Ishenko01y""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-21T18:49:25.6811034Z""",,,,,,,,"""Ishenko01y"""
2,49.52037,18.768828,478099.896782,25,3629,False,"""db11058f-d91c-4cd5-a3ad-8d3b8a…","""Gor Kosty""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-06T12:51:21.0636625Z""",,,,,,,,"""Gor Kosty"""


In [50]:
# Tag each guess with a country code derived from the guessed coordinates.
guesses_df = guesses_df.with_columns(country_code=find_closest_country("lat", "lng"))
guesses_df.head(n=2)

roundNumber,lat,lng,distance,time,score,wasCorrect,gameId,playerName,code,playerId,payload,timestamp,lobby,countryGuess,coordinateGuess,battleRoyaleGameState,battleRoyalePlayer,duel,bullseye,player,country_code
i64,f64,f64,f64,i64,i64,bool,str,str,str,null,null,str,null,null,null,null,null,null,null,str,str
2,24.925957,84.90989,801284.133393,25,2922,False,"""e167e423-b11b-4397-a1a3-fcd00f…","""Demaga Chill""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-08T08:42:38.9661186Z""",,,,,,,,"""Demaga Chill""","""IN"""
3,41.331447,19.494662,1144800.0,17,2321,False,"""c40e6798-271f-4e4e-80bc-2e9af7…","""San4i""","""LiveChallengeLeaderboardUpdate""",,,"""2024-08-16T15:08:55.7319049Z""",,,,,,,,"""San4i""","""AL"""


In [51]:
# Number of guesses per country code, most frequent first.
# NOTE(review): "GH" tops the recorded counts; the point (0, 0) lies closest to
# Ghana, so these may be placeholder/no-guess coordinates rather than real
# guesses — confirm before interpreting.
guesses_df.get_column("country_code").value_counts(sort=True)

country_code,count
str,u32
"""GH""",475
"""US""",274
"""FR""",163
"""ES""",125
"""GB""",121
…,…
"""RE""",1
"""SH""",1
"""NA""",1
"""NF""",1


In [52]:
# Collect the option set of every finished game, one entry per game id.
game_options = {}
# JSON text is UTF-8; pin the encoding so parsing does not depend on the
# platform's locale default.
with open("./geoguessr_events.json", "r", encoding="utf-8") as f:
    for line in f:
        row = json.loads(line)
        if row["code"] != "LiveChallengeFinished":
            continue
        game_id = row["gameId"]
        # Only the first finished event seen for a game contributes its options.
        if game_id not in game_options:
            game_options[game_id] = row["liveChallenge"]["state"]["options"]

# Flatten each option set into one row: the nested "movementOptions" fields are
# hoisted to the top level, followed by the remaining options. The parsed
# dictionaries in `game_options` are left unmodified.
games = []
for game_id, options in game_options.items():
    game = {"game_id": game_id}
    game.update(options["movementOptions"])
    game.update(
        {key: value for key, value in options.items() if key != "movementOptions"}
    )
    games.append(game)
games_df = pl.DataFrame(games)
games_df.head()

game_id,forbidMoving,forbidZooming,forbidRotating,roundCount,mapSlug,roundTime,isSinglePlayer,scoringType,context,date
str,bool,bool,bool,i64,str,i64,bool,str,str,str
"""d30d3d04-d8d1-44cb-ab9e-294433…",True,False,False,5,"""world""",20,False,"""Unknown""","""""","""2024-10-11T13:39:41.699Z"""
"""43b82fcf-29d8-4daf-84e9-80e029…",True,False,False,5,"""world""",40,False,"""Unknown""","""""","""2024-10-30T18:06:54.013Z"""
"""14cf63a7-1289-4b09-9ab8-75eb4f…",True,False,False,5,"""world""",40,False,"""Unknown""","""""","""2024-10-30T18:11:55.588Z"""
"""7df0b9bb-c330-45fe-9117-2820ce…",True,False,False,5,"""world""",40,False,"""Unknown""","""""","""2024-10-30T18:16:38.505Z"""
"""bc1276a6-4244-4a22-b60a-1c5f3f…",True,False,False,5,"""world""",40,False,"""Unknown""","""""","""2024-10-30T18:21:57.020Z"""


In [53]:
# Replace the raw id of one known map with a readable name; every other
# mapSlug value is left as-is.
games_df = games_df.with_columns(
    mapSlug=pl.col("mapSlug").replace({"5d374dc141d2a43c1cd4527b": "GeoDetective"})
)
games_df.get_column("mapSlug").value_counts()

mapSlug,count
str,u32
"""GeoDetective""",3
"""world""",294
"""61a1846aee665b00016680ce""",1
"""61ea4e1d4356b500014cdc8d""",1
"""5d0ce72c8b19a91fe05aa7a8""",1
"""57357d9f77abe957e8cfd15f""",3
"""60de2a8a81b92c00015f29e1""",2
"""6029991c5048850001d572a9""",3
"""5cfda2c9bc79e16dd866104d""",1
"""5ad0b0cb2a3e0d4da46cc44c""",4


In [54]:
# Export the per-round answers to CSV in the working directory.
answers_df.write_csv(file="answers_df.csv")

In [55]:
# Export the de-duplicated player guesses to CSV in the working directory.
guesses_df.write_csv(file="guesses_df.csv")

In [56]:
# Export the per-game options table to CSV in the working directory.
games_df.write_csv(file="games_df.csv")