In [None]:
import pandas as pd
# Display every column when a DataFrame is rendered (None removes the column limit).
pd.set_option("display.max_columns", None)
# Display full cell contents instead of truncating long values (None removes the width limit).
pd.set_option('display.max_colwidth', None)
import json
import geopandas as gpd

In [None]:
# Build `answers_df`: one row per round that has a coordinate answer, taken
# from the "LiveChallengeFinished" events of the newline-delimited JSON log.

# JSON text is UTF-8, so decode explicitly rather than relying on the
# platform's default encoding.
with open("./geoguessr_events.json", "r", encoding="utf-8") as f:
    # One JSON document per line; blank lines (e.g. a trailing empty line)
    # are skipped instead of crashing json.loads.
    rows = [json.loads(line) for line in f if line.strip()]

# Fixed column order so the frame keeps its schema even when no answer is found.
ANSWER_COLUMNS = ["gameId", "roundNumber", "lat", "lng", "map"]

answers = []
for row in rows:
    if row["code"] != "LiveChallengeFinished":
        continue
    game_id = row["gameId"]
    state = row["liveChallenge"]["state"]
    # The loop variable is deliberately NOT called `round`: cell variables are
    # kernel globals, and rebinding `round` would break the builtin round()
    # for every cell executed afterwards.
    for round_number, game_round in enumerate(state["rounds"], start=1):
        try:
            coordinates = game_round["answer"]["coordinateAnswerPayload"]["coordinate"]
        except TypeError:
            # A None somewhere along the path means this round carries no
            # coordinate answer; skip it.
            continue
        answers.append(
            {
                "gameId": game_id,
                "roundNumber": round_number,  # 1-based position within the game
                "lat": coordinates["lat"],
                "lng": coordinates["lng"],
                "map": state["options"]["mapSlug"],
            }
        )

answers_df = pd.DataFrame(answers, columns=ANSWER_COLUMNS)
answers_df.head()

In [None]:
# Attach a country code to every answer by spatially joining the answer
# coordinates against the country polygons in `world`.
world = gpd.read_file("./ne_10m_admin_0_countries.zip")

# Points are built as (x=lng, y=lat) in WGS84 (EPSG:4326).
answer_points = gpd.points_from_xy(answers_df.lng, answers_df.lat)
answers_gdf = gpd.GeoDataFrame(answers_df, geometry=answer_points, crs="EPSG:4326")
# Left join: answers that fall inside no polygon are kept, with missing country values.
answers_gdf = gpd.tools.sjoin(answers_gdf, world, how="left")
answers_gdf = answers_gdf.rename(columns={"ISO_A2": "country"})

# Manual overrides keyed by `index_right`, i.e. the row label of the matched
# polygon in `world`. The key therefore depends on the row order of this exact file.
# NOTE(review): the override value is a name, while the other values in this
# column are ISO_A2 codes (e.g. "MC" below) — confirm this is intended.
custom_rules = {
    228: "Christmas Island"
}
mask = answers_gdf["index_right"].isin(list(custom_rules))
answers_gdf.loc[mask, "country"] = answers_gdf.loc[mask, "index_right"].map(custom_rules.get)

# Monaco rule: any answer whose coordinates round to (43.7, 7.4) at one
# decimal place is labelled "MC", regardless of the polygon it matched.
# Boolean masks are combined with `&` (logical AND), not arithmetic `*`.
mask = (answers_gdf["lat"].round(1) == 43.7) & (answers_gdf["lng"].round(1) == 7.4)
answers_gdf.loc[mask, "country"] = "MC"

# Keep only the original answer columns plus the derived country.
columns = answers_df.columns.to_list() + ["country"]
answers_gdf = answers_gdf[columns]

In [None]:
# Build `guesses_df`: one row per player guess, taken from the
# "LiveChallengeLeaderboardUpdate" events of the newline-delimited JSON log.

# JSON text is UTF-8, so decode explicitly rather than relying on the
# platform's default encoding (player names may contain non-ASCII characters).
with open("./geoguessr_events.json", "r", encoding="utf-8") as f:
    # One JSON document per line; blank lines are skipped instead of crashing json.loads.
    rows = [json.loads(line) for line in f if line.strip()]

records = []
for row in rows:
    if row["code"] != "LiveChallengeLeaderboardUpdate":
        continue
    # Event-level fields (everything except the nested payload) are copied
    # onto every guess record of this event.
    common_values = {key: value for key, value in row.items() if key != "liveChallenge"}
    round_board = row["liveChallenge"]["leaderboards"]["round"]
    guesses = round_board["guesses"]
    entries = round_board["entries"]
    # `guesses` and `entries` are read as parallel lists: the guess at
    # position i is attributed to the entry at position i.
    for i, guess in enumerate(guesses):
        if not guess:
            # Empty/None slot: no guess recorded at this position.
            continue
        # Later keys win: event-level fields override same-named guess fields.
        records.append({**guess, **common_values, "player": entries[i]["name"]})

guesses_df = pd.DataFrame(records)
# The same (player, round, game) can appear in several events; keep only the
# occurrence that comes last in the file.
guesses_df = guesses_df.drop_duplicates(subset=["player", "roundNumber", "gameId"], keep="last")
guesses_df.head()

In [None]:
# Tag every guess with the country polygon it falls inside (WGS84 points, x=lng, y=lat).
guess_points = gpd.points_from_xy(guesses_df.lng, guesses_df.lat)
guesses_located = gpd.tools.sjoin(
    gpd.GeoDataFrame(guesses_df, geometry=guess_points, crs="EPSG:4326"),
    world,
    how="left",  # guesses outside every polygon are kept, without a country
)

# Keep the original guess columns plus the joined ISO code. The raw
# `countryGuess` field is dropped and the ISO code is renamed to `country`,
# so the later merge (suffix "Guess") produces a fresh `countryGuess` column.
kept_columns = [*guesses_df.columns, "ISO_A2"]
guesses_gdf = (
    guesses_located[kept_columns]
    .drop(columns=["countryGuess"])
    .rename(columns={"ISO_A2": "country"})
)
guesses_gdf

In [None]:
# Pair each guess with the answer of the same game round. Columns present on
# both sides (other than the join keys) get the "Guess" / "Answer" suffix.
join_keys = ["gameId", "roundNumber"]
gdf = guesses_gdf.merge(
    answers_gdf,
    how="inner",  # guesses without a matching answer row are dropped
    on=join_keys,
    suffixes=("Guess", "Answer"),
)
gdf.head()

In [None]:
# Collapse both "no usable code" cases of the answer country — the "-99"
# placeholder and missing values — into the single label "NO_ID".
answer_country = gdf["countryAnswer"]
lacks_code = answer_country.eq("-99") | answer_country.isna()
gdf["countryAnswer"] = answer_country.mask(lacks_code, "NO_ID")

In [None]:
# Sanity check: number of merged guess rows per answer country, most frequent
# first (includes the "NO_ID" bucket). Counts are per guess, not per round.
gdf["countryAnswer"].value_counts()

In [None]:
# Persist the merged guess/answer table; index=False leaves the row index out of the file.
# NOTE(review): if a country column contains Namibia's ISO code "NA", a default
# pd.read_csv will parse it as a missing value — read the file back with
# keep_default_na=False if that matters downstream. TODO confirm.
gdf.to_csv("gdf.csv", index=False)