In [1]:
import json
import requests
import pandas as pd

In [2]:
with open("../data/team-stadium.json", "r") as fp:
    teams = json.load(fp)

### Scrapping stadium coordinates

In [3]:
def get_stadium_loc(stadium, api_key):
    base_url = "https://api.opencagedata.com/geocode/v1/json"
    params = {
        "q": stadium,
        "key": api_key,
    }

    response = requests.get(base_url, params=params)
    resultado = response.json()

    if response.status_code == 200 and resultado["total_results"] > 0:

        locale = resultado["results"][0]["geometry"]
        lat = locale.get("lat")
        long = locale.get("lng")

        components = resultado['results'][0]['components']
        city = components.get('city')
        state = components.get('state_code')
        country = components.get('country')

        return {
            "stadium": stadium,
            "lat": lat,
            "long": long,
            "country": country,
            "state": state,
            "city": city
        }

    return {}

In [4]:
key = "d07aa8ca68c64037a965c407b912a30b"

bottom_4_b = ["Londrina EC", "Sampaio Corrêa FC", "ABC FC", "Tombense FC"]

locs = {}
for team in teams:
    stadium = team['stadium']
    print(f"{team['team']} == {stadium}")
    if stadium in locs.keys():
        print("Estádio cadastrado")
        continue
    if team['team'] in bottom_4_b:
        print("Time rebaixado")
        continue
    stadium_metadata = get_stadium_loc(stadium, key)

    locs[stadium] = stadium_metadata


Fluminense FC == Estádio Jornalista Mário Filho
CR Flamengo == Estádio Jornalista Mário Filho
Estádio cadastrado
São Paulo FC == Estádio Cícero Pompeu de Toledo
Cruzeiro EC == Estádio Governador Magalhães Pinto
Grêmio FBPA == Arena do Grêmio
Fortaleza EC == Estádio Governador Plácido Castelo
SC Internacional == Estádio Beira-Rio
SC Corinthians == Neo Química Arena 
EC Bahia == Arena Fonte Nova
Botafogo FR == Estádio Olímpico Nilton Santos
Atlético Mineiro == Arena MRV
SE Palmeiras == Allianz Parque
Cuiabá EC == Arena Pantanal
Athletico Paranaense == Ligga Arena
Coritiba FC == Estádio Major Antônio Couto Pereira
América Mineiro == Estádio Raimundo Sampaio
CR Vasco da Gama == Estádio São Januário
Santos FC == Estádio Urbano Caldeira
RB Bragantino == Estádio Nabi Abi Chedid
Goiás EC == Estádio de Hailé Pinheiro
Ceará SC == Estádio Governador Plácido Castelo
Estádio cadastrado
Sampaio Corrêa FC == Estádio Governador João Castelo - Castelão
Time rebaixado
Londrina EC == Estádio Municipal Ja

In [5]:
complete_stadium = pd.DataFrame(locs).T
complete_stadium.reset_index(drop=True, inplace=True)
complete_stadium

Unnamed: 0,stadium,lat,long,country,state,city
0,Estádio Jornalista Mário Filho,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
1,Estádio Cícero Pompeu de Toledo,-23.600058,-46.720158,Brazil,SP,São Paulo
2,Estádio Governador Magalhães Pinto,-19.865911,-43.971043,Brazil,MG,Belo Horizonte
3,Arena do Grêmio,-29.974043,-51.195107,Brazil,RS,Porto Alegre
4,Estádio Governador Plácido Castelo,-3.806742,-38.521679,Brazil,CE,Fortaleza
5,Estádio Beira-Rio,-21.285521,-42.552353,Brazil,MG,
6,Neo Química Arena,-23.545293,-46.474286,Brazil,SP,São Paulo
7,Arena Fonte Nova,-0.01381,-51.17252,Brazil,AP,
8,Estádio Olímpico Nilton Santos,-22.893194,-43.293569,Brazil,RJ,Rio de Janeiro
9,Arena MRV,-19.930012,-44.013881,Brazil,MG,Belo Horizonte


### Integrating stadium to teams

In [6]:
df_teams = pd.DataFrame(teams)
df_teams = df_teams.loc[~(df_teams["team"].isin(bottom_4_b))]
df_teams.reset_index(drop=True, inplace=True)
df_teams

Unnamed: 0,team,stadium,img
0,Fluminense FC,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...
1,CR Flamengo,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...
2,São Paulo FC,Estádio Cícero Pompeu de Toledo,https://tmssl.akamaized.net/images/wappen/very...
3,Cruzeiro EC,Estádio Governador Magalhães Pinto,https://tmssl.akamaized.net/images/wappen/very...
4,Grêmio FBPA,Arena do Grêmio,https://tmssl.akamaized.net/images/wappen/very...
5,Fortaleza EC,Estádio Governador Plácido Castelo,https://tmssl.akamaized.net/images/wappen/very...
6,SC Internacional,Estádio Beira-Rio,https://tmssl.akamaized.net/images/wappen/very...
7,SC Corinthians,Neo Química Arena,https://tmssl.akamaized.net/images/wappen/very...
8,EC Bahia,Arena Fonte Nova,https://tmssl.akamaized.net/images/wappen/very...
9,Botafogo FR,Estádio Olímpico Nilton Santos,https://tmssl.akamaized.net/images/wappen/very...


In [7]:
full_df = df_teams.merge(complete_stadium, on="stadium", how="left")

In [8]:
full_df

Unnamed: 0,team,stadium,img,lat,long,country,state,city
0,Fluminense FC,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
1,CR Flamengo,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
2,São Paulo FC,Estádio Cícero Pompeu de Toledo,https://tmssl.akamaized.net/images/wappen/very...,-23.600058,-46.720158,Brazil,SP,São Paulo
3,Cruzeiro EC,Estádio Governador Magalhães Pinto,https://tmssl.akamaized.net/images/wappen/very...,-19.865911,-43.971043,Brazil,MG,Belo Horizonte
4,Grêmio FBPA,Arena do Grêmio,https://tmssl.akamaized.net/images/wappen/very...,-29.974043,-51.195107,Brazil,RS,Porto Alegre
5,Fortaleza EC,Estádio Governador Plácido Castelo,https://tmssl.akamaized.net/images/wappen/very...,-3.806742,-38.521679,Brazil,CE,Fortaleza
6,SC Internacional,Estádio Beira-Rio,https://tmssl.akamaized.net/images/wappen/very...,-21.285521,-42.552353,Brazil,MG,
7,SC Corinthians,Neo Química Arena,https://tmssl.akamaized.net/images/wappen/very...,-23.545293,-46.474286,Brazil,SP,São Paulo
8,EC Bahia,Arena Fonte Nova,https://tmssl.akamaized.net/images/wappen/very...,-0.01381,-51.17252,Brazil,AP,
9,Botafogo FR,Estádio Olímpico Nilton Santos,https://tmssl.akamaized.net/images/wappen/very...,-22.893194,-43.293569,Brazil,RJ,Rio de Janeiro


### Manually checking if stadiums are in correct place

In [12]:
stadiums_to_fix = pd.DataFrame([
    {
        "stadium": "Estádio Beira-Rio",
        "lat": "-30.065504685798906",
        "long": "-51.23585549512045",
        "country": "Brazil",
        "state": "RS",
        "city": "Porto Alegre"
    },
    {
        "stadium": "Arena Fonte Nova",
        "lat": "-12.978865418421202",
        "long": "-38.50436328295125",
        "country": "Brazil",
        "state": "BA",
        "city": "Salvador"
    },
    {
        "stadium": "Estádio de Hailé Pinheiro",
        "lat": "-16.710161214144666",
        "long": "-49.261256018365415",
        "country": "Brazil",
        "state": "GO",
        "city": "Goiânia"
    },
    {
        "stadium": "Estádio Jorge Ismael de Biasi",
        "lat": "-21.467156396664876",
        "long": "-49.232497864259656",
        "country": "Brazil",
        "state": "SP",
        "city": "Novo Horizonte"
    },
    {
        "stadium": "Estádio Municipal José María de Campos Maia",
        "lat": "-20.82104422972856",
        "long": "-49.50675372326037",
        "country": "Brazil",
        "state": "SP",
        "city": "Mirassol"
    },
    {
        "stadium": "Estadio Germano Kruger",
        "lat": "-25.116011132619022",
        "long": "-50.15654526042742",
        "country": "Brazil",
        "state": "PR",
        "city": "Ponta Grossa"
    },
    {
        "stadium": "Estádio Banpará Curuzu",
        "lat": "-1.443778488242549",
        "long": "-48.46286997625053",
        "country": "Brazil",
        "state": "PA",
        "city": "Belém"
    },
    
])

stadiums_fixed = pd.concat([complete_stadium, stadiums_to_fix], axis=0)
stadiums_fixed.dropna(subset="lat", inplace=True)
stadiums_fixed.drop_duplicates(subset=["stadium"], keep="last", inplace=True)
stadiums_fixed.reset_index(drop=True, inplace=True)
stadiums_fixed

Unnamed: 0,stadium,lat,long,country,state,city
0,Estádio Jornalista Mário Filho,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
1,Estádio Cícero Pompeu de Toledo,-23.600058,-46.720158,Brazil,SP,São Paulo
2,Estádio Governador Magalhães Pinto,-19.865911,-43.971043,Brazil,MG,Belo Horizonte
3,Arena do Grêmio,-29.974043,-51.195107,Brazil,RS,Porto Alegre
4,Estádio Governador Plácido Castelo,-3.806742,-38.521679,Brazil,CE,Fortaleza
5,Neo Química Arena,-23.545293,-46.474286,Brazil,SP,São Paulo
6,Estádio Olímpico Nilton Santos,-22.893194,-43.293569,Brazil,RJ,Rio de Janeiro
7,Arena MRV,-19.930012,-44.013881,Brazil,MG,Belo Horizonte
8,Allianz Parque,-23.527497,-46.678528,Brazil,SP,São Paulo
9,Arena Pantanal,-15.604065,-56.122299,Brazil,MT,Cuiabá


### Integrating fixed locations to teams

In [13]:
full_df = df_teams.merge(stadiums_fixed, on="stadium", how="left")
full_df

Unnamed: 0,team,stadium,img,lat,long,country,state,city
0,Fluminense FC,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
1,CR Flamengo,Estádio Jornalista Mário Filho,https://tmssl.akamaized.net/images/wappen/very...,-22.912162,-43.231186,Brazil,RJ,Rio de Janeiro
2,São Paulo FC,Estádio Cícero Pompeu de Toledo,https://tmssl.akamaized.net/images/wappen/very...,-23.600058,-46.720158,Brazil,SP,São Paulo
3,Cruzeiro EC,Estádio Governador Magalhães Pinto,https://tmssl.akamaized.net/images/wappen/very...,-19.865911,-43.971043,Brazil,MG,Belo Horizonte
4,Grêmio FBPA,Arena do Grêmio,https://tmssl.akamaized.net/images/wappen/very...,-29.974043,-51.195107,Brazil,RS,Porto Alegre
5,Fortaleza EC,Estádio Governador Plácido Castelo,https://tmssl.akamaized.net/images/wappen/very...,-3.806742,-38.521679,Brazil,CE,Fortaleza
6,SC Internacional,Estádio Beira-Rio,https://tmssl.akamaized.net/images/wappen/very...,-30.065504685798903,-51.23585549512045,Brazil,RS,Porto Alegre
7,SC Corinthians,Neo Química Arena,https://tmssl.akamaized.net/images/wappen/very...,-23.545293,-46.474286,Brazil,SP,São Paulo
8,EC Bahia,Arena Fonte Nova,https://tmssl.akamaized.net/images/wappen/very...,-12.978865418421202,-38.50436328295125,Brazil,BA,Salvador
9,Botafogo FR,Estádio Olímpico Nilton Santos,https://tmssl.akamaized.net/images/wappen/very...,-22.893194,-43.293569,Brazil,RJ,Rio de Janeiro


In [14]:
full_df.to_csv("../data/team-stadium-full.csv")