In [1]:
# importing Dependencies
import json
import os

import pandas as pd
import requests

In [2]:
# Define API endpoints and headers
teams_url = "https://api-football-v1.p.rapidapi.com/v3/teams"
# Note: "coachs" (sic) is the endpoint name the API itself uses.
stats_url = "https://api-football-v1.p.rapidapi.com/v3/coachs"

# SECURITY: never commit the RapidAPI key to the notebook. It is read from the
# RAPIDAPI_KEY environment variable instead. The key that used to be hardcoded
# here should be treated as compromised and rotated.
headers = {
    "X-RapidAPI-Key": os.environ.get("RAPIDAPI_KEY", ""),
    "X-RapidAPI-Host": "api-football-v1.p.rapidapi.com"
}
if not headers["X-RapidAPI-Key"]:
    # Warn early rather than failing later with an opaque HTTP error.
    print("RAPIDAPI_KEY is not set; API requests are expected to fail until it is exported.")

# Define the range of seasons
start_season = 2019
end_season = 2024
league_id = "371"  # Example league ID

# Initialize an empty list to hold all team IDs (kept as a list so insertion
# order of first appearance is preserved)
all_team_ids = []

def fetch_teams_for_season(season):
    """Fetch the league's teams for one season and record any new team IDs.

    Sends a GET request to ``teams_url`` for the module-level ``league_id`` and
    the given season, then appends each team ID that is not already present to
    the module-level ``all_team_ids`` list. Failures are reported with
    ``print`` and never raised, so one bad season does not stop the run.

    Args:
        season: Season year; converted to ``str`` for the query string.

    Returns:
        None. Results accumulate in ``all_team_ids``.
    """
    querystring = {"league": league_id, "season": str(season)}
    try:
        # The timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(teams_url, headers=headers, params=querystring, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch teams for season {season}: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch teams for season {season}: {response.status_code}")
        return

    data = response.json()
    # The payload has its own "errors" field (empty on success); surface it if
    # populated instead of silently treating the call as successful.
    if data.get("errors"):
        print(f"API reported errors for season {season}: {data['errors']}")

    # Tolerate a missing or null "response" by treating it as no teams.
    for team in data.get("response") or []:
        team_id = team['team']['id']
        if team_id not in all_team_ids:
            all_team_ids.append(team_id)

# Fetch teams for every season from start_season through end_season.
# range() excludes its stop value, so +1 is required for end_season itself
# to be fetched.
for year in range(start_season, end_season + 1):
    fetch_teams_for_season(year)

# Initialize a list to hold all coach data (one raw API payload per team)
all_coach_data = []

def fetch_coach_statistics(team_id):
    """Fetch the coaches payload for one team and store it in ``all_coach_data``.

    Sends a GET request to ``stats_url`` filtered by team only (no season is
    sent) and appends the decoded JSON payload to the module-level
    ``all_coach_data`` list. Failures are reported with ``print`` and never
    raised, so one bad team does not stop the run.

    Args:
        team_id: Team identifier; converted to ``str`` for the query string.

    Returns:
        None. Results accumulate in ``all_coach_data``.
    """
    querystring = {"team": str(team_id)}
    try:
        # The timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(stats_url, headers=headers, params=querystring, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch stats for team {team_id}: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch stats for team {team_id}: {response.status_code}")
        return

    data = response.json()
    # The payload has its own "errors" field (empty on success); surface it if
    # populated. The payload is still stored, as before.
    if data.get("errors"):
        print(f"API reported errors for team {team_id}: {data['errors']}")
    all_coach_data.append(data)

# Collect the coach payload for every team ID gathered above
for team_id in all_team_ids:
    fetch_coach_statistics(team_id)

# Serialize everything collected with 4-space indentation, then display it
formatted_coach_data = json.dumps(all_coach_data, indent=4)
print(formatted_coach_data)

[
    {
        "get": "coachs",
        "parameters": {
            "team": "574"
        },
        "errors": [],
        "results": 5,
        "paging": {
            "current": 1,
            "total": 1
        },
        "response": [
            {
                "id": 1833,
                "name": "G. Mihajlov",
                "firstname": "Gorazd",
                "lastname": "Mihajlov",
                "age": 51,
                "birth": {
                    "date": "1973-08-21",
                    "place": null,
                    "country": "North Macedonia"
                },
                "nationality": "North Macedonia",
                "height": null,
                "weight": null,
                "photo": "https://media.api-sports.io/football/coachs/1833.png",
                "team": {
                    "id": 574,
                    "name": "Vardar Skopje",
                    "logo": "https://media.api-sports.io/football/teams/574.png"
                },
    

In [17]:
# Flatten the payloads: one record per career entry of each payload's first coach
rows = []

for payload in all_coach_data:
    coaches = payload["response"]
    if not coaches:
        # Nothing came back for this team; skip it
        continue

    # NOTE(review): only the first coach of each payload is used; any other
    # coaches in the same response are ignored. Confirm this is intended.
    first_coach = coaches[0]

    # Personal fields repeated on every career row of this coach
    personal = {
        'First Name': first_coach['firstname'],
        'Last Name': first_coach['lastname'],
        'Age': first_coach['age'],
        'Birth Place': first_coach['birth']['place'],
        'Birth Country': first_coach['birth']['country'],
    }

    # One row per team the coach has worked with
    rows.extend(
        {
            **personal,
            'Team ID': stint['team']['id'],
            'Team Name': stint['team']['name'],
            'Start Date': stint['start'],
            'End Date': stint['end'],
        }
        for stint in first_coach['career']
    )

# Build the DataFrame in one go from the accumulated records
df = pd.DataFrame(rows)

In [18]:
# Preview at most the first 100 rows of the flattened coach table
df.head(n=100)

Unnamed: 0,First Name,Last Name,Age,Birth Place,Birth Country,Team ID,Team Name,Start Date,End Date
0,Gorazd,Mihajlov,51.0,,North Macedonia,574.0,Vardar Skopje,2023-12-01,
1,Gorazd,Mihajlov,51.0,,North Macedonia,4331.0,Sileks,2021-06-01,2023-12-01
2,Gorazd,Mihajlov,51.0,,North Macedonia,4334.0,Borec Veles,2018-07-01,2021-01-01
3,Gorazd,Mihajlov,51.0,,North Macedonia,4347.0,Teteks,2015-01-01,2016-06-01
4,Gorazd,Mihajlov,51.0,,North Macedonia,,Napredok,2013-08-01,2014-01-01
...,...,...,...,...,...,...,...,...,...
79,Ismail,Atalan,44.0,,Germany,4338.0,KF Gostivari,2024-01-01,
80,Ismail,Atalan,44.0,,Germany,1316.0,Hallescher FC,2020-02-01,2020-06-01
81,Ismail,Atalan,44.0,,Germany,1323.0,Sportfreunde Lotte,2019-04-01,2020-02-01
82,Ismail,Atalan,44.0,,Germany,176.0,Bochum,2017-07-01,2017-10-01


In [20]:
# Convert 'Start Date' and 'End Date' columns to datetime format.
# errors='coerce' turns missing or unparseable dates into NaT instead of raising.
df['Start Date'] = pd.to_datetime(df['Start Date'], format='%Y-%m-%d', errors='coerce')
df['End Date'] = pd.to_datetime(df['End Date'], format='%Y-%m-%d', errors='coerce')

# Use the nullable integer dtype so a missing age stays missing (<NA>) rather
# than being recorded as a fabricated age of 0.
df['Age'] = df['Age'].astype('Int64')

# Define dictionary with column names and desired data types.
# Text columns use the nullable 'string' dtype: casting with plain `str` would
# turn missing values into the literal strings "None"/"nan", which would then
# be written to the CSV as if they were real data.
dtypes_dict = {
    'First Name': 'string',
    'Last Name': 'string',
    'Age': 'Int64',
    'Birth Place': 'string',
    'Birth Country': 'string',
    'Team ID': float,  # stays float because some career entries have no team ID
    'Team Name': 'string',
}

# Change data types of columns
coach_df = df.astype(dtypes_dict)

In [24]:
# Keep only the career rows whose team is one of the league teams collected earlier
is_league_team = coach_df['Team ID'].isin(all_team_ids)
coach_filtered_df = coach_df.loc[is_league_team]

In [25]:
# Write the filtered First Macedonia League coach data to CSV, without the index column
output_csv_path = 'First_Macedonia_League_Coach_Data_Raw.csv'
coach_filtered_df.to_csv(output_csv_path, index=False)