In [11]:
# --- IMPORTS ---
import json
import os
import time
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# --- PARAMETERS ---
# NOTE(review): API keys should not live in source control. The key can be
# supplied through the API_FOOTBALL_KEY environment variable; the literal is
# kept only as a backward-compatible fallback and should be rotated/removed.
API_KEY_FOOTBALL = os.environ.get("API_FOOTBALL_KEY", "94da850e64e939f43fe46eb6dd5b3f34")
HEADERS_FOOTBALL = {"x-apisports-key": API_KEY_FOOTBALL}
SEASON = 2023

# Output directory. Override with the DATA_SAVE_DIR environment variable so the
# script is not tied to a single machine's home directory.
SAVE_DIR = os.environ.get("DATA_SAVE_DIR", "/Users/leduigouvincent/Downloads/data_saved")
os.makedirs(SAVE_DIR, exist_ok=True)

# --- FONCTIONS UTILES ---
def safe_request(url, headers=None, params=None, retries=3, sleep_time=5, timeout=30):
    """GET *url* and return its decoded JSON body, retrying on failure.

    Args:
        url: Address to request.
        headers: Optional HTTP headers forwarded to ``requests.get``.
        params: Optional query-string parameters forwarded to ``requests.get``.
        retries: Maximum number of attempts.
        sleep_time: Seconds to wait between two consecutive attempts.
        timeout: Seconds after which a single attempt is abandoned, so a
            stalled connection cannot block the script indefinitely.

    Returns:
        The parsed JSON payload of the first HTTP 200 response, or ``None``
        when every attempt failed (or when ``retries`` is not positive).
    """
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            if response.status_code == 200:
                return response.json()
            print(f"Erreur HTTP {response.status_code}: {response.text}")
        except (requests.RequestException, ValueError) as e:
            # Network/timeout problems, or a 200 response whose body is not JSON.
            print(f"Erreur de requête: {e}")
        # Pause only when another attempt follows; sleeping after the last
        # failure would just delay returning None.
        if attempt < retries - 1:
            time.sleep(sleep_time)
    return None

def save_df(df, filename):
    """Export *df* as a CSV file (index omitted) inside ``SAVE_DIR``.

    Args:
        df: DataFrame to write.
        filename: Name of the CSV file created under ``SAVE_DIR``.
    """
    target = os.path.join(SAVE_DIR, filename)
    df.to_csv(target, index=False)
    # Only the bare file name is reported, not the full path.
    print(f"Saved {filename}")

# --- STEP 1: Fetch the competitions ---
leagues_url = "https://v3.football.api-sports.io/leagues"
params_league = {"season": SEASON}

league_data = safe_request(leagues_url, headers=HEADERS_FOOTBALL, params=params_league)

# One [name, id, country] triple per league returned by the API.
list_leagues = []
if league_data:
    list_leagues = [
        [entry["league"]["name"], entry["league"]["id"], entry["country"]["name"]]
        for entry in (league_data.get("response") or [])
    ]
else:
    print("Erreur lors de la récupération des ligues.")

# The frame is built unconditionally (possibly empty) so that the later loop
# over df_leagues does not fail with a NameError when the request failed.
df_leagues = pd.DataFrame(list_leagues, columns=["league_name", "league_id", "league_country"])
if league_data:
    save_df(df_leagues, "leagues.csv")

# --- STEP 2: Champions League (LDC) clubs ---
teams_url = "https://v3.football.api-sports.io/teams"
params_teams = {"season": SEASON, "league": 2}

teams_data = safe_request(teams_url, headers=HEADERS_FOOTBALL, params=params_teams)

# Names of the teams returned for the requested league and season.
clubs_LDC_2023 = []
if not teams_data:
    print("Erreur lors de la récupération des clubs de LDC.")
else:
    clubs_LDC_2023.extend(entry["team"]["name"] for entry in teams_data["response"])

    # Persist the club list next to the CSV exports.
    clubs_path = os.path.join(SAVE_DIR, "clubs_LDC.json")
    with open(clubs_path, "w", encoding='utf-8') as f:
        json.dump(clubs_LDC_2023, f)

# --- STEP 3: Fetch the matches ---
# NOTE(review): one request is issued per league in df_leagues, each followed
# by a 7 second pause, so this step can run for a long time.
fixtures_url = "https://v3.football.api-sports.io/fixtures"

# Fixed schema so df_matches always exposes these columns (notably
# "fixture_id", which the following steps iterate over) even when no match
# was collected.
match_columns = [
    "fixture_id", "datetime", "timezone", "venue", "city", "league",
    "country", "round", "home_team", "away_team", "referee", "duration",
]

# Set copy of the club names: O(1) membership tests inside the nested loops.
clubs_lookup = set(clubs_LDC_2023)

rows = []
for idx, comp in df_leagues.iterrows():
    params_fixtures = {"league": comp['league_id'], "season": SEASON, "status": "FT"}
    fixtures_data = safe_request(fixtures_url, headers=HEADERS_FOOTBALL, params=params_fixtures)

    if fixtures_data:
        for match in fixtures_data.get("response") or []:
            home_team = match["teams"]["home"]["name"]
            away_team = match["teams"]["away"]["name"]

            # Keep only matches involving at least one of the selected clubs.
            if home_team not in clubs_lookup and away_team not in clubs_lookup:
                continue

            fixture = match["fixture"]
            # Tolerate a missing/null venue or status object instead of crashing.
            venue = fixture.get("venue") or {}
            status = fixture.get("status") or {}
            rows.append({
                "fixture_id": fixture["id"],
                "datetime": fixture["date"],
                "timezone": fixture["timezone"],
                "venue": venue.get("name"),
                "city": venue.get("city"),
                "league": match["league"]["name"],
                "country": comp['league_country'],
                "round": match["league"]["round"],
                "home_team": home_team,
                "away_team": away_team,
                "referee": fixture.get("referee"),
                "duration": status.get("elapsed"),
            })
    time.sleep(7)

    # Checkpoint: every fifth league, dump what has been collected so far.
    if idx % 5 == 0:
        df_temp = pd.DataFrame(rows, columns=match_columns)
        save_df(df_temp, "matches_partial.csv")

df_matches = pd.DataFrame(rows, columns=match_columns)
save_df(df_matches, "matches.csv")

# --- STEP 4: Lineups ---
lineups_url = "https://v3.football.api-sports.io/fixtures/lineups"
lineups_rows = []

for fixture_id in df_matches["fixture_id"]:
    params_lineups = {"fixture": fixture_id}
    lineups_data = safe_request(lineups_url, headers=HEADERS_FOOTBALL, params=params_lineups)

    if lineups_data:
        try:
            # Exactly two entries are read: one lineup per team.
            team1 = lineups_data["response"][0]
            team2 = lineups_data["response"][1]

            lineups_rows.append({
                "fixture_id": fixture_id,
                "team1_name": team1["team"]["name"],
                "team1_formation": team1["formation"],
                "team2_name": team2["team"]["name"],
                "team2_formation": team2["formation"]
            })
        except (KeyError, IndexError, TypeError):
            # Only data-shape problems are treated as "incomplete lineups";
            # KeyboardInterrupt and other unrelated errors now propagate.
            print(f"Lineups incomplètes pour {fixture_id}")
    time.sleep(7)

# Explicit columns keep the CSV header stable even when nothing was collected.
df_lineups = pd.DataFrame(
    lineups_rows,
    columns=["fixture_id", "team1_name", "team1_formation", "team2_name", "team2_formation"],
)
save_df(df_lineups, "lineups.csv")

# --- STEP 5: Predictions ---
predictions_url = "https://v3.football.api-sports.io/predictions"
predictions_rows = []

for fixture_id in df_matches["fixture_id"]:
    prediction_data = safe_request(
        predictions_url,
        headers=HEADERS_FOOTBALL,
        params={"fixture": fixture_id},
    )

    # Only fixtures with a non-empty "response" contribute a row.
    if prediction_data and prediction_data["response"]:
        first_prediction = prediction_data["response"][0]["predictions"]
        percent = first_prediction["percent"]
        predictions_rows.append({
            "fixture_id": fixture_id,
            "win_home_percent": percent["home"],
            "draw_percent": percent["draw"],
            "win_away_percent": percent["away"],
        })
    time.sleep(7)

df_predictions = pd.DataFrame(predictions_rows)
save_df(df_predictions, "predictions.csv")

# --- STEP 6: Injuries ---
injuries_url = "https://v3.football.api-sports.io/injuries"
injuries_rows = []

for fixture_id in df_matches["fixture_id"]:
    injuries_data = safe_request(
        injuries_url,
        headers=HEADERS_FOOTBALL,
        params={"fixture": fixture_id},
    )

    if injuries_data:
        # One output row per entry of the "response" list for this fixture.
        injuries_rows.extend(
            {
                "fixture_id": fixture_id,
                "team": entry["team"]["name"],
                "player_name": entry["player"]["name"],
                "type": entry["type"],
            }
            for entry in injuries_data["response"]
        )
    time.sleep(7)

df_injuries = pd.DataFrame(injuries_rows)
save_df(df_injuries, "injuries.csv")

# --- STEP 7: Weather ---
# NOTE(review): API keys should not live in source control. The key can be
# supplied through the VISUALCROSSING_API_KEY environment variable; the literal
# is kept only as a backward-compatible fallback and should be rotated/removed.
API_KEY_WEATHER = os.environ.get("VISUALCROSSING_API_KEY", "6b0515591c6d847c4f7469dcb4233cc7")
base_weather_url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline"
# Query-string arguments shared by every request (same values the URL carried
# before, now encoded by requests instead of by hand).
weather_params = {"key": API_KEY_WEATHER, "unitGroup": "metric", "include": "days"}
weather_results = []

for idx, row in df_matches.iterrows():
    city = row['city']
    # Without a city there is nothing meaningful to query; skip instead of
    # sending a request for the literal text "None" or "nan".
    if pd.isna(city) or not str(city).strip():
        continue

    date_str = pd.to_datetime(row['datetime']).strftime('%Y-%m-%d')
    # The city goes into the URL *path*: escape it fully so spaces, slashes or
    # accented characters cannot corrupt the request.
    city_segment = quote(str(city), safe="")
    url = f"{base_weather_url}/{city_segment}/{date_str}"

    weather_data = safe_request(url, params=weather_params)

    # Guard against a payload without a (non-empty) "days" list.
    days = weather_data.get('days') if weather_data else None
    if days:
        day = days[0]
        weather_results.append({
            "fixture_id": row['fixture_id'],
            "humidity": day.get('humidity'),
            "temp": day.get('temp'),
            "precip": day.get('precip')
        })
    time.sleep(2)

# Explicit columns keep the CSV header stable even when nothing was collected.
df_weather = pd.DataFrame(weather_results, columns=["fixture_id", "humidity", "temp", "precip"])
save_df(df_weather, "weather.csv")

# --- STEP 8: Premier League standings for the previous season ---
# NOTE(review): API keys should not live in source control. The key can be
# supplied through the FOOTBALL_DATA_API_KEY environment variable; the literal
# is kept only as a backward-compatible fallback and should be rotated/removed.
API_KEY_FOOTBALLDATA = os.environ.get("FOOTBALL_DATA_API_KEY", "a2e5a2bf71714e5b94ce379fbdd5e054")
pl_standings_url = f"https://api.football-data.org/v4/competitions/PL/standings?season={SEASON-1}"
headers_fd = {"X-Auth-Token": API_KEY_FOOTBALLDATA}

# Go through safe_request like every other step, so network errors are
# reported and retried instead of aborting the script.
standings_data = safe_request(pl_standings_url, headers=headers_fd)
standings_groups = standings_data.get("standings") if standings_data else None

if standings_groups:
    # Only the first entry of the "standings" list is exported.
    standings = standings_groups[0]["table"]
    classement = pd.DataFrame([{
        'position': team['position'],
        'team': team['team']['name'],
        'points': team['points'],
        'playedGames': team['playedGames'],
        'won': team['won'],
        'draw': team['draw'],
        'lost': team['lost'],
        'goalsFor': team['goalsFor'],
        'goalsAgainst': team['goalsAgainst'],
        'goalDifference': team['goalDifference']
    } for team in standings])
    save_df(classement, "pl_standings.csv")
else:
    print("Erreur de récupération classement PL.")

# --- STEP 9: Web scraping of the fbref Champions League schedule ---
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

url = "https://fbref.com/fr/comps/8/calendrier/Scores-et-tableaux-Ligue-des-champions"
# try/finally guarantees the browser process is shut down even when loading
# the page raises; the HTML is captured before quitting.
try:
    driver.get(url)
    time.sleep(3)  # fixed pause before reading the page source
    html = driver.page_source
finally:
    driver.quit()

soup = BeautifulSoup(html, "html.parser")

table = soup.find("table", {"id": "sched_all"})
tbody = table.find("tbody") if table else None
matchs = []
if tbody:
    for row in tbody.find_all("tr"):
        cells = row.find_all("td")
        # The highest index read below is 11, so at least 12 cells are needed;
        # shorter rows are skipped rather than raising IndexError.
        if len(cells) < 12:
            continue
        matchs.append({
            'date': cells[2].text.strip(),
            'hour': cells[3].text.strip(),
            'home_team': cells[4].text.strip(),
            'score': cells[6].text.strip(),
            'away_team': cells[8].text.strip(),
            'stadium': cells[10].text.strip(),
            'referee': cells[11].text.strip()
        })

df_fbref = pd.DataFrame(matchs)
save_df(df_fbref, "fbref_ligue_champions.csv")

print("\n--- Projet Terminé et sauvegardé ---")


Saved leagues.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv
Saved matches_partial.csv


KeyboardInterrupt: 