In [99]:
import pandas as pd
import numpy as np
from datetime import datetime
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)

In [161]:
season_list = ['1617', '1718', '1819', '1920', '2021', '2122', '2223', '2324', '2425']
league_list = ['E0', 'E1', 'E2', 'E3', 'SC0', 'SC1', 'SC2', 'D1', 'D2', 'I1', 'I2', 
               'SP1', 'SP2', 'F1', 'F2', 'N1', 'B1', 'P1', 'T1', 'G1'] 

df_list = []

for league in league_list:
    for season in season_list:
        data_url = f'https://www.football-data.co.uk/mmz4281/{season}/{league}.csv'
        try:
            df = pd.read_csv(data_url)
        except:
            try:
                df = pd.read_csv(data_url, encoding='latin')
            except:
                print(f"Failed to load data for {league} {season}")
                continue
        df['Season'] = season
        df['League'] = league
        df_list.append(df)

main_df = pd.concat(df_list, ignore_index=True)

main_df['Date'] = pd.to_datetime(main_df['Date'], format='mixed')
main_df = main_df.sort_values(by='Date')

df = main_df
df = df[::-1]
df.reset_index(inplace=True)

  df.reset_index(inplace=True)


In [163]:
import pandas as pd

season_list = ['1617', '1718', '1819', '1920', '2021', '2122', '2223', '2324', '2425']

def get_teams_by_league(league):
    df_list = []

    for season in season_list:
        url = f"https://www.football-data.co.uk/mmz4281/{season}/{league}.csv"
        try:
            df = pd.read_csv(url)
        except:
            try:
                df = pd.read_csv(url, encoding='latin')
            except:
                print(f"❌ Misslyckades att ladda {league} {season}")
                continue

        df_list.append(df)

    if not df_list:
        return []

    full_df = pd.concat(df_list, ignore_index=True)

    # Plocka ut alla unika lag från både hemma och bortalag
    teams = pd.unique(pd.concat([full_df['HomeTeam'], full_df['AwayTeam']]).dropna())
    return sorted(teams.tolist())

In [101]:
df_swe = pd.read_csv('https://www.football-data.co.uk/new/SWE.csv')

df_swe['Date'] = pd.to_datetime(df_swe['Date'], format='%d/%m/%Y')
df_swe= df_swe.sort_values(by='Date')
df_swe = df_swe[::-1]

df_swe.reset_index(inplace=True)

In [102]:
def get_teams_result(df):
    resultat = {}

    teams = set(df["HomeTeam"]).union(set(df["AwayTeam"]))

    for team in teams:
        resultat[team] = {}
        
    for _, rad in df.iterrows():
        hemma, borta = rad["HomeTeam"], rad["AwayTeam"]
        mål_a, mål_b = rad["FTHG"], rad["FTAG"]

        if hemma not in resultat:
            resultat[hemma] = {}
        if borta not in resultat:
            resultat[borta] = {}

        if borta not in resultat[hemma]:
            resultat[hemma][borta] = {"Vinster": 0, "Förluster": 0, "Oavgjorda": 0}
        if hemma not in resultat[borta]:
            resultat[borta][hemma] = {"Vinster": 0, "Förluster": 0, "Oavgjorda": 0}

        if mål_a > mål_b: 
            resultat[hemma][borta]["Vinster"] += 1
            resultat[borta][hemma]["Förluster"] += 1
        elif mål_a < mål_b: 
            resultat[borta][hemma]["Vinster"] += 1
            resultat[hemma][borta]["Förluster"] += 1
        else:
            resultat[hemma][borta]["Oavgjorda"] += 1
            resultat[borta][hemma]["Oavgjorda"] += 1
            
    return resultat

#get_teams_result(df)['Man City']

In [103]:
def get_home_vs_away_summary(df, home_team, away_team):

    matcher = df[(df["HomeTeam"] == home_team) & (df["AwayTeam"] == away_team)]

    resultat = {"Vinster": 0, "Förluster": 0, "Oavgjorda": 0}

    for _, rad in matcher.iterrows():
        mål_hemma = rad["FTHG"]
        mål_borta = rad["FTAG"]

        if mål_hemma > mål_borta:
            resultat["Vinster"] += 1
        elif mål_hemma < mål_borta:
            resultat["Förluster"] += 1
        else:
            resultat["Oavgjorda"] += 1

    return resultat

#get_home_vs_away_summary(df,'Liverpool', 'Arsenal')

In [104]:
def get_team_league_test():
    teams = []

    # Hantera engelska ligor (Div)
    all_eng_teams = df['HomeTeam'].astype(str).tolist() + df['AwayTeam'].astype(str).tolist()   
    unique_eng_teams = np.unique(all_eng_teams)
    teams.extend([str(team) for team in unique_eng_teams])

    # Hämta ligor från båda dataframes
    english_leagues = sorted(df['Div'].astype(str).unique().tolist())
    swedish_leagues = sorted(df_swe['League'].astype(str).unique().tolist())

    all_leagues = english_leagues + swedish_leagues
    return teams, all_leagues

In [105]:
def get_team_league():
    teams_by_league = []
    all_leagues = []

    # Hantera engelska ligor
    english_leagues = sorted(df['Div'].astype(str).unique().tolist())
    for league in english_leagues:
        league_df = df[df['Div'] == league]
        teams = league_df['HomeTeam'].astype(str).tolist() + league_df['AwayTeam'].astype(str).tolist()
        unique_teams = sorted(np.unique(teams))
        teams_by_league.append(unique_teams)
        all_leagues.append(league)

    # Hantera svenska ligor, ta bort whitespaces och duplicates
    swedish_leagues_raw = df_swe['League'].astype(str).str.strip()
    swedish_leagues = sorted(swedish_leagues_raw.unique().tolist())

    for league in swedish_leagues:
        league_df = df_swe[df_swe['League'].str.strip() == league]
        teams = league_df['Home'].astype(str).tolist() + league_df['Away'].astype(str).tolist()
        unique_teams = sorted(np.unique(teams))
        teams_by_league.append(unique_teams)
        all_leagues.append(league)

    return teams_by_league, all_leagues

In [106]:
col_list = ['Div', 'Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'B365H', 'B365D', 'B365A']

In [None]:
def get_team_form(df, team, num_matches=10):

    team_matches = df[(df['HomeTeam'] == team) | (df['AwayTeam'] == team)].copy()

    team_matches = team_matches.sort_values(by='Date', ascending=False).head(num_matches)

    form = []

    for _, row in team_matches.iterrows():
        is_home = row['HomeTeam'] == team
        goals_for = row['FTHG'] if is_home else row['FTAG']
        goals_against = row['FTAG'] if is_home else row['FTHG']

        if goals_for > goals_against:
            form.append('W')
        elif goals_for == goals_against:
            form.append('D')
        else:
            form.append('L')

    return form[::-1]  

#get_team_form(df, 'Tottenham')

In [108]:
def get_fixture_by_league(league):
    df_next_games = pd.read_csv('https://www.football-data.co.uk/fixtures.csv')
    
    df_next_games['Date'] = pd.to_datetime(df_next_games['Date'], format='%d/%m/%Y', errors='coerce')
    today = pd.Timestamp.today().normalize()
    df_next_games = df_next_games[df_next_games['Date'] >= today]

    if league in df_next_games['Div'].unique():
        league_fixtures = df_next_games[df_next_games['Div'] == league]
        fixtures_list = league_fixtures[['HomeTeam', 'AwayTeam', 'Date', 'Time']].dropna().to_dict(orient='records')
        return fixtures_list
    return []

In [None]:
def create_league_table(league):
    current_year = datetime.now().year
    
    if datetime.now().month < 8:
        start_date = f"{current_year - 1}-08-01"  
        end_date = f"{current_year}-07-01"     
    else:
        start_date = f"{current_year}-08-01"     
        end_date = f"{current_year + 1}-07-01" 

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    matches_df = df[(df['Div'] == league) & 
                    (pd.to_datetime(df['Date']) >= start_date) & 
                    (pd.to_datetime(df['Date']) < end_date)]
    
    league_table = {}

    for index, match in matches_df.iterrows():
        home_team = match['HomeTeam']
        away_team = match['AwayTeam']
        home_goals = match['FTHG']
        away_goals = match['FTAG']


        if home_team not in league_table:
            league_table[home_team] = {'Played': 0, 'Won': 0, 'Drawn': 0, 'Lost': 0,
                                       'Goals For': 0, 'Goals Against': 0, 'Goal Difference': 0, 'Points': 0}
        if away_team not in league_table:
            league_table[away_team] = {'Played': 0, 'Won': 0, 'Drawn': 0, 'Lost': 0,
                                       'Goals For': 0, 'Goals Against': 0, 'Goal Difference': 0, 'Points': 0}


        league_table[home_team]['Played'] += 1
        league_table[home_team]['Goals For'] += home_goals
        league_table[home_team]['Goals Against'] += away_goals
        league_table[home_team]['Goal Difference'] += (home_goals - away_goals)


        league_table[away_team]['Played'] += 1
        league_table[away_team]['Goals For'] += away_goals
        league_table[away_team]['Goals Against'] += home_goals
        league_table[away_team]['Goal Difference'] += (away_goals - home_goals)

        if home_goals > away_goals:
            league_table[home_team]['Won'] += 1
            league_table[home_team]['Points'] += 3
            league_table[away_team]['Lost'] += 1
        elif home_goals < away_goals:
            league_table[away_team]['Won'] += 1
            league_table[away_team]['Points'] += 3
            league_table[home_team]['Lost'] += 1
        else:
            league_table[home_team]['Drawn'] += 1
            league_table[away_team]['Drawn'] += 1
            league_table[home_team]['Points'] += 1
            league_table[away_team]['Points'] += 1


    league_df = pd.DataFrame.from_dict(league_table, orient='index')

    league_df = league_df.sort_values(by=['Points', 'Goal Difference', 'Goals For'], ascending=False)

    return league_df

In [None]:
def create_league_table_for_current_year(league, df):
    current_year = datetime.now().year
    
    if datetime.now().month < 8:
        start_date = f"{current_year - 1}-08-01" 
        end_date = f"{current_year}-07-01"   
    else:
        start_date = f"{current_year}-08-01"      
        end_date = f"{current_year + 1}-07-01"   

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    matches_df = df[(df['Div'] == league) & 
                    (pd.to_datetime(df['Date']) >= start_date) & 
                    (pd.to_datetime(df['Date']) < end_date)]
    
    return matches_df

In [146]:
create_league_table_for_current_year('E0', df).tail()

Unnamed: 0,index,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA,Time,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFH,BFD,BFA,1XBH,1XBD,1XBA,BFEH,BFED,BFEA,BFE>2.5,BFE<2.5,BFEAHH,BFEAHA,BFCH,BFCD,BFCA,1XBCH,1XBCD,1XBCA,BFECH,BFECD,BFECA,BFEC>2.5,BFEC<2.5,BFECAHH,BFECAHA
327,4,E0,2024-08-17,Newcastle,Southampton,1,0,H,1,0,H,C Pawson,3,19,1,4,15,16,3,12,2,4,1,0,1.36,5.25,8.0,1.35,5.5,7.75,,,,,,,1.35,5.7,8.25,1.35,5.5,8.0,,,,,,,,,,,,,,,,,,,,,,1.42,5.3,7.26,15:00,1.37,5.9,8.6,1.35,5.62,8.1,1.4,3.0,1.4,3.09,1.42,3.12,1.4,3.01,-1.5,1.98,1.95,1.94,1.96,1.99,1.98,1.93,1.93,1.4,5.0,7.0,1.39,5.25,7.25,,,,1.4,5.0,7.5,,,,1.44,5.75,8.0,1.39,5.27,7.33,1.44,2.75,1.46,2.85,1.46,3.05,1.43,2.84,-1.25,1.87,2.06,1.88,2.06,1.89,2.1,1.82,2.05,1.33,5.5,8.5,1.37,5.74,8.1,1.37,6.0,9.2,1.43,3.15,1.94,2.01,1.36,5.0,7.5,1.39,5.34,7.9,1.43,5.5,8.2,1.49,2.98,1.89,2.1
328,3,E0,2024-08-17,Everton,Brighton,0,3,A,0,1,A,S Hooper,9,10,1,5,8,8,1,5,1,1,1,0,2.63,3.3,2.63,2.65,3.4,2.6,,,,,,,2.73,3.36,2.71,2.6,3.5,2.7,,,,,,,,,,,,,,,,,,,,,,3.15,3.41,2.4,15:00,2.76,3.66,2.78,2.67,3.41,2.68,1.8,2.0,1.83,2.05,1.85,2.08,1.81,2.04,0.0,1.96,1.97,1.96,1.94,1.97,1.97,1.94,1.94,3.1,3.4,2.3,3.0,3.4,2.37,,,,3.1,3.3,2.4,,,,3.16,3.5,2.45,3.06,3.4,2.38,1.93,1.97,1.93,1.97,1.95,2.0,1.89,1.96,0.25,1.87,2.06,1.86,2.07,1.92,2.1,1.83,2.04,2.7,3.4,2.63,2.68,3.66,2.63,2.78,3.5,2.78,1.88,2.08,1.99,1.99,3.0,3.3,2.3,3.16,3.47,2.34,3.15,3.55,2.46,1.94,2.04,1.88,2.11
329,2,E0,2024-08-17,Arsenal,Wolves,2,0,H,1,0,H,J Gillett,18,9,6,3,17,14,8,2,2,2,0,0,1.18,7.5,13.0,1.2,7.5,13.5,,,,,,,1.16,8.56,16.22,1.18,7.0,17.0,,,,,,,,,,,,,,,,,,,,,,1.15,9.05,18.76,15:00,1.2,9.1,18.0,1.18,7.86,15.87,1.44,2.75,1.46,2.79,1.5,2.82,1.46,2.7,-2.0,1.93,2.0,1.88,2.0,1.97,2.0,1.9,1.96,1.14,8.5,15.0,1.16,8.5,18.0,,,,1.15,8.0,19.0,,,,1.17,9.4,21.0,1.15,8.62,18.11,1.4,3.0,1.41,2.98,1.45,3.0,1.42,2.93,-2.25,2.02,1.91,2.0,1.9,2.05,1.93,1.99,1.87,1.15,8.0,16.0,1.2,7.65,16.0,1.19,9.0,18.0,1.53,2.84,1.94,2.0,1.13,8.5,17.0,1.16,9.39,16.6,1.17,9.4,21.0,1.44,3.2,2.02,1.96
330,1,E0,2024-08-17,Ipswich,Liverpool,0,2,A,0,0,D,T Robinson,7,18,2,5,9,18,2,10,3,1,0,0,8.5,5.5,1.33,7.5,5.5,1.36,,,,,,,8.18,5.84,1.34,8.5,5.5,1.33,,,,,,,,,,,,,,,,,,,,,,8.14,6.09,1.34,12:30,9.0,6.1,1.37,8.28,5.76,1.34,1.4,3.0,1.41,3.0,1.43,3.07,1.41,2.94,1.5,2.02,1.91,1.99,1.92,2.02,1.95,1.97,1.9,8.0,5.75,1.33,8.0,5.75,1.34,,,,8.0,5.5,1.35,,,,8.57,6.25,1.39,7.87,5.81,1.35,1.36,3.2,1.37,3.3,1.4,3.38,1.37,3.18,1.5,2.05,1.88,2.04,1.9,2.2,2.0,1.99,1.88,8.5,5.5,1.33,8.6,5.85,1.35,9.4,6.2,1.36,1.45,3.15,2.04,1.93,7.5,5.5,1.33,8.57,5.85,1.34,8.6,6.2,1.37,1.4,3.4,2.04,1.93
331,0,E0,2024-08-16,Man United,Fulham,1,0,H,0,0,D,R Jones,14,10,5,2,12,10,7,8,2,3,0,0,1.6,4.2,5.25,1.6,4.4,5.25,,,,,,,1.63,4.38,5.3,1.65,4.2,5.0,,,,,,,,,,,,,,,,,,,,,,1.65,4.23,5.28,20:00,1.68,4.5,5.5,1.62,4.36,5.15,1.53,2.5,1.56,2.56,1.57,2.6,1.53,2.52,-1.0,2.05,1.88,2.07,1.86,2.07,1.89,2.03,1.85,1.67,4.1,5.0,1.65,4.2,4.8,,,,1.6,4.2,5.5,,,,1.7,4.33,5.5,1.66,4.2,5.02,1.62,2.3,1.63,2.38,1.66,2.45,1.61,2.37,-0.75,1.86,2.07,1.83,2.11,1.88,2.11,1.82,2.05,1.6,4.33,5.0,1.68,4.32,5.03,1.66,4.5,5.6,1.59,2.64,2.1,1.88,1.62,4.0,5.0,1.66,4.15,5.33,1.72,4.2,5.4,1.68,2.46,1.9,2.08


In [155]:
import pandas as pd

def get_avg_goals_for_home_and_away(league, home_team, away_team):
    matches_df = create_league_table_for_current_year(league, df)
    
    home_matches = matches_df[matches_df['HomeTeam'] == home_team]
    away_matches = matches_df[matches_df['AwayTeam'] == away_team]

    if len(home_matches) > 0:
        home_goals_avg = home_matches['FTHG'].mean() 
    else:
        home_goals_avg = 0


    if len(away_matches) > 0:
        away_goals_avg = away_matches['FTAG'].mean() 
    else:
        away_goals_avg = 0
    
    return {
        'home_team_avg_goals': home_goals_avg,
        'away_team_avg_goals': away_goals_avg
    }


result = get_avg_goals_for_home_and_away('E0', 'Man City', 'Arsenal')
print(result)

{'home_team_avg_goals': np.float64(2.2941176470588234), 'away_team_avg_goals': np.float64(1.7647058823529411)}


In [156]:
get_avg_goals_for_home_and_away('E0', 'Arsenal', 'Aston Villa')

{'home_team_avg_goals': np.float64(1.9411764705882353),
 'away_team_avg_goals': np.float64(1.3529411764705883)}

In [158]:
def get_home_and_away_form(df, home_team, away_team, num_matches=10):
    home_matches = df[df["HomeTeam"] == home_team].sort_values(by="Date", ascending=False).head(num_matches)
    away_matches = df[df["AwayTeam"] == away_team].sort_values(by="Date", ascending=False).head(num_matches)

    home_form = []
    for _, row in home_matches.iterrows():
        if row["FTHG"] > row["FTAG"]:
            home_form.append("W")
        elif row["FTHG"] == row["FTAG"]:
            home_form.append("D")
        else:
            home_form.append("L")

    away_form = []
    for _, row in away_matches.iterrows():
        if row["FTAG"] > row["FTHG"]:
            away_form.append("W")
        elif row["FTAG"] == row["FTHG"]:
            away_form.append("D")
        else:
            away_form.append("L")

    return {"home_form": home_form[::-1], "away_form": away_form[::-1]}

def get_team_data(league_df, home_team, away_team):
    response = {}
    avg_goals = get_avg_goals_for_home_and_away(league_df, home_team, away_team)
    response["avg_goals"] = avg_goals

    home_away_form = get_home_and_away_form(league_df, home_team, away_team)
    response["home_away_form"] = home_away_form

    # ... rest of the existing code in get_team_data ...
    return response

In [160]:
get_team_data(df, 'Tottenham', 'Arsenal')

ValueError: Operands are not aligned. Do `left, right = left.align(right, axis=1, copy=False)` before operating.