In [3]:
import numpy as np
import pandas as pd

# Toy fixture: seven matches between teams A, B and C, one row per match.
# FTHG is presumably "full-time home goals" -- TODO confirm against the data source.
# NOTE(review): the final date ("2022-06-1") is not zero-padded like the others.
data = dict(
    Date=[
        "2022-05-01",
        "2022-05-10",
        "2022-05-15",
        "2022-05-20",
        "2022-05-25",
        "2022-05-31",
        "2022-06-1",
    ],
    HomeTeam=list("ABACBAA"),
    AwayTeam=list("BCCAABB"),
    FTHG=[1, 0, 2, 2, 1, 1, 1],
)

df = pd.DataFrame(data)


def _chronological_sort_key(dates):
    """Return a sort key that orders ``dates`` by time instead of by text.

    Numeric and datetime columns are already ordered correctly and are
    returned unchanged. Anything else (typically strings) is parsed with
    ``pd.to_datetime``; if parsing fails the raw values are returned so the
    caller falls back to plain value ordering instead of raising.
    """
    if pd.api.types.is_numeric_dtype(dates) or pd.api.types.is_datetime64_any_dtype(
        dates
    ):
        return dates
    try:
        return pd.to_datetime(dates)
    except (ValueError, TypeError):
        return dates


def _lagged_rolling_mean(values, window):
    """Mean of the ``window`` observations preceding each row (current row excluded)."""
    # min_periods=window -> NaN until a full window exists;
    # shift() moves the result down one row so a row never sees its own value.
    return values.rolling(window=window, min_periods=window).mean().shift()


def calculate_rolling_averages(df, n=5, n_same_team=2):
    """Add lagged rolling-mean feature columns and drop incomplete rows.

    The frame is sorted chronologically by ``Date`` (string dates are parsed
    for ordering only; the stored values are left as they are). Then, for
    every numeric column not listed in ``exclude_columns``:

    * ``Home{col}_all_avg_{n}``: mean of ``col`` over the previous ``n`` rows
      that share the row's ``HomeTeam``.
    * ``Away{col}_all_avg_{n}``: the same, grouped by ``AwayTeam``.
    * ``Home{col}_same_avg_{n_same_team}`` (columns in ``home_stat_columns``)
      or ``Away{col}_same_avg_{n_same_team}`` (all other columns): mean of
      ``col`` over the previous ``n_same_team`` rows with the same
      (HomeTeam, AwayTeam) pairing.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Date``, ``HomeTeam`` and ``AwayTeam`` columns.
        The input frame is not modified.
    n : int, default 5
        Window size for the per-team averages.
    n_same_team : int, default 2
        Window size for the per-pairing averages.

    Returns
    -------
    pandas.DataFrame
        Sorted copy with the feature columns appended. Rows containing a NaN
        in *any* column are dropped, which removes rows lacking a full window
        of history (and also rows with missing values in the input columns).
    """
    # Columns to exclude
    exclude_columns = [
        "HTR",
        "FTR",
        "Date",
        "HomeTeam",
        "AwayTeam",
        "Referee",
    ]
    # Columns whose per-pairing average is stored under the "Home" prefix.
    home_stat_columns = ("FTHG", "HTHG", "HS", "HST", "HC", "HF", "HY", "HR")

    # Columns to calculate rolling averages for
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    columns_to_calculate = [
        col for col in numeric_columns if col not in exclude_columns
    ]

    # Sort chronologically. Sorting the raw strings would be lexical
    # ("2022-06-10" < "2022-06-9"), which feeds the wrong "previous" rows into
    # the lagged windows. A stable sort keeps same-date rows in input order.
    df = df.sort_values(by="Date", key=_chronological_sort_key, kind="mergesort")

    # Calculate rolling mean for the last n matches for each team
    for col in columns_to_calculate:
        df[f"Home{col}_all_avg_{n}"] = df.groupby("HomeTeam")[col].transform(
            lambda s: _lagged_rolling_mean(s, n)
        )
        df[f"Away{col}_all_avg_{n}"] = df.groupby("AwayTeam")[col].transform(
            lambda s: _lagged_rolling_mean(s, n)
        )

    # Rolling mean over the last n_same_team meetings of the same pairing
    for col in columns_to_calculate:
        if col in home_stat_columns:
            target_name = f"Home{col}_same_avg_{n_same_team}"
            pairing = ["HomeTeam", "AwayTeam"]
        else:
            target_name = f"Away{col}_same_avg_{n_same_team}"
            pairing = ["AwayTeam", "HomeTeam"]
        df[target_name] = df.groupby(pairing)[col].transform(
            lambda s: _lagged_rolling_mean(s, n_same_team)
        )

    return df.dropna()


# Demo run with small windows (2 matches per team, 1 per pairing); rows that
# lack a full window of history are removed by the function's final dropna().
df = df.pipe(calculate_rolling_averages, n=2, n_same_team=1)
df

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,HomeFTHG_all_avg_2,AwayFTHG_all_avg_2,HomeFTHG_same_avg_1
6,2022-06-1,A,B,1,1.5,1.0,1.0
