In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

# Load data
matches = pd.read_csv("matches.csv", index_col=0)
matches["date"] = pd.to_datetime(matches["date"])

# Extract the kickoff hour from the time column
matches["hour"] = pd.to_datetime(matches["time"]).dt.hour

# Encode the result as ONE integer class label (this is not one-hot encoding):
# win -> 1, loss -> 0, draw -> -1. Any other result value becomes NaN.
matches["target"] = matches["result"].map({"W": 1, "L": 0, "D": -1})

# Drop unnecessary columns
matches = matches.drop(columns=["comp", "notes"])

# Convert categorical columns to numerical codes.
# The codes are positions in the sorted set of distinct values found in `matches`,
# so any other frame fed to the model must be encoded against the same categories.
matches["venue_code"] = matches["venue"].astype("category").cat.codes
matches["opp_code"] = matches["opponent"].astype("category").cat.codes
matches["day_code"] = matches["date"].dt.dayofweek

# Define predictors
predictors = ["venue_code", "opp_code", "hour", "day_code"]

# Split data chronologically into training and testing sets.
# The test side uses >= so that matches played exactly on the split date are not
# silently dropped from both sets (the strict < / > pair excluded them).
SPLIT_DATE = "2024-01-01"
train = matches[matches["date"] < SPLIT_DATE]
test = matches[matches["date"] >= SPLIT_DATE]

# Initialize and train Random Forest Classifier (fixed seed for reproducibility)
rf = RandomForestClassifier(n_estimators=190, min_samples_split=10, random_state=1)
rf.fit(train[predictors], train["target"])

# Make predictions
preds = rf.predict(test[predictors])

# Calculate accuracy and class-weighted precision on the held-out matches
accuracy = accuracy_score(test["target"], preds)
precision = precision_score(test["target"], preds, average='weighted')

# Print accuracy and precision
print("Accuracy:", accuracy)
print("Precision:", precision)

# Display confusion matrix (rows: actual label, columns: predicted label)
combined = pd.DataFrame({"actual": test["target"], "predicted": preds})
pd.crosstab(index=combined["actual"], columns=combined["predicted"])


Accuracy: 0.42063492063492064
Precision: 0.41627990568207957


predicted,-1,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-1,8,15,9
0,9,22,16
1,6,18,23


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

def update_score_table(score_table, match_outcomes):
    """Apply a list of match outcomes to a league table.

    Parameters
    ----------
    score_table : DataFrame with the columns 'Club', 'Pld', 'W', 'D', 'L' and 'Pts'.
        It is modified in place.
    match_outcomes : iterable of dicts with the keys 'home_team', 'away_team'
        and 'result'. 'result' is 1 when the home team wins, -1 when the away
        team wins and 0 for a draw. Any other value leaves the table untouched
        for that match.

    Returns
    -------
    The same ``score_table`` object that was passed in.

    NOTE(review): the classifier in this notebook is trained on labels
    W=1, L=0, D=-1, which is a different encoding from the one used here;
    callers have to translate predictions before passing them in.
    """

    def bump(club, column, amount):
        # Only rows whose 'Club' equals the given name are incremented.
        score_table.loc[score_table['Club'] == club, column] += amount

    for outcome in match_outcomes:
        home = outcome['home_team']
        away = outcome['away_team']
        code = outcome['result']

        if code == 0:
            # Draw: one point and one 'D' for each side.
            for club in (home, away):
                bump(club, 'Pts', 1)
                bump(club, 'D', 1)
        elif code == 1 or code == -1:
            # Decisive result: three points to the winner, a loss to the other side.
            winner, loser = (home, away) if code == 1 else (away, home)
            bump(winner, 'Pts', 3)
            bump(winner, 'W', 1)
            bump(loser, 'L', 1)
        else:
            # Unrecognised result code: nothing is recorded for this match.
            continue

        # Both sides have played one more game.
        bump(home, 'Pld', 1)
        bump(away, 'Pld', 1)

    return score_table

# Load data
score_table = pd.read_csv("score_table.csv")

# Initialize Random Forest Classifier
rf = RandomForestClassifier(n_estimators=190, min_samples_split=10, random_state=1)

# Train once, before the loop: the training data does not change between rounds.
# `matches`, `train` and `predictors` are defined by the training cell earlier in
# this notebook, which must have been run first.
rf.fit(train[predictors], train["target"])

# Category sets used when the training data was encoded. Fixtures must be encoded
# against these same sets, otherwise an identical venue/opponent would receive a
# different integer code than the one the model was trained on.
venue_categories = matches["venue"].astype("category").cat.categories
opponent_categories = matches["opponent"].astype("category").cat.categories

# The model predicts W=1, L=0, D=-1 (from the point of view of `team`), while
# update_score_table expects 1 = first team wins, -1 = second team wins, 0 = draw.
MODEL_TO_TABLE_RESULT = {1: 1, 0: -1, -1: 0}

# Loop through each round of fixtures
# NOTE(review): every iteration reads the whole of fixtures.csv and does not filter
# on the "round" column; widening the range would apply the same fixtures again.
for round_num in range(34, 35):
    Fixtures_pre = pd.read_csv("fixtures.csv", index_col=0)
    Fixtures_pre["date"] = pd.to_datetime(Fixtures_pre["date"])

    # Extract hour from time column
    Fixtures_pre["hour"] = pd.to_datetime(Fixtures_pre["time"]).dt.hour

    # Encode the result as a single integer label (win=1, loss=0, draw=-1).
    # Not used by the prediction below.
    Fixtures_pre["target"] = Fixtures_pre["result"].map({"W": 1, "L": 0, "D": -1})

    # Drop unnecessary columns
    Fixtures_pre = Fixtures_pre.drop(columns=["comp", "notes"])

    # Convert categorical columns to the SAME numerical codes as the training data.
    # Values that never occurred in `matches` get the code -1.
    Fixtures_pre["venue_code"] = pd.Categorical(
        Fixtures_pre["venue"], categories=venue_categories
    ).codes
    Fixtures_pre["opp_code"] = pd.Categorical(
        Fixtures_pre["opponent"], categories=opponent_categories
    ).codes
    Fixtures_pre["day_code"] = Fixtures_pre["date"].dt.dayofweek

    test = Fixtures_pre
    preds = rf.predict(test[predictors])

    # Predict match outcomes for current round.
    # `preds` is a positional array, so pair it with the rows by position rather
    # than by the DataFrame index labels read from the CSV.
    # 'home_team' / 'away_team' are just the key names update_score_table expects;
    # presumably `team` is not always the home side (venue is a separate column).
    # NOTE(review): if fixtures.csv lists each match once per team, every match is
    # counted twice here - confirm against the file.
    match_outcomes = [
        {
            'home_team': team,
            'away_team': opponent,
            'result': MODEL_TO_TABLE_RESULT[int(label)],
        }
        for team, opponent, label in zip(
            Fixtures_pre['team'], Fixtures_pre['opponent'], preds
        )
    ]

    # Update score table based on match outcomes
    score_table = update_score_table(score_table, match_outcomes)

    # Print updated score table after each round
    print(f"      Score Table after Matchweek {round_num}:")
    print(score_table)

# Show the round label of the last fixture processed (no reliance on a leaked loop variable)
if not Fixtures_pre.empty:
    print(Fixtures_pre["round"].iloc[-1])

  Fixtures_pre["date"] = pd.to_datetime(Fixtures_pre["date"])


      Score Table after Matchweek 34:
    Rank            Club  Pld   W   D   L  Pts
0      1       St. Pauli   34  16  14   4   62
1      2   Holstein Kiel   34  13  14   7   53
2      3    Hamburger SV   34  12  11  11   47
3      4     Hannover 96   34  11  16   7   49
4      5  Greuther Fürth   34  12  14   8   50
5      6    Paderborn 07   34  13  10  11   49
6      7      Düsseldorf   34  12  15   7   51
7      8      Karlsruher   34  14  13   7   55
8      9      Hertha BSC   34  12  13   9   49
9     10        Nürnberg   34  10  14  10   44
10    11      Elversberg   34  10  12  12   42
11    12       Magdeburg   34   8  13  13   37
12    13           Wehen   34  10  12  12   42
13    14      Schalke 04   34   9   7  18   34
14    15  Kaiserslautern   34   9  12  13   39
15    16    Braunschweig   34   9  10  15   37
16    17   Hansa Rostock   34   8  11  15   35
17    18       Osnabrück   34   4  18  12   30
Matchweek 34
