In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the data
matches = pd.read_csv('matches_laliga.csv')

# Data preprocessing
# Assuming you need to encode teams and opponents, and convert dates
encoder = LabelEncoder()
matches['team'] = encoder.fit_transform(matches['team'])
matches['opponent'] = encoder.fit_transform(matches['opponent'])
matches['date'] = pd.to_datetime(matches['date'])

# Prepare the model
# Select predictors - adjust these as per your dataset
matches['venue_code'] = encoder.fit_transform(matches['venue'])
predictors = ['team', 'opponent', 'venue_code', 'gf', 'ga', 'xg', 'xga', 'sh', 'sot']
matches['result'] = matches['result'].map({'W': 1, 'D': 0, 'L': -1})  # Encoding results
X = matches[predictors]
y = matches['result']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Train the RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

# Predict outcomes for the entire dataset (as an example)
matches['predicted_result'] = model.predict(X)

# Function to simulate the season and compile results
def simulate_season(matches):
    league_table = pd.DataFrame(columns=['team', 'mp', 'w', 'd', 'l', 'points'])

    for team in matches['team'].unique():
        team_matches = matches[(matches['team'] == team) | (matches['opponent'] == team)]
        team_matches = team_matches.head(38)  # Consider only the first 38 matches

        w = d = l = 0
        for index, row in team_matches.iterrows():
            if row['predicted_result'] == 1 and row['team'] == team:
                w += 1
            elif row['predicted_result'] == -1 and row['team'] != team:
                w += 1
            elif row['predicted_result'] == 0:
                d += 1
            else:
                l += 1

        points = w * 3 + d
        league_table = league_table.append({'team': team, 'mp': 38, 'w': w, 'd': d, 'l': l, 'points': points}, ignore_index=True)

    return league_table

# Simulate the season
league_table = simulate_season(matches)

# Apply team name mapping
team_name_mapping = {
    # Add your team name mappings here
    # e.g., 0: 'Real Madrid', 1: 'Barcelona', ...
}
league_table['team'] = league_table['team'].map(team_name_mapping)

# Sort and display the league table
league_table = league_table.sort_values(by='points', ascending=False)
print(league_table)

    team  mp   w   d   l points
0    NaN  38  30   5   3     95
3    NaN  38  26   9   3     87
2    NaN  38  25   4   9     79
5    NaN  38  18  11   9     65
6    NaN  38  15  15   8     60
1    NaN  38  16  12  10     60
4    NaN  38  16   9  13     57
11   NaN  38  14   7  17     49
9    NaN  38  12  10  16     46
15   NaN  38  11  12  15     45
18   NaN  38  12   9  17     45
16   NaN  38  10  11  17     41
7    NaN  38   8  13  17     37
20   NaN  38  10   7  21     37
12   NaN  38   9   9  20     36
13   NaN  38   9   9  20     36
10   NaN  38   8  10  20     34
8    NaN  38   8  10  20     34
23   NaN  38   8  10  20     34
14   NaN  38   5  15  18     30
22   NaN  38   7   8  23     29
21   NaN  38   5  10  23     25
17   NaN  38   3  12  23     21
19   NaN  38   1   7  30     10


In [6]:
league_table

Unnamed: 0,team,mp,w,d,l,points
0,,38,30,5,3,95
3,,38,26,9,3,87
2,,38,25,4,9,79
5,,38,18,11,9,65
6,,38,15,15,8,60
1,,38,16,12,10,60
4,,38,16,9,13,57
11,,38,14,7,17,49
9,,38,12,10,16,46
15,,38,11,12,15,45
