<a href="https://colab.research.google.com/github/Garrett-Reed/5961/blob/main/Model3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load datasets
tracking_data = pd.read_csv('player_tracking_data.csv')  # Player tracking data
training_data = pd.read_csv('Training Data.csv')  # Training data

# Preprocessing for Training Data
training_data['Game Date'] = pd.to_datetime(training_data['Game Date']).apply(lambda x: x.toordinal())
training_data['Position'] = training_data['Position'].astype('category').cat.codes

# Encode target variable ('Player') using LabelEncoder
le = LabelEncoder()
training_data['Player'] = le.fit_transform(training_data['Player'])

# Define time intervals
time_intervals = [(1, 45), (46, 70), (71, 90)]

# Results dictionary to store the best player for each position and time interval
results = {}

# Find the player(s) ready to be subbed out (100% exertion level)
subbed_out_players = tracking_data[tracking_data['% to Limit'] >= 100]

#print("Subbed Out Players:\n", subbed_out_players)

if subbed_out_players.empty:
    raise ValueError("No player is ready to be substituted at 100% exertion.")

# Iterate over each player ready to be subbed out
for _, player_data in subbed_out_players.iterrows():
    position = player_data['Position']
    match_time_step = player_data['Match Time Step']

    # Determine the time interval for the match time step
    for start, end in time_intervals:
        if start <= match_time_step <= end:
            interval = (start, end)
            break
    else:
        continue  # Skip if no matching interval is found

    # Filter training data based on the conditions
    filtered_data = training_data[
        (training_data['Game Time'] >= start) &  # Time interval start
        (training_data['Game Time'] <= end) &   # Time interval end
        (training_data['Position'] == position) &  # Match position
        (training_data['On Field'] == 1) &         # Player is on the field
        (training_data['Team Goal Scored'] == 1)   # Team scored a goal
    ]

    print(filtered_data)

    # Check if there's enough data for training
    if len(filtered_data) < 1:
        continue  # Skip this interval if insufficient data

    # Features (X) and target (y)
    X = filtered_data[['Game Date', 'On Field', 'Team Goal Scored']]
    y = filtered_data['Player']

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predict probabilities on the test set
    probas = model.predict_proba(X_test)

    # Identify the best player (highest average probability)
    avg_probas = probas.mean(axis=0)
    best_player_index = np.argmax(avg_probas)
    best_player_name = le.inverse_transform([best_player_index])[0]

    # Calculate model accuracy
    accuracy = accuracy_score(y_test, model.predict(X_test))

    # Store the result
    results[(position, f"{start}-{end}")] = {
        'Best Player': best_player_name,
        'Accuracy': accuracy
    }

# Display the results
print("Best Player for Each Position and Time Interval:")
for (position, time_interval), result in results.items():
    print(f"Position {position}, Time Interval {time_interval}: Best Player - {result['Best Player']} (Accuracy: {result['Accuracy']:.2f})")


Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time, On Field, Team Goal Scored]
Index: []
Empty DataFrame
Columns: [Player, Game Date, Position, Game Time

  training_data['Game Date'] = pd.to_datetime(training_data['Game Date']).apply(lambda x: x.toordinal())
