<a href="https://colab.research.google.com/github/Garrett-Reed/5961/blob/main/Model3_updated.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load datasets: per-player tracking measurements and historical match records.
tracking_data_path = 'Player_tracking_data.csv'
training_data_path = 'Training Data.csv'

tracking_data, training_data = (
    pd.read_csv(csv_path) for csv_path in (tracking_data_path, training_data_path)
)

# Step 1: Align Positions
# `training_data` stores positions as abbreviations; expand them to the full
# names so both frames can share one vocabulary. Abbreviations missing from
# the mapping become NaN.
position_mapping = dict(DF='Defender', MF='Midfielder', FW='Forward')
training_data['Position'] = training_data['Position'].map(position_mapping)

# Build one integer code per distinct position seen in either frame, then
# apply the same codes to both frames so they are directly comparable.
all_positions = pd.concat([tracking_data['Position'], training_data['Position']]).unique()
position_encoder = dict(zip(all_positions, range(len(all_positions))))
for frame in (tracking_data, training_data):
    frame['Position'] = frame['Position'].map(position_encoder)

# Step 2: Convert Game Date
# Parse the mm/dd/yy strings and replace them with proleptic Gregorian
# ordinals so the date can be used as a numeric feature.
game_dates = pd.to_datetime(training_data['Game Date'], format='%m/%d/%y')
training_data['Game Date'] = game_dates.apply(lambda stamp: stamp.toordinal())

# Step 3: Encode Player Names (currently disabled)
# The encoding below is commented out, so the 'Player' column keeps its original values.
#all_players = pd.concat([tracking_data['Player'], training_data['Player']]).unique()
#player_encoder = {name: idx for idx, name in enumerate(all_players)}
#tracking_data['Player'] = tracking_data['Player'].map(player_encoder)
#training_data['Player'] = training_data['Player'].map(player_encoder)

# Step 4: Define Time Intervals
# Inclusive (start, end) ranges. They bucket the tracking data's
# 'Match Time Step' and select matching 'Game Time' rows in the training data.
time_intervals = [(1, 45), (46, 70), (71, 90)]

# Results dictionary to store the best player for each position and time interval.
# Keys are (encoded position, "start-end") tuples.
results = {}

# Find the player(s) ready to be subbed out (100% exertion level)
subbed_out_players = tracking_data[tracking_data['% to Limit'] >= 100]

if subbed_out_players.empty:
    raise ValueError("No player is ready to be substituted at 100% exertion.")

# Columns fed to the classifier, and the column it predicts.
# NOTE: rows are filtered to 'On Field' == 1 and 'Team Goal Scored' == 1
# before training, so those two features are constant and only 'Game Date'
# varies between samples.
feature_columns = ['Game Date', 'On Field', 'Team Goal Scored']
target_column = 'Player'

# (position, interval) combinations already processed. The filtering and the
# model below depend only on that pair (and fixed random seeds), so repeating
# them for every exhausted row would just redo identical work.
evaluated = set()

# Iterate over each player ready to be subbed out
for _, player_data in subbed_out_players.iterrows():
    position = player_data['Position']
    match_time_step = player_data['Match Time Step']

    # Determine the time interval for the match time step
    for start, end in time_intervals:
        if start <= match_time_step <= end:
            interval = (start, end)
            break
    else:
        continue  # Skip if no matching interval is found

    if (position, interval) in evaluated:
        continue
    evaluated.add((position, interval))

    # Debugging the filters
    print(f"\nFiltering for Time Interval: {start}-{end}")

    # Filter training data for the time interval
    time_filtered = training_data[(training_data['Game Time'] >= start) & (training_data['Game Time'] <= end)]
    print(f"After filtering by time: {len(time_filtered)} rows")
    print(time_filtered)

    # Filter by position
    position_filtered = time_filtered[time_filtered['Position'] == position]
    print(f"After filtering by position: {len(position_filtered)} rows")
    print(position_filtered)

    # Filter by 'On Field' and 'Team Goal Scored'
    final_filtered = position_filtered[(position_filtered['On Field'] == 1) & (position_filtered['Team Goal Scored'] == 1)]
    print(f"After filtering by 'On Field' and 'Team Goal Scored': {len(final_filtered)} rows")
    print(final_filtered)

    if final_filtered.empty:
        print("No data available for this condition. Skipping...")
        continue

    # Drop rows with a missing feature or a missing player name *before*
    # checking the sample count. Taking X and y from the same cleaned frame
    # keeps them aligned row for row.
    samples = final_filtered.dropna(subset=feature_columns + [target_column])

    # Features (X) and target (y)
    X = samples[feature_columns]
    y = samples[target_column]

    # Debugging X and y
    print("X DataFrame Shape:", X.shape)
    print("y Series Shape:", y.shape)
    print("First few rows of X:\n", X.head())
    print("First few values of y:\n", y.head())

    # Handle empty or insufficient data
    if len(X) <= 1:
        print(f"Not enough samples ({len(X)}) to perform train-test split. Skipping...")
        continue

    # Split data into training and testing sets
    try:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        print("Train and test split successful.")
    except ValueError as e:
        print(f"Error during train_test_split: {e}. Skipping this iteration...")
        continue

    # Train the model
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predict probabilities on the test set
    probas = model.predict_proba(X_test)

    # Identify the best player (highest average probability).
    # The columns of `probas` follow the order of `model.classes_`, so the
    # winning column index must be looked up there to get the player label.
    avg_probas = probas.mean(axis=0)
    best_player_index = np.argmax(avg_probas)
    best_player_name = model.classes_[best_player_index]

    # Calculate model accuracy
    accuracy = accuracy_score(y_test, model.predict(X_test))

    # Store the result
    results[(position, f"{start}-{end}")] = {
        'Best Player': best_player_name,
        'Accuracy': accuracy
    }

# Display the results
print("\nBest Player for Each Position and Time Interval:")
for (position, time_interval), result in results.items():
    print(f"Position {position}, Time Interval {time_interval}: Best Player - {result['Best Player']} (Accuracy: {result['Accuracy']:.2f})")



Filtering for Time Interval: 46-70
After filtering by time: 725 rows
                   Player  Game Date  Position  Game Time  On Field  \
45           Kyle Heibert     739021         2         46         1   
46           Kyle Heibert     739021         2         47         1   
47           Kyle Heibert     739021         2         48         1   
48           Kyle Heibert     739021         2         49         1   
49           Kyle Heibert     739021         2         50         1   
...                   ...        ...       ...        ...       ...   
2585  Jake Girdwood-Reich     739130         2         66         0   
2586  Jake Girdwood-Reich     739130         2         67         0   
2587  Jake Girdwood-Reich     739130         2         68         0   
2588  Jake Girdwood-Reich     739130         2         69         0   
2589  Jake Girdwood-Reich     739130         2         70         0   

      Team Goal Scored  
45                   0  
46                   0  
47