<a href="https://colab.research.google.com/github/Garrett-Reed/5961/blob/main/Model-1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

file_path = '/mnt/data/Training Data.csv'
data = pd.read_csv('Training Data.csv')

# Preprocessing: Convert 'Game Date' to ordinal and encode categorical features
data['Game Date'] = pd.to_datetime(data['Game Date']).apply(lambda x: x.toordinal())
data['Position'] = data['Position'].astype('category').cat.codes

# Encode target variable ('Player') using LabelEncoder
le = LabelEncoder()
data['Player'] = le.fit_transform(data['Player'])

# Define time intervals
time_intervals = [(1, 45), (46, 70), (71, 90)]

# Results dictionary to store the best player for each position and time interval
results = {}

# Iterate over each position
for position in data['Position'].unique():
    position_data = data[data['Position'] == position]  # Filter by position

    # Iterate over each time interval
    for start, end in time_intervals:
        interval_data = position_data[
            (position_data['Game Time'] >= start) & (position_data['Game Time'] <= end)
        ]  # Filter by time interval

        # Check if there is enough data for training
        if len(interval_data) < 10:  # Arbitrary threshold to ensure enough samples
            continue

        # Features (X) and target (y)
        X = interval_data[['Game Date', 'On Field', 'Team Goal Scored']]
        y = interval_data['Player']

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train the model
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)

        # Predict probabilities on the test set
        probas = model.predict_proba(X_test)

        # Identify the best player (highest average probability)
        avg_probas = probas.mean(axis=0)
        best_player_index = np.argmax(avg_probas)
        best_player_name = le.inverse_transform([best_player_index])[0]

        # Store the result
        results[(position, f"{start}-{end}")] = {
            'Best Player': best_player_name,
            'Accuracy': accuracy_score(y_test, model.predict(X_test))
        }

# Display the results
print("Best Player for Each Position and Time Interval:")
for (position, time_interval), result in results.items():
    print(f"Position {position}, Time Interval {time_interval}: Best Player - {result['Best Player']} (Accuracy: {result['Accuracy']:.2f})")


  data['Game Date'] = pd.to_datetime(data['Game Date']).apply(lambda x: x.toordinal())


Best Player for Each Position and Time Interval:
Position 0, Time Interval 1-45: Best Player - Aziel Jackson (Accuracy: 0.63)
Position 0, Time Interval 46-70: Best Player - Aziel Jackson (Accuracy: 0.58)
Position 0, Time Interval 71-90: Best Player - Jannes Horn (Accuracy: 0.86)
Position 1, Time Interval 1-45: Best Player - Hosei Kijima (Accuracy: 0.55)
Position 1, Time Interval 46-70: Best Player - Hosei Kijima (Accuracy: 0.69)
Position 1, Time Interval 71-90: Best Player - Hosei Kijima (Accuracy: 0.75)
Position 2, Time Interval 1-45: Best Player - Aziel Jackson (Accuracy: 0.60)
Position 2, Time Interval 46-70: Best Player - Aziel Jackson (Accuracy: 0.62)
Position 2, Time Interval 71-90: Best Player - Aziel Jackson (Accuracy: 0.69)
