In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [None]:
# --- Load data ---
# Read the merged team-level table from a CSV in the working directory.
data = pd.read_csv('team_data.csv')

# --- Select predictors and target ---
# The four metric columns are the model inputs; 'TeamWins' is the value
# being predicted (used here as the measure of team success).
feature_columns = ['WAR', 'OPS', 'ERA', 'DefensiveRunsSaved']
features = data[feature_columns]
target = data['TeamWins']

# --- Train / test split ---
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# --- Fit the model ---
# Random forest of 100 trees, seeded so repeated runs give the same forest.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
rf_model.fit(X_train, y_train)

# --- Evaluate on the held-out rows ---
predictions = rf_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
print("MSE:", mse)
print("R^2 Score:", r2)


In [None]:
# Feature importance: rank the input metrics by how much the fitted forest
# relies on each one, to highlight which metrics matter most.
#
# A fitted RandomForestRegressor exposes these values as
# `feature_importances_`. There is no `position_importances_` attribute, so
# that lookup raises AttributeError.
importances = rf_model.feature_importances_

# Pair each importance with its column name. `features.columns` is the column
# set the model was trained on, so the two sequences line up one-to-one.
position_importance_df = pd.DataFrame({'Feature': features.columns, 'Importance': importances})

# Most important metric first.
position_importance_df = position_importance_df.sort_values(by='Importance', ascending=False)

print(position_importance_df)