In [1]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the dataset into a pandas dataframe
df = pd.read_csv('t10_data.csv')

# Convert any ranges of two values in 'predicted_fantasy_points' to the average of the two values
df['predicted_fantasy_points'] = df['predicted_fantasy_points'].apply(lambda x: sum(map(int, x.split('-')))/2 if '-' in x else int(x))

# Replace any invalid value with the mean of the 'predicted_fantasy_points' column
df['predicted_fantasy_points'] = pd.to_numeric(df['predicted_fantasy_points'], errors='coerce')
df['predicted_fantasy_points'] = df['predicted_fantasy_points'].fillna(df['predicted_fantasy_points'].mean())

# Convert 'predicted_fantasy_points', 'form', and 'dream_team_percentage' columns to numeric data type
df['predicted_fantasy_points'] = pd.to_numeric(df['predicted_fantasy_points'])
df['form'] = pd.to_numeric(df['form'])
df['dream_team_percentage'] = pd.to_numeric(df['dream_team_percentage'])

# Drop any rows with missing data or null values
df = df.dropna()

# Define the target variable and the features
X = df[['predicted_fantasy_points', 'form', 'dream_team_percentage']]
y = df['total_fantasy_points']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model on the training set
reg = LinearRegression().fit(X_train, y_train)

# Evaluate the performance of the model on the testing set
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r_squared = reg.score(X_test, y_test)
print("Mean squared error: ", mse)
print("R-squared: ", r_squared)

# Use the trained model to predict the weighing coefficients
predicted_weights = reg.coef_

# Create a new column 'rank' by calculating the weighted average of 'predicted_fantasy_points', 'form', and 'dream_team_percentage' columns
df['rank'] = predicted_weights[0] * df['predicted_fantasy_points'] + predicted_weights[1] * df['form'] + predicted_weights[2] * df['dream_team_percentage']

# Sort the dataframe based on the 'rank' column in descending order
df = df.sort_values('rank', ascending=False)

# Randomly select 20 combinations of players with 11 players in each combination
random_combinations = []
for i in range(20):
    players = random.sample(df['players'].tolist(), 11)
    ranking = sum(df.loc[df['players'].isin(players)]['rank'])
    random_combinations.append((ranking, players))

# Print the random 20 combinations with their total ranking
for i, combination in enumerate(random_combinations):
    print(f"Rank {i+1}: Total Ranking = {combination[0]}")
    print(combination[1])
    print()


KeyError: 'total_fantasy_points'