<a href="https://colab.research.google.com/github/Ddhawan2003/FplAnalytics/blob/main/KnapsackFPL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import pandas as pd
import requests

# Step 1: Load the existing CSV file
csv_file_path = 'updated_players.csv'
updated_players_df = pd.read_csv(csv_file_path)

# Step 2: Fetch the player prices from the Fantasy Premier League API
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    players = data['elements']

    # Create a dictionary to map player names to their now_cost
    player_prices = {
        f"{player['first_name']} {player['second_name']}": player['now_cost'] / 10.0  # Convert to millions
        for player in players
    }

    # Update the prices in the DataFrame
    updated_players_df['now_cost'] = updated_players_df['first_name'] + ' ' + updated_players_df['second_name']
    updated_players_df['now_cost'] = updated_players_df['now_cost'].map(player_prices).fillna(updated_players_df['now_cost'])

    # Step 3: Save the updated DataFrame back to CSV
    updated_players_df.to_csv(csv_file_path, index=False)
    print("Updated player prices successfully.")
else:
    print(f"Failed to retrieve data: {response.status_code}")


if 'price' in updated_players_df.columns:
    updated_players_df.drop(columns=['now_cost'], inplace=True)


Updated player prices successfully.


In [10]:
import pandas as pd

# Load the player data from the CSV file
players_df = pd.read_csv('cleaned_players.csv', delimiter=',')  # Change delimiter if necessary

def calculate_predicted_points(row):
    points = 0

    # Minutes played points
    if row['minutes'] >= 60:
        points += 2
    elif row['minutes'] > 0:
        points += 1

    # Goals scored points
    if row['element_type'] == 'GK':
        points += row['goals_scored'] * 10  # Goalkeeper goals
        points += row['clean_sheets'] * 4  # Clean sheets for GK
        points += row['goals_conceded'] // 2 * -1  # Goals conceded penalties
        points += row['bps'] // 3  # Bonus points from BPS
    elif row['element_type'] == 'DEF':
        points += row['goals_scored'] * 6  # Defender goals
        points += row['clean_sheets'] * 4  # Clean sheets for DEF
        points += row['goals_conceded'] // 2 * -1  # Goals conceded penalties
        points += row['bps'] // 3  # Bonus points from BPS
    elif row['element_type'] == 'MID':
        points += row['goals_scored'] * 5  # Midfielder goals
        points += row['assists'] * 3  # Assists points for MID
        points += row['clean_sheets'] * 1  # Clean sheets for MID
        points += row['bps'] // 3  # Bonus points from BPS
    elif row['element_type'] == 'FWD':
        points += row['goals_scored'] * 4  # Forward goals
        points += row['assists'] * 3  # Assists points for FWD
        points += row['bps'] // 3  # Bonus points from BPS

    # Penalties, cards, and own goals penalties
    points += row['yellow_cards'] * -1
    points += row['red_cards'] * -3

    return points

# Apply the function to each row to calculate predicted points
players_df['predicted_points'] = players_df.apply(calculate_predicted_points, axis=1)

# Save the updated DataFrame back to CSV
players_df.to_csv('updated_players.csv', index=False)

print("Predicted points have been calculated and saved to 'updated_players.csv'.")


Predicted points have been calculated and saved to 'updated_players.csv'.


In [27]:
import pandas as pd
import pulp

# Load the updated_players.csv file
csv_file_path = 'updated_players.csv'
updated_players_df = pd.read_csv(csv_file_path)

# Convert now_cost to price (the cost is in millions, so we divide by 10)
updated_players_df['price'] = updated_players_df['now_cost']

# Create a list of players with relevant information
players = []
for index, row in updated_players_df.iterrows():
    players.append({
        'name': f"{row['first_name']} {row['second_name']}",
        'points': row['predicted_points'],
        'price': row['price'],
        'position': row['element_type'],  # Assuming element_type corresponds to position
    })

# Initialize the problem
problem = pulp.LpProblem("Best_Premier_League_Team", pulp.LpMaximize)

# Create decision variables
x = pulp.LpVariable.dicts("player", range(len(players)), cat="Binary")

# Objective function: maximize total predicted points
problem += pulp.lpSum([x[i] * players[i]['points'] for i in range(len(players))])

# Budget constraint
problem += pulp.lpSum([x[i] * players[i]['price'] for i in range(len(players))]) <= 100

# Position constraints
problem += pulp.lpSum([x[i] for i in range(len(players)) if players[i]['position'] == 'GK']) == 2
problem += pulp.lpSum([x[i] for i in range(len(players)) if players[i]['position'] == 'DF']) <= 5
problem += pulp.lpSum([x[i] for i in range(len(players)) if players[i]['position'] == 'MF']) <= 5
problem += pulp.lpSum([x[i] for i in range(len(players)) if players[i]['position'] == 'FW']) <= 3

# Solve the problem
problem.solve()

# Get the selected team
selected_team = [players[i]['name'] for i in range(len(players)) if x[i].value() == 1]
total_points = pulp.value(problem.objective)

# Output the results
print(f"Selected team: {selected_team}")
print(f"Total predicted points: {total_points}")


Selected team: ['Bryan Mbeumo', 'Danny Welbeck', 'Noni Madueke', 'Cole Palmer', 'Dwight McNeil', 'Emile Smith Rowe', 'Wilfred Ndidi', 'Alisson Ramses Becker', 'Ibrahima Konaté', 'Luis Díaz', 'Virgil van Dijk', 'Mateo Kovačić', 'Diogo Dalot Teixeira', 'André Onana', 'James Maddison', 'Cristian Romero']
Total predicted points: 1217.0


In [38]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Step 1: Load the data
file_path = 'merged_gw.csv'  # Update this to your CSV file path
data = pd.read_csv(file_path)

# Step 2: Preprocess the data
# Convert 'kickoff_time' to datetime format
data['kickoff_time'] = pd.to_datetime(data['kickoff_time'])

# Sort by player and time
data.sort_values(by=['name', 'kickoff_time'], inplace=True)

# Calculate rolling averages for relevant features
data['rolling_xP'] = data.groupby('name')['xP'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
data['rolling_assists'] = data.groupby('name')['assists'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
data['rolling_goals'] = data.groupby('name')['goals_scored'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
data['rolling_bonus'] = data.groupby('name')['bonus'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
data['rolling_clean_sheets'] = data.groupby('name')['clean_sheets'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())

# Step 3: Create the feature set (X) and target variable (y)
# Select features for the model
features = ['rolling_xP', 'rolling_assists', 'rolling_goals',
            'rolling_bonus', 'rolling_clean_sheets', 'bps',
            'creativity', 'threat', 'minutes', 'position']  # Add more features as necessary

# Convert categorical variable (position) to dummy/indicator variables
X = pd.get_dummies(data[features], drop_first=True)

# Target variable: points for the next game week
data['next_gw_points'] = data['total_points'].shift(-1)

y = data['next_gw_points'].dropna()

# Align X with y
X = X.iloc[:-1]  # Remove the last row which will not have a target variable

# Step 4: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Step 6: Make predictions for the next game week (GW8)
y_pred = rf_model.predict(X)

# Create a DataFrame for predictions
predictions = pd.DataFrame({
    'Player': data['name'].iloc[:-1],  # Exclude the last row which doesn't have a target
    'Predicted Points': y_pred
})

# Step 7: Display the top players with the most predicted points for GW8
top_predictions = predictions.sort_values(by='Predicted Points', ascending=False)

# Display the top 10 players
print("Top 10 Players Predicted Points for GW8:")
print(top_predictions.head(10))


Top 10 Players Predicted Points for GW8:
               Player  Predicted Points
2563      Cole Palmer             18.43
2369        Luis Díaz             13.97
232    Erling Haaland             13.36
3819  Michail Antonio             13.00
13        Cole Palmer             12.25
1070        Luis Díaz             11.83
757     Mohamed Salah             11.83
2047  Nicolas Jackson             11.63
851    Erling Haaland             11.42
3504      Bukayo Saka             11.18


In [47]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Step 1: Load current season data
current_file_path = 'merged_gw.csv'  # Update with your current season CSV file path
current_data = pd.read_csv(current_file_path)

# Step 2: Load last season's performance data
last_season_file_path = 'merged_gw (1).csv'  # Update with your last season CSV file path
last_season_data = pd.read_csv(last_season_file_path)

# Step 3: Preprocess last season's data
last_season_data['rolling_xP'] = last_season_data.groupby('name')['xP'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
last_season_data['rolling_assists'] = last_season_data.groupby('name')['assists'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
last_season_data['rolling_goals'] = last_season_data.groupby('name')['goals_scored'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
last_season_data['rolling_bonus'] = last_season_data.groupby('name')['bonus'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())
last_season_data['rolling_clean_sheets'] = last_season_data.groupby('name')['clean_sheets'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())

# Step 4: Select relevant features from last season's data
last_season_features = last_season_data[['name', 'rolling_xP', 'rolling_assists', 'rolling_goals', 'rolling_bonus', 'rolling_clean_sheets']]

# Step 5: Merge last season's features with current season data
current_data = current_data.merge(last_season_features, on='name', how='left', suffixes=('', '_last_season'))

# Step 6: Preprocess current season data
if 'kickoff_time' in current_data.columns:
    current_data['kickoff_time'] = pd.to_datetime(current_data['kickoff_time'])

# Calculate rolling averages for current season features
current_data['rolling_xP_current'] = current_data.groupby('name')['xP'].transform(lambda x: x.rolling(window=3, min_periods=1).mean())

# Step 7: Create the feature set (X) and target variable (y)
features = [
    'rolling_xP',
    'rolling_assists',
    'rolling_goals',
    'rolling_bonus',
    'rolling_clean_sheets',
    'rolling_xP_current',
    'bps',
    'creativity',
    'threat',
    'minutes',
    'position'  # Removed 'fixture_strength'
]

# Convert categorical variable (position) to dummy/indicator variables
X = pd.get_dummies(current_data[features], drop_first=True)

# Step 8: Create target variable: points for the next game week
current_data['next_gw_points'] = current_data['total_points'].shift(-1)

# Ensure there are no NaNs in the target variable
current_data.dropna(subset=['next_gw_points'], inplace=True)

# Align y with X
y = current_data['next_gw_points']
X = X.loc[y.index]  # Align X with y indices

# Step 9: Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 10: Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate model performance
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)

print("Training MSE:", mean_squared_error(y_train, y_train_pred))
print("Testing MSE:", mean_squared_error(y_test, y_test_pred))

# Step 11: Make predictions for the next game week (GW8)
y_pred = rf_model.predict(X)

# Create a DataFrame for predictions
predictions = pd.DataFrame({
    'Player': current_data['name'],  # Use all rows
    'Predicted Points': y_pred
})

# Step 11: Display the top players with the most predicted points for GW8
top_predictions = predictions.sort_values(by='Predicted Points', ascending=False)

# Step 12: Save the top 100 unique predictions into a CSV file
top_100_unique_names = top_predictions.drop_duplicates('Player').head(100)

# Save to CSV
output_file_path = 'top_100_player_predictions.csv'  # Path to save the CSV file
top_100_unique_names.to_csv(output_file_path, index=False)

print(f"Top 100 unique player predictions saved to {output_file_path}")

Training MSE: 0.16423303414911494
Testing MSE: 0.49920316519307295
Top 100 unique player predictions saved to top_100_player_predictions.csv


In [51]:
import requests
import pandas as pd

# Step 1: Fetch the fixtures data from the Fantasy Premier League API
fixtures_url = "https://fantasy.premierleague.com/api/fixtures/"
response = requests.get(fixtures_url)

# Check if the request was successful
if response.status_code == 200:
    fixtures_data = response.json()  # Parse JSON response
else:
    print("Failed to retrieve fixtures data:", response.status_code)
    fixtures_data = []

# Step 2: Fetch the events data
events_url = "https://fantasy.premierleague.com/api/events/"
events_response = requests.get(events_url)

# Check if the request was successful
if events_response.status_code == 200:
    events_data = events_response.json()  # Parse JSON response
else:
    print("Failed to retrieve events data:", events_response.status_code)
    events_data = []

# Step 3: Extract relevant fixture information for GW8
gw8_fixtures = []

# Fetch GW8 event ID
gw8_event_id = next((event['id'] for event in events_data if event['name'] == "Gameweek 8"), None)

if gw8_event_id is not None:
    # Iterate through the fixtures data
    for fixture in fixtures_data:
        if fixture['event'] == gw8_event_id:  # Filter for GW8
            fixture_info = {
                'Fixture ID': fixture['id'],
                'Home Team': fixture['team_a'],
                'Away Team': fixture['team_h'],
                'Kickoff Time': fixture['kickoff_time'],
                'Venue': fixture.get('venue', 'N/A'),
                'Home Team Score': fixture.get('team_a_score', None),
                'Away Team Score': fixture.get('team_h_score', None),
            }
            gw8_fixtures.append(fixture_info)
else:
    print("Gameweek 8 not found in events data.")

# Step 4: Convert GW8 fixtures to a DataFrame
gw8_fixtures_df = pd.DataFrame(gw8_fixtures)

# Step 5: Save the DataFrame to a CSV file
csv_file_path = 'gw8_fixtures.csv'
gw8_fixtures_df.to_csv(csv_file_path, index=False)

print(f"GW8 fixtures saved to {csv_file_path}.")


GW8 fixtures saved to gw8_fixtures.csv.
