# Fantasy Premier League team prediction
### Intro

This notebook attempts to use machine learning to pick the optimal Fantasy Premier League team and, eventually, to plan transfers and chip usage. The data comes from https://github.com/vaastav/Fantasy-Premier-League/.

First, clone the dataset:
```
git clone https://github.com/vaastav/Fantasy-Premier-League.git
```

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from pulp import LpMaximize, LpProblem, LpVariable, lpSum
import pyomo.environ as pyo
from pyomo.opt import SolverFactory
import chardet
import os

### Data prep

In [23]:
def find_gws_directory(base_directory):
    """Locate a directory named 'gws' at or below ``base_directory``.

    The tree is walked top-down with sub-directories visited in sorted
    order, so when several 'gws' folders exist the same one is returned on
    every platform.

    Args:
        base_directory: Path of the directory tree to search.

    Returns:
        The path of the first 'gws' directory encountered, or ``None`` when
        the walk finds none (including when ``base_directory`` does not
        exist, in which case the walk yields nothing).
    """
    for root, dirs, _files in os.walk(base_directory):
        # os.walk lists entries in arbitrary filesystem order; sorting the
        # list in place also fixes the order in which it descends.
        dirs.sort()
        if 'gws' in dirs:
            return os.path.join(root, 'gws')
    return None

def load_and_bind_csvs(base_directory):
    """Load every CSV in the 'gws' directory into one combined dataframe.

    The 'gws' directory is located with ``find_gws_directory``. Each CSV is
    read using the encoding guessed by ``chardet``, tagged with a
    ``source_file`` column holding its path, grouped with the other files
    that share exactly the same set of columns, and finally all groups are
    concatenated into a single dataframe.

    NOTE(review): only the one 'gws' directory returned by
    ``find_gws_directory`` is read; if ``base_directory`` holds several
    season folders the others are ignored -- confirm this is intended.
    NOTE(review): if the directory holds both per-gameweek files and a
    merged file covering the same rows, those rows would appear twice --
    verify against the dataset layout.

    Args:
        base_directory: Directory tree that contains the 'gws' folder.

    Returns:
        A ``pandas.DataFrame`` with the rows of every readable CSV, or
        ``None`` when no 'gws' directory exists or no CSV could be loaded.
        Files that fail to read are reported on stdout and skipped.
    """
    gws_directory = find_gws_directory(base_directory)
    if not gws_directory:
        print("Could not find 'gws' directory")
        return None

    # Frames keyed by their (unordered) column set, so files with an
    # identical set of columns are stacked together before the final concat.
    dataframes_by_columns = {}

    # Sorted so the row order of the result does not depend on the order in
    # which the filesystem happens to list the files.
    for filename in sorted(os.listdir(gws_directory)):
        if not filename.endswith('.csv'):
            continue
        filepath = os.path.join(gws_directory, filename)

        # Guess the text encoding from the raw bytes of the file
        with open(filepath, 'rb') as f:
            encoding = chardet.detect(f.read())['encoding']

        try:
            df = pd.read_csv(filepath, encoding=encoding)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load
            print(f"Error reading {filename}: {e}")
            continue

        # Record which file each row came from
        df['source_file'] = filepath

        dataframes_by_columns.setdefault(frozenset(df.columns), []).append(df)

    if not dataframes_by_columns:
        # pd.concat raises ValueError when given nothing to concatenate
        print(f"No CSV files could be loaded from {gws_directory}")
        return None

    # Stack the frames that share a column set
    final_dataframes = [
        pd.concat(dfs, ignore_index=True)
        for dfs in dataframes_by_columns.values()
    ]

    # Combine the groups; columns missing from a group are filled with NaN
    return pd.concat(final_dataframes, ignore_index=True, sort=False)

# Root data folder of the cloned repository (holds the per-year folders)
base_directory = '../Fantasy-Premier-League/data'

# Read the CSV files and stack them into one dataframe
final_df = load_and_bind_csvs(base_directory)

# Preview the first rows, but only when loading actually produced a dataframe
if final_df is not None:
    print(final_df.head(5))


              name  assists  attempted_passes  big_chances_created  \
0  Aaron_Cresswell        0                38                    0   
1     Aaron_Lennon        0                12                    0   
2       Aaron_Mooy        0                82                    0   
3   Aaron_Ramsdale        0                 0                    0   
4     Aaron_Ramsey        0                53                    0   

   big_chances_missed  bonus  bps  clean_sheets  \
0                   0      0   22             1   
1                   0      0    2             0   
2                   0      0   14             1   
3                   0      0    0             0   
4                   0      0   12             0   

   clearances_blocks_interceptions  completed_passes  ...  total_points  \
0                                8                28  ...             5   
1                                1                 6  ...             1   
2                                0             

In [26]:
# Show the column labels of the combined dataframe
final_df.columns

Index(['name', 'assists', 'attempted_passes', 'big_chances_created',
       'big_chances_missed', 'bonus', 'bps', 'clean_sheets',
       'clearances_blocks_interceptions', 'completed_passes', 'creativity',
       'dribbles', 'ea_index', 'element', 'errors_leading_to_goal',
       'errors_leading_to_goal_attempt', 'fixture', 'fouls', 'goals_conceded',
       'goals_scored', 'ict_index', 'id', 'influence', 'key_passes',
       'kickoff_time', 'kickoff_time_formatted', 'loaned_in', 'loaned_out',
       'minutes', 'offside', 'open_play_crosses', 'opponent_team', 'own_goals',
       'penalties_conceded', 'penalties_missed', 'penalties_saved',
       'recoveries', 'red_cards', 'round', 'saves', 'selected', 'tackled',
       'tackles', 'target_missed', 'team_a_score', 'team_h_score', 'threat',
       'total_points', 'transfers_balance', 'transfers_in', 'transfers_out',
       'value', 'was_home', 'winning_goals', 'yellow_cards', 'source_file',
       'GW'],
      dtype='object')