## Football leagues 

### Importing the Data

In [4]:
#Importing libraries 
import pandas as pd
import numpy as np

In [None]:
fifa_df=pd.read_csv("fifa_df.csv")
fifa_df=fifa_df.reset_index(drop=False)

In [6]:
print(fifa_df.shape)
print(fifa_df['sofifa_id'].value_counts().count())
fifa_df.head()

(122841, 108)
41533


Unnamed: 0,index,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,...,ldm,cdm,rdm,rwb,lb,lcb,cb,rcb,rb,fifa_year
0,0,158023,https://sofifa.com/player/158023/lionel-messi/...,L. Messi,Lionel Andr�s Messi Cuccittini,27,1987-06-24,169,67,Argentina,...,62+3,62+3,62+3,62+3,54+3,45+3,45+3,45+3,54+3,2015
1,1,20801,https://sofifa.com/player/20801/c-ronaldo-dos-...,Cristiano Ronaldo,Cristiano Ronaldo dos Santos Aveiro,29,1985-02-05,185,80,Portugal,...,63+3,63+3,63+3,63+3,57+3,52+3,52+3,52+3,57+3,2015
2,2,9014,https://sofifa.com/player/9014/arjen-robben/15...,A. Robben,Arjen Robben,30,1984-01-23,180,80,Netherlands,...,64+3,64+3,64+3,64+3,55+3,46+3,46+3,46+3,55+3,2015
3,3,41236,https://sofifa.com/player/41236/zlatan-ibrahim...,Z. Ibrahimovi?,Zlatan Ibrahimovi?,32,1981-10-03,195,95,Sweden,...,65+3,65+3,65+3,61+3,56+3,55+3,55+3,55+3,56+3,2015
4,4,167495,https://sofifa.com/player/167495/manuel-neuer/...,M. Neuer,Manuel Neuer,28,1986-03-27,193,92,Germany,...,40+3,40+3,40+3,36+3,36+3,38+3,38+3,38+3,36+3,2015


### Finding number of connected components following the below rules 
1. Two players are in the same club 
2. They were in the same national team in the same year 

### Creating the nationality flag

In [8]:
#Jersey number is only present for players who played FIFA
#Checking if the jersey number is null 
d1=fifa_df['nation_jersey_number'].isnull()
#Only storing unique nationalities for which the FIFA is present 
fifa_nt=fifa_df[~d1]['nationality'].unique()
#For other nationalities other than the mentioned above we can consider atleast 25 players 
limit=25
#Finding the unique years (here we have considered 15 to 21 - 6 years of players data)
years=fifa_df['fifa_year'].unique()
#Whoever is not in fifa is tagged in non_fifa (unique values)
not_fifa=fifa_df[~fifa_df.isin(fifa_nt)]['nationality'].unique()
#Creating an empty list 
nt_calls=[]


In [9]:
#Going through each and every year 
for i in years:
    #Going through each and every non fifa nationality 
    for j in not_fifa:
        #filtering fifa_year and nationality and sorting it by overall score in desceding order
        df_not_ft=fifa_df[(fifa_df['fifa_year']==i)&(fifa_df['nationality']==j)][['sofifa_id','overall']].sort_values("overall",ascending=False).reset_index(drop=True)
        #List of all sofifa_id which are top 25
        not_ft_list_25=list(df_not_ft.loc[:min(30,len(df_not_ft)),'sofifa_id'].values)
        #Appending all the sofifa_id into one common list to tag the non_fifa nationailities 
        nt_calls.extend(not_ft_list_25)


In [10]:
#Using the above generated list and assign the national_call variable 
fifa_df['national_call']=np.where(fifa_df['sofifa_id'].isin(nt_calls),1,0)
#Once the national_call for non_fifa nationalities are assigned, we can replace the values of fifa nationalities in the data
fifa_df['national_call']=np.where(fifa_df['nation_jersey_number'].isnull(),1,0)
fifa_df['national_call'].value_counts()

1    115156
0      7685
Name: national_call, dtype: int64

### Creating edges based on nationalities or clubs 

In [11]:
nt_edge={}
club_edge={}
#def convertgraph():
for i in range(len(fifa_df)):
    player=fifa_df.loc[i,'sofifa_id']
    fifa_year=fifa_df.loc[i,'fifa_year']
    year=str(fifa_year)
    club=str(fifa_df.loc[i,'club_name'])
    nt=fifa_df.loc[i,'nationality']
    #creating two boolen values 
    has_nationality=True
    has_club=True
    if fifa_df.loc[i,'national_call']==0:
        has_nationality=False
    if str(club)=='nan':
        has_club=False
    #add year to each link to create a key 
    nt_yr= nt + year
    club_yr=club + year
    #based on nationality, there is an edge formed 
    if has_nationality and nt_yr in nt_edge.keys():
        nt_edge[nt_yr].extend([player])
    elif has_nationality:
        nt_edge[nt_yr]=[player]
    #adding the links thourgh club
    if has_club and club_yr in club_edge.keys():
        club_edge[club_yr].extend([player])
    elif has_club:
        club_edge[club_yr]=[player]
        

In [12]:
#Graph Approach data structues
g = {}
for i in fifa_df['sofifa_id'].unique():
    g[i] = []

In [13]:
#Converting the complete lists into dictionary of lists 
for i in range(len(fifa_df)):
    #accessing each and every row in the dataset 
    player=fifa_df.loc[i,'sofifa_id']
    fifa_year=fifa_df.loc[i,'fifa_year']
    year=str(fifa_year)
    club=str(fifa_df.loc[i,'club_name'])
    nt=fifa_df.loc[i,'nationality']
    #initializing the boolean variables to be True 
    has_nt=True
    has_club=True
    #if the national_call is zero, then player is not there in national team
    if fifa_df.loc[i,'national_call']==0:
        has_nt=False 
    #if there is no club assigned to the player
    if str(club)=="nan":
        has_club=False
    #creating the keys with which the above defined links can be mapped to each row
    nt_yr=nt+year
    club_yr=club+year
    #if player has nationality, then use nt_edge 
    if has_nt:
        g[player].extend(nt_edge[nt_yr])
    #if the player has club, then use club_edge
    if has_club:
        g[player].extend(club_edge[club_yr])

### Creating a network graphs

In [15]:
#Importing the graphs 
import networkx as nx
#checking the type of g - dictionary 
print(type(g))
#creating g1 with graph datatype
g1=nx.Graph()

<class 'dict'>


In [16]:
#creating g1 from the dictionary g 
g1=nx.from_dict_of_lists(g)

### BFS Algorithm

In [21]:
# BFS(G,s) returns two dictionaries: the distances and the parents. 

def BFS(G,s):
    #creating an empty queue 
    queue = [s]
    #creating two dictionary to story if the distance of the node from the vertex 
    visited = {}
    distance = {}
    #assigning nodes to be false 
    for i in G.nodes:
        visited[i] = False
        distance[i] = False
    #Since the distance to same variable is zero
    distance[s] = 0  
    visited[s] = True
    #creating a parent dictionary
    parent = {}
    #Until the queue is empty
    while len(queue) != 0:
        #removing the element from queue 
        node = queue.pop(0)
        #neighbors of the above removed node
        neighbours = list(G.neighbors(node))
        #traversing the neighbors of the above neighbors
        for q_node in neighbours:
            #if it is already visited then do nothing , if not, then enter the if condition
            if not visited[q_node]:
                #Since the node is not visited, the visiting status shall be set True and distance to be noted down as well (+1 from the removed node)
                queue.append(q_node)
                visited[q_node] = True
                parent[q_node] = node
                distance[q_node] = distance[node] + 1
    #return parent node and its distance from the vertex
    return parent,distance

    ###

### Number of Connected Components

In [22]:
def numberOfComponents(G):
    #count initialized to zero
    count=0
    #creating empty list
    p=[]
    #finding all the disconnected components 
    for i in G.nodes():
        #if the value is not in p, then, it is another not connected component 
        if i not in p:
            #count should be increased 
            count=count+1
            p1,d1=BFS(G,i)
            p.extend(p1.keys())
        else:
            continue
    #the final count tells the number of connected components 
    return count
# Driver Code

In [52]:
a=BFS(g1,158023)

In [26]:
numberOfComponents(g1)

436

### Insights on BFS and Connected Components

1. In the above implementation, Lionel Messi's connectedness is being checked. With this, we can use this infer about the teams combination whenever we are making a club
2. There are 436 connected components for this network. However, one network contains almost 98% of the nodes

## Machine Learning 

### Data Preprocessing

In [36]:
fifa_21.columns 
fifa_21.shape

(18944, 107)

In [37]:
#checking missing values of the data
dict1={}
for i in fifa_21.columns.to_list():
    dict1[i]=fifa_21[i].isnull().sum()

In [38]:
print(dict1)
fifa_21.shape

{'sofifa_id': 0, 'player_url': 0, 'short_name': 0, 'long_name': 0, 'age': 0, 'dob': 0, 'height_cm': 0, 'weight_kg': 0, 'nationality': 0, 'club_name': 225, 'league_name': 225, 'league_rank': 225, 'overall': 0, 'potential': 0, 'value_eur': 0, 'wage_eur': 0, 'player_positions': 0, 'preferred_foot': 0, 'international_reputation': 0, 'weak_foot': 0, 'skill_moves': 0, 'work_rate': 0, 'body_type': 0, 'real_face': 0, 'release_clause_eur': 995, 'player_tags': 17536, 'team_position': 225, 'team_jersey_number': 225, 'loaned_from': 18186, 'joined': 983, 'contract_valid_until': 225, 'nation_position': 17817, 'nation_jersey_number': 17817, 'pace': 2083, 'shooting': 2083, 'passing': 2083, 'dribbling': 2083, 'defending': 2083, 'physic': 2083, 'gk_diving': 16861, 'gk_handling': 16861, 'gk_kicking': 16861, 'gk_reflexes': 16861, 'gk_speed': 16861, 'gk_positioning': 16861, 'player_traits': 10629, 'attacking_crossing': 0, 'attacking_finishing': 0, 'attacking_heading_accuracy': 0, 'attacking_short_passing':

(18944, 107)

In [39]:
#dropping variables for which missing values are more than 300
list1=[]
for i in dict1.keys():
    if dict1[i]>300:
        continue
    else:
        list1.append(i)

In [40]:
#Dropped 20 variables which have missing values for the complete row
final_df=fifa_21[list1].copy()

In [41]:
final_df.shape

(18944, 87)

In [42]:
#Dropping rows for which club name and league names are missing 
final_df.dropna(axis=0,inplace=True)

In [43]:
final_df.shape

(18719, 87)

In [44]:
#final_df.head().to_csv("sample.csv")

In [45]:
final_df.drop(['sofifa_id','player_url','short_name','long_name','dob'],axis=1,inplace=True)

In [46]:
final_df.shape

(18719, 82)

In [47]:
#training variables used
training_list=['height_cm','weight_kg','value_eur','wage_eur','international_reputation','weak_foot','skill_moves','attacking_crossing','attacking_finishing','attacking_heading_accuracy','attacking_short_passing','attacking_volleys','skill_dribbling','skill_curve','skill_fk_accuracy','skill_long_passing','skill_ball_control','movement_acceleration','movement_sprint_speed','movement_agility','movement_reactions','movement_balance','power_shot_power','power_jumping','power_stamina','power_strength','power_long_shots','mentality_aggression','mentality_interceptions','mentality_positioning','mentality_vision','mentality_penalties','mentality_composure','defending_standing_tackle','defending_sliding_tackle','goalkeeping_diving','goalkeeping_handling','goalkeeping_kicking','goalkeeping_positioning','goalkeeping_reflexes']
train=final_df[training_list]


In [48]:
y_label=final_df['potential']

In [49]:
t1_means=final_df[['height_cm','weight_kg','value_eur','wage_eur','international_reputation','weak_foot','skill_moves','attacking_crossing','attacking_finishing','attacking_heading_accuracy','attacking_short_passing','attacking_volleys','skill_dribbling','skill_curve','skill_fk_accuracy','skill_long_passing','skill_ball_control','movement_acceleration','movement_sprint_speed','movement_agility','movement_reactions','movement_balance','power_shot_power','power_jumping','power_stamina','potential','power_strength','power_long_shots','mentality_aggression','mentality_interceptions','mentality_positioning','mentality_vision','mentality_penalties','mentality_composure','defending_standing_tackle','defending_sliding_tackle','goalkeeping_diving','goalkeeping_handling','goalkeeping_kicking','goalkeeping_positioning','goalkeeping_reflexes']]

### Splitting train and test datasets 

In [50]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(train,y_label,test_size=0.3,random_state=13)

### K Means Clustering 

In [98]:
from sklearn.cluster import KMeans
num_of_clusters=4

Kmean = KMeans(n_clusters=num_of_clusters)
Kmean.fit(t1_means)
centers= Kmean.cluster_centers_


In [99]:
centers

array([[1.81180775e+02, 7.48858422e+01, 8.73160532e+05, 4.32504294e+03,
        1.03410624e+00, 2.89719053e+00, 2.27548767e+00, 4.77010183e+01,
        4.40095694e+01, 5.05832413e+01, 5.68245614e+01, 4.08549871e+01,
        5.35663722e+01, 4.50982088e+01, 4.06470985e+01, 5.07539566e+01,
        5.65439823e+01, 6.33094099e+01, 6.33567047e+01, 6.22602135e+01,
        5.96262422e+01, 6.32719912e+01, 5.58745553e+01, 6.39414182e+01,
        6.11917556e+01, 6.97797816e+01, 6.40346583e+01, 4.46986873e+01,
        5.39303766e+01, 4.47415041e+01, 4.83435161e+01, 5.18959023e+01,
        4.67078886e+01, 5.58622255e+01, 4.62540793e+01, 4.43894614e+01,
        1.65135566e+01, 1.63084898e+01, 1.61814501e+01, 1.62859772e+01,
        1.66346461e+01],
       [1.81387879e+02, 7.58000000e+01, 2.31287879e+07, 7.49227273e+04,
        2.06060606e+00, 3.31515152e+00, 3.16363636e+00, 6.55212121e+01,
        6.15848485e+01, 5.97000000e+01, 7.49666667e+01, 5.92303030e+01,
        7.21818182e+01, 6.58939394e+01,

In [100]:
t1_means.head()

Unnamed: 0,height_cm,weight_kg,value_eur,wage_eur,international_reputation,weak_foot,skill_moves,attacking_crossing,attacking_finishing,attacking_heading_accuracy,...,mentality_vision,mentality_penalties,mentality_composure,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes
0,170,72,67500000,560000,5,4,4,85,95,70,...,95,75,96,35,24,6,11,15,14,8
1,187,83,46000000,220000,5,4,5,84,95,90,...,82,84,95,32,24,7,11,15,14,11
2,188,87,75000000,125000,3,3,1,13,11,15,...,65,11,68,12,18,87,92,78,90,90
3,184,80,80000000,240000,4,4,4,71,94,85,...,79,88,88,42,19,15,6,12,8,10
4,175,68,90000000,270000,5,5,5,85,87,62,...,90,92,93,30,29,9,9,15,15,11


In [101]:
#predicting the clusters
l1=Kmean.predict(t1_means)
l1

array([3, 3, 3, ..., 0, 0, 0])

### Insights 

1. As you can see above, the players are segregated into few clusters 
2. This could be very handy model when a new player is to be taken and see if he is the right replacement for the team based on the cluster it is in 

### Linear Regression 

In [156]:
from sklearn.linear_model import LinearRegression
log=LinearRegression()
log.fit(X_train,Y_train)
Yhat_log=log.predict(X_test)
from sklearn.metrics import r2_score,mean_squared_error
print("The R2 score of Linear Regression model is",r2_score(Y_test,Yhat_log))
print("The RMSE of Linear Regression model is",mean_squared_error(Y_test,Yhat_log))

The R2 score of Linear Regression model is 0.47783442886954064
The RMSE of Linear Regression model is 19.29700553620746


In [159]:
b1=log.coef_ #Coefficients of the variables 
a1=pd.DataFrame(b1,index=training_list)
print(a1)

                                       0
height_cm                   1.440579e-01
weight_kg                  -7.300577e-02
value_eur                   4.780300e-07
wage_eur                   -1.966584e-05
international_reputation   -4.884047e-01
weak_foot                   1.305729e-01
skill_moves                 4.847511e-01
attacking_crossing         -5.169344e-02
attacking_finishing         2.692180e-03
attacking_heading_accuracy  4.725256e-02
attacking_short_passing     7.440388e-02
attacking_volleys          -3.829799e-02
skill_dribbling             1.168597e-01
skill_curve                 1.310908e-02
skill_fk_accuracy          -2.567896e-02
skill_long_passing         -3.780624e-02
skill_ball_control          1.181176e-01
movement_acceleration       5.498172e-02
movement_sprint_speed       3.402441e-02
movement_agility           -2.151172e-02
movement_reactions          1.109728e-01
movement_balance            3.097120e-03
power_shot_power            5.847492e-02
power_jumping   

### Insights 

1. For predicting the potential of a player, reputation has been proven to best representative of the potential of player 
2. It is evident that the trend between the predictors and independent variables are not very linear 

### Decision Tree Regressor 

In [160]:
from sklearn.tree import DecisionTreeRegressor
dt=DecisionTreeRegressor()
dt.fit(X_train,Y_train)
Yhat_dt=log.predict(X_test)
from sklearn.metrics import r2_score,mean_squared_error
print("The R2 score of Decision Tree Regression model is",r2_score(Y_test,Yhat_dt))
print("The RMSE of Decision Tree Regression model is",mean_squared_error(Y_test,Yhat_dt))

The R2 score of Decision Tree Regression model is 0.47783442886954064
The RMSE of Decision Tree Regression model is 19.29700553620746


### Insights 

1. The Decision Tree also didn't fit in the data. This could be done due to overfitting 

### Random Forest Regression 

In [146]:
from sklearn.ensemble import RandomForestRegressor
rf=RandomForestRegressor()
rf.fit(X_train,Y_train)
Yhat_rf=rf.predict(X_test)
from sklearn.metrics import r2_score,mean_squared_error
print("The R2 score of Random Forest Regressor model is",r2_score(Y_test,Yhat_rf))
print("The RMSE of the model is",mean_squared_error(Y_test,Yhat_rf))

The R2 score of Random Forest Regressor model is 0.7845049459307858
The RMSE of the model is 7.963775249287749


### Insights 

1. The Random forest classifier is the best model to predict the potential as this might have reduced the overfitting problem as well and giving the best accuracy 

### Support Vector Machine 

In [147]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train,Y_train)
Yhat_svm=regressor.predict(X_test)
from sklearn.metrics import r2_score,mean_squared_error
print("The R2 score of Support Vector Machine model is",r2_score(Y_test,Yhat_svm))
print("The RMSE of Support Vector Machine model is",mean_squared_error(Y_test,Yhat_svm))

The R2 score of Support Vector Machine model is 0.5495636896366514
The RMSE of Support Vector Machine model is 16.646198936426526


### Insights 

1. The Support Vector regression is doing better than the linear regression and decision tree but not better than ensemble technique (random forest)

### Linear Programming 

In [107]:
#Only implementing the linear programming on Spain league 
spain_club=fifa_df[(fifa_df['league_name']=="Spain Primera Division")&(fifa_df['fifa_year']==2021)]
spain_club.shape

(645, 109)

In [108]:
spain_club['club_name'].value_counts(dropna=True)
#Unique number of sofifa id in the dataset for spain league
spain_club['sofifa_id'].value_counts().count()

645

In [109]:
#Finding the sum of all the money invested by each club in spain league 
spain_club.groupby(['club_name'])['value_eur'].sum()

club_name
Athletic Club de Bilbao    253550000
Atl�tico Madrid            582500000
CA Osasuna                 139875000
C�diz CF                    70500000
Deportivo Alav�s           156425000
Elche CF                    42830000
FC Barcelona               722200000
Getafe CF                  212645000
Granada CF                 166400000
Levante UD                 179675000
RC Celta                   169700000
Real Betis                 243290000
Real Madrid                760850000
Real Sociedad              290470000
Real Valladolid CF         139225000
SD Eibar                   141190000
SD Huesca                   75590000
Sevilla FC                 357875000
Valencia CF                254950000
Villarreal CF              307125000
Name: value_eur, dtype: int64

In [110]:
#Finding the mean of the potential across the club in the spain league
spain_club.groupby(['club_name'])['potential'].mean()

club_name
Athletic Club de Bilbao    78.677419
Atl�tico Madrid            82.575758
CA Osasuna                 75.774194
C�diz CF                   72.090909
Deportivo Alav�s           76.242424
Elche CF                   71.833333
FC Barcelona               84.939394
Getafe CF                  76.909091
Granada CF                 76.181818
Levante UD                 76.090909
RC Celta                   77.151515
Real Betis                 78.030303
Real Madrid                84.781250
Real Sociedad              79.303030
Real Valladolid CF         76.212121
SD Eibar                   74.733333
SD Huesca                  75.133333
Sevilla FC                 80.060606
Valencia CF                79.636364
Villarreal CF              80.031250
Name: potential, dtype: float64

In [111]:
#Labelling the club name using label encoder
from sklearn import preprocessing
spain_club['team_code']=preprocessing.LabelEncoder().fit_transform(spain_club['club_name'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  spain_club['team_code']=preprocessing.LabelEncoder().fit_transform(spain_club['club_name'])


In [112]:
#Printing out the team codes 
spain_club['team_code'].value_counts()

10    33
13    33
18    33
3     33
4     33
17    33
6     33
7     33
8     33
9     33
1     33
11    33
14    33
19    32
12    32
2     31
0     31
15    30
16    30
5     30
Name: team_code, dtype: int64

In [113]:
#Resetting index to ensure that the rows are properly tracked (0 to 645)
spain_club=spain_club.reset_index(drop=True)

In [114]:
#Optimization
from pulp import *
prob = pulp.LpProblem('FantasyTeam', LpMaximize)
#Finding the list of variables for whicht we are going to apply optimization on 
final_vars=[]
#Finding each and every variable across a row
for i in range(len(spain_club)):
    #defining each row as one x and its number 
    var=str('x'+str(i))
    #defining each variable  
    var=pulp.LpVariable(str(var),lowBound =0, upBound=1, cat="Integer")
    #appending variables to form a big list 
    final_vars.append(var)
#Checking if the number of variables are equal to 645 or not
print(len(final_vars))

645


In [115]:
#checking if the potential is missing for any player
spain_club['potential'].isnull().sum()

0

In [116]:
#creating the objective function which we want to maximize
#creating an empty string to store the objective function
obj = ""
#loopinng i through all the rows of dataset to find the product of potential and the the player tag as we did it in final_vars
for num, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        #if both of the above datasets are representing the same dataset, then include that in objective function
        if num == i:
            #formula for calculating an equation for objecive function
            formula = row['potential']*player
            obj += formula
#giving the objective function to the solver 
prob += obj
print ("Optimization function for finding the best team : " + str(obj))

Optimization function for finding the best team : 93*x0 + 93*x1 + 87*x10 + 80*x100 + 80*x101 + 80*x102 + 80*x103 + 80*x104 + 80*x105 + 80*x106 + 80*x107 + 80*x108 + 80*x109 + 87*x11 + 80*x110 + 80*x111 + 80*x112 + 80*x113 + 90*x114 + 86*x115 + 86*x116 + 86*x117 + 85*x118 + 84*x119 + 87*x12 + 84*x120 + 84*x121 + 84*x122 + 83*x123 + 82*x124 + 82*x125 + 82*x126 + 82*x127 + 80*x128 + 80*x129 + 88*x13 + 80*x130 + 80*x131 + 80*x132 + 79*x133 + 79*x134 + 79*x135 + 79*x136 + 79*x137 + 79*x138 + 79*x139 + 86*x14 + 79*x140 + 79*x141 + 79*x142 + 79*x143 + 79*x144 + 79*x145 + 79*x146 + 79*x147 + 79*x148 + 91*x149 + 86*x15 + 88*x150 + 86*x151 + 84*x152 + 83*x153 + 82*x154 + 81*x155 + 79*x156 + 78*x157 + 78*x158 + 78*x159 + 86*x16 + 78*x160 + 78*x161 + 78*x162 + 78*x163 + 78*x164 + 78*x165 + 78*x166 + 78*x167 + 78*x168 + 78*x169 + 86*x17 + 78*x170 + 78*x171 + 78*x172 + 78*x173 + 86*x174 + 85*x175 + 83*x176 + 82*x177 + 82*x178 + 82*x179 + 90*x18 + 82*x180 + 81*x181 + 80*x182 + 79*x183 + 79*x184 + 78*

In [117]:
#the available purse to buy the players for a club 
avail_cash = 84000000
#creating a constraint to restrict that purse should not go above this 
total_paid= ""
#Iterating over rows to create the equation which involves purse of each player 
for num, row in spain_club.iterrows():
    for i , player in enumerate(final_vars):
        if num==i:
            #equation includes worth of player and player tag in final vars 
            formula = row['value_eur']*player
            total_paid += formula
#adding the constraint which includes players worth in euros 
prob += (total_paid <= avail_cash)
#Printing total paid
print(total_paid)

67500000*x0 + 75000000*x1 + 24500000*x10 + 13500000*x100 + 14000000*x101 + 15000000*x102 + 12000000*x103 + 11500000*x104 + 12000000*x105 + 15500000*x106 + 12500000*x107 + 12000000*x108 + 9000000*x109 + 38000000*x11 + 12500000*x110 + 13000000*x111 + 14500000*x112 + 12500000*x113 + 21000000*x114 + 18000000*x115 + 15500000*x116 + 16000000*x117 + 15500000*x118 + 17000000*x119 + 50500000*x12 + 15500000*x120 + 17000000*x121 + 15000000*x122 + 12000000*x123 + 15500000*x124 + 16000000*x125 + 14000000*x126 + 16000000*x127 + 12500000*x128 + 12500000*x129 + 46500000*x13 + 14000000*x130 + 14000000*x131 + 14500000*x132 + 7500000*x133 + 8000000*x134 + 8000000*x135 + 10000000*x136 + 8000000*x137 + 10000000*x138 + 6500000*x139 + 21000000*x14 + 8000000*x140 + 11500000*x141 + 12000000*x142 + 12000000*x143 + 13500000*x144 + 11500000*x145 + 10000000*x146 + 11000000*x147 + 11500000*x148 + 20000000*x149 + 22500000*x15 + 15000000*x150 + 14000000*x151 + 13000000*x152 + 14000000*x153 + 12000000*x154 + 13000000*

### Team Constraints 

In [118]:
# every team has to have a goal keeper 
gk = 1
#finding a constraint that gives one goal keeper out of the 12 players 
total_gk = ""
#finding keepers which are very good with keeping only
for num, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        if num==i:
            #Anyone with more than goalkeeping handling of 50 and rest very low is considered to be a good fit for the team
            if row['goalkeeping_handling'] > 50 and row['defending_sliding_tackle'] < 50 and row['skill_dribbling']<50 and row['attacking_volleys']<50:
                #Finding an equation which involves only those players whose keeping is more than 50
                formula = 1*player
                #appending the forumla at the end 
                total_gk += formula
#Adding the constraint to the objective function
prob += (total_gk == gk)
print(total_gk)    

x1 + x123 + x157 + x158 + x164 + x189 + x191 + x2 + x215 + x229 + x231 + x237 + x240 + x251 + x283 + x284 + x3 + x312 + x321 + x322 + x351 + x354 + x370 + x413 + x420 + x424 + x426 + x454 + x46 + x471 + x478 + x490 + x492 + x494 + x506 + x512 + x513 + x524 + x53 + x530 + x531 + x532 + x534 + x549 + x55 + x552 + x556 + x558 + x56 + x563 + x565 + x573 + x575 + x585 + x586 + x604 + x605 + x607 + x616 + x618 + x619 + x620 + x626 + x63 + x644 + x65 + x66 + x76 + x78 + x79 + x86 + x90


In [119]:
#Constraining the number of defenders in the team to atleast three
defen = 3
#creating a constraint which limits the defenders to be atleast 3
total_def = ""
for rownum, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        if rownum == i:
            #ANy player with more than 50 defending sliding tackle is considered to be a good fit for the team
            if row['goalkeeping_handling'] < 50 and row['defending_sliding_tackle'] > 50 and row['skill_dribbling']<50 and row['attacking_volleys']<50:
                #Finding an equation for whom the above equation is satisfied
                formula = 1*player
                #Appending all the formulaes
                total_def += formula
prob += (total_def == defen)
print(total_def)

x152 + x154 + x165 + x195 + x223 + x255 + x260 + x276 + x308 + x309 + x314 + x326 + x331 + x360 + x391 + x399 + x400 + x417 + x431 + x432 + x448 + x462 + x485 + x489 + x501 + x503 + x509 + x515 + x545 + x546 + x561 + x572 + x589 + x594 + x596 + x597 + x608 + x610 + x613 + x623 + x639 + x642 + x643 + x75


In [120]:
#constraining the mid fileders to atleast 3 in the final team
mid = 3
#creating an equation which limits to mid fileders to atleast 3 
total_mid = ""
#looping in over to form a equation for which the constraints are satisfied 
for num, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        if num == i:
            #Any player for which skill dribbling is considered to be a good fit for the team 
            if row['goalkeeping_handling'] < 50 and row['defending_sliding_tackle'] < 50 and row['skill_dribbling']>50 and row['attacking_volleys']<50:
                #Equation involving only the players for which the above constraint is satisfied 
                formula = 1*player
                #Appending all the equations 
                total_mid += formula
#Adding the constraint to the equation
prob += (total_mid == mid)
print((total_mid))

x316 + x319 + x361 + x388 + x416 + x421 + x447 + x456 + x473 + x487 + x499 + x510 + x518 + x519 + x521 + x522 + x526 + x527 + x540 + x541 + x543 + x559 + x560 + x566 + x570 + x571 + x576 + x577 + x591 + x592 + x598 + x603 + x606 + x611 + x629 + x630 + x636 + x637 + x638


In [121]:
#Constraining the forward fielders to atleast three in the final team
fwd = 3
#creating an equation that limits mid fielders to atleast 3
total_fwd = ""
#looping over to find the available attacking players 
for num, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        if num == i:
            #Any player with more than attacking volleys is considered to be good fit for the team to play in attacking position 
            if row['goalkeeping_handling'] < 50 and row['defending_sliding_tackle'] < 50 and row['skill_dribbling']>20 and row['attacking_volleys']>50:
                #Involves only for which the above constraint is satisfied  
                formula = 1*player
                #Appending all the individual equations 
                total_fwd += formula
#Finding two attacking players with the above constraints
prob += (total_fwd == fwd)
print(total_fwd)

x0 + x100 + x102 + x106 + x114 + x115 + x119 + x12 + x125 + x132 + x133 + x134 + x135 + x139 + x141 + x143 + x144 + x146 + x148 + x149 + x15 + x153 + x155 + x156 + x159 + x167 + x168 + x170 + x172 + x174 + x187 + x190 + x192 + x197 + x200 + x207 + x208 + x209 + x210 + x211 + x212 + x213 + x217 + x218 + x221 + x23 + x233 + x234 + x235 + x236 + x239 + x242 + x245 + x246 + x249 + x252 + x257 + x259 + x261 + x265 + x271 + x28 + x281 + x290 + x294 + x300 + x302 + x306 + x307 + x310 + x313 + x318 + x324 + x329 + x33 + x330 + x335 + x336 + x337 + x340 + x347 + x352 + x353 + x358 + x362 + x363 + x364 + x367 + x369 + x384 + x385 + x39 + x392 + x393 + x395 + x401 + x414 + x419 + x422 + x423 + x428 + x429 + x434 + x438 + x439 + x44 + x440 + x443 + x444 + x446 + x45 + x450 + x451 + x453 + x457 + x458 + x459 + x464 + x466 + x467 + x469 + x470 + x472 + x475 + x477 + x481 + x482 + x486 + x49 + x491 + x493 + x495 + x496 + x498 + x5 + x500 + x502 + x507 + x508 + x51 + x516 + x517 + x52 + x520 + x537 + 

In [122]:
#the dream team should consist of 12 players which includes 1 goal keeper, 3 attacking players, 3 defending players, 3 midefileders and one any player
#Total number of players in dream team
total_players = 12
#creating an equation to limit the maximum players to 12
total_play = ""
#looping in over to find the equation to constrain the number of players to 12
for num, row in spain_club.iterrows():
    for i, player in enumerate(final_vars):
        if num == i:
            formula = 1*player
            #Appending all the individual equations 
            total_play += formula
#Finding two attacking players with the above constraints
prob += (total_play == total_players)
#checking if all the variables are there or not
print(len(total_play))

645


In [123]:
print(prob)

FantasyTeam:
MAXIMIZE
93*x0 + 93*x1 + 87*x10 + 80*x100 + 80*x101 + 80*x102 + 80*x103 + 80*x104 + 80*x105 + 80*x106 + 80*x107 + 80*x108 + 80*x109 + 87*x11 + 80*x110 + 80*x111 + 80*x112 + 80*x113 + 90*x114 + 86*x115 + 86*x116 + 86*x117 + 85*x118 + 84*x119 + 87*x12 + 84*x120 + 84*x121 + 84*x122 + 83*x123 + 82*x124 + 82*x125 + 82*x126 + 82*x127 + 80*x128 + 80*x129 + 88*x13 + 80*x130 + 80*x131 + 80*x132 + 79*x133 + 79*x134 + 79*x135 + 79*x136 + 79*x137 + 79*x138 + 79*x139 + 86*x14 + 79*x140 + 79*x141 + 79*x142 + 79*x143 + 79*x144 + 79*x145 + 79*x146 + 79*x147 + 79*x148 + 91*x149 + 86*x15 + 88*x150 + 86*x151 + 84*x152 + 83*x153 + 82*x154 + 81*x155 + 79*x156 + 78*x157 + 78*x158 + 78*x159 + 86*x16 + 78*x160 + 78*x161 + 78*x162 + 78*x163 + 78*x164 + 78*x165 + 78*x166 + 78*x167 + 78*x168 + 78*x169 + 86*x17 + 78*x170 + 78*x171 + 78*x172 + 78*x173 + 86*x174 + 85*x175 + 83*x176 + 82*x177 + 82*x178 + 82*x179 + 90*x18 + 82*x180 + 81*x181 + 80*x182 + 79*x183 + 79*x184 + 78*x185 + 78*x186 + 78*x187 + 7

In [124]:
prob.writeLP('FantasyTeam.lp')
optimization_result = prob.solve()
assert optimization_result == LpStatusOptimal
print("Status:", LpStatus[prob.status])
print("Optimal Solution to the problem: ", value(prob.objective))
print ("Individual decision_variables: ")
for v in prob.variables():
    print(v.name, "=", v.varValue)

Status: Optimal
Optimal Solution to the problem:  1021.0
Individual decision_variables: 
x0 = 0.0
x1 = 0.0
x10 = 0.0
x100 = 0.0
x101 = 0.0
x102 = 0.0
x103 = 0.0
x104 = 0.0
x105 = 0.0
x106 = 0.0
x107 = 0.0
x108 = 0.0
x109 = 0.0
x11 = 0.0
x110 = 0.0
x111 = 0.0
x112 = 0.0
x113 = 0.0
x114 = 0.0
x115 = 0.0
x116 = 0.0
x117 = 0.0
x118 = 0.0
x119 = 0.0
x12 = 0.0
x120 = 0.0
x121 = 0.0
x122 = 0.0
x123 = 0.0
x124 = 0.0
x125 = 0.0
x126 = 0.0
x127 = 0.0
x128 = 0.0
x129 = 0.0
x13 = 0.0
x130 = 0.0
x131 = 0.0
x132 = 0.0
x133 = 0.0
x134 = 0.0
x135 = 0.0
x136 = 0.0
x137 = 0.0
x138 = 0.0
x139 = 0.0
x14 = 0.0
x140 = 0.0
x141 = 0.0
x142 = 0.0
x143 = 0.0
x144 = 0.0
x145 = 0.0
x146 = 0.0
x147 = 0.0
x148 = 0.0
x149 = 1.0
x15 = 0.0
x150 = 0.0
x151 = 0.0
x152 = 0.0
x153 = 0.0
x154 = 0.0
x155 = 0.0
x156 = 0.0
x157 = 0.0
x158 = 0.0
x159 = 0.0
x16 = 0.0
x160 = 0.0
x161 = 0.0
x162 = 0.0
x163 = 0.0
x164 = 0.0
x165 = 0.0
x166 = 0.0
x167 = 0.0
x168 = 0.0
x169 = 0.0
x17 = 0.0
x170 = 0.0
x171 = 0.0
x172 = 0.0
x173 = 0.0

In [125]:
final_obj=spain_club.filter(items=[149,212,250,251,518,519,485,448,447,444,365,255],axis=0)

In [126]:
dream_team = final_obj[['sofifa_id','short_name','potential','goalkeeping_handling','value_eur','skill_dribbling','defending_sliding_tackle','attacking_volleys']]
dream_team

Unnamed: 0,sofifa_id,short_name,potential,goalkeeping_handling,value_eur,skill_dribbling,defending_sliding_tackle,attacking_volleys
149,244778,Trinc�o,91,9,20000000,85,32,66
212,253004,Ansu Fati,90,9,15000000,79,28,59
250,242816,Riqui Puig,88,15,12500000,81,53,56
251,243952,A. Lunin,87,71,9500000,11,11,12
518,252326,J. Sanabria,82,6,1200000,66,47,48
519,254860,Valera,82,12,1100000,68,14,48
485,253163,R. Araujo,81,14,1600000,43,63,32
448,241708,M. Sagnan,80,6,1700000,38,68,27
447,245999,Mollejo,82,12,2000000,67,47,47
444,257315,Gustavo Maia,85,9,2300000,73,21,65


### Insights

1.The Dream team consits of only one goal keeper (71 in skills), three defending players (77,68,68) , three attacking players( 66,59,56) and three mid fielders ( 85,79,81) <br>
2. Using linear optimization, we can also find different combinations just by setting different constraints

# Thank you 