In [1]:
!pip -q install pulp

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing
from pulp import *
import os
# Index every file found under the Kaggle input directory: file name -> full path
data_paths = {}
for dirname, _, filenames in os.walk('/kaggle/input'):
    data_paths.update({fname: os.path.join(dirname, fname) for fname in filenames})
    # echo the file names so the available inputs are visible in the cell output
    for filename in filenames:
        print(filename)

# Any results you write to the current directory are saved as output.

use_case_question.docx
usecase_players.csv
sample_solution.csv


In [3]:
# Read the player pool and the provided sample solution, then display both shapes
pdf, res_df = (
    pd.read_csv(data_paths[csv_name])
    for csv_name in ('usecase_players.csv', 'sample_solution.csv')
)
(pdf.shape, res_df.shape)

((30, 5), (11, 5))

In [4]:
def transform_data(players_df):
    """
    Turn the raw players table into the per-player feature lookups used by the LP model.

    Args:
    ------
    players_df: DataFrame->must contain the columns 'playerName', 'teamName',
        'credits', 'selectionPercent' and 'player_role'. 'selectionPercent' is
        parsed as a number with an optional trailing '%' (e.g. '45%' or '4.5%').
        The input frame is copied, never modified.

    returns:
    ------
    player_names: list->values of 'playerName' in row order
    feat_dict: dict->maps each feature name ('credits', 'player_roi', 'quantity',
        'teamName_1', 'teamName_2', 'player_role_ar', 'player_role_bat',
        'player_role_bowl', 'player_role_wk') to a {playerName: value} dict.
        If a player name occurs more than once, the last row wins.

    raises:
    ------
    ValueError: when one of the feature columns cannot be formed, e.g. the data
        holds fewer than two teams or one of the roles ar/bat/bowl/wk is absent.
    """

    # copy
    df = players_df.copy()

    # replace original team abbv with 1, 2, ... in order of first appearance
    team_codes = {team: code for code, team in enumerate(df['teamName'].unique(), start=1)}
    df['teamName'] = df['teamName'].map(team_codes)

    # think it as availability, if player is injured/not available, set it to zero
    df['quantity'] = 1

    # dummy ROI values based on the selection popularity:
    # selection percentage (trailing '%' dropped) per credit spent
    selection_pct = df['selectionPercent'].apply(
        lambda pct: float(str(pct).strip().rstrip('%'))
    )
    df['player_roi'] = selection_pct / df['credits']

    # player_roi (maximize), credits (<=100), quantity (Total = 11)
    # onehot encode => player_role, teamName
    # dtype=int keeps the indicators numeric (newer pandas would produce booleans)
    df = pd.get_dummies(df, columns=['teamName', 'player_role'], dtype=int)

    # defined feature columns
    feature_cols = ['credits', 'player_roi', 'quantity', 'teamName_1', 'teamName_2',
                    'player_role_ar', 'player_role_bat', 'player_role_bowl', 'player_role_wk']

    # check if all features are present
    missing_cols = [col for col in feature_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(
            "Required columns missing to form features: {}".format(missing_cols)
        )

    # Creates a list of the Players
    player_names = list(df['playerName'])

    # one {player: value} lookup per feature; tolist() yields plain Python numbers
    feat_dict = {col: dict(zip(player_names, df[col].tolist())) for col in feature_cols}

    return player_names, feat_dict

In [5]:
def optimize_team(player_names, features):
    """
    Build and solve the integer linear program that selects the fantasy team.

    The model maximizes the summed 'player_roi' of the chosen players subject to:
    # total credits <= 100
    # exactly 11 players (each weighted by its 'quantity')
    # exactly 1 wicket-keeper
    # 3 to 5 batsmen
    # 3 to 5 bowlers
    # 1 to 3 all-rounders
    # 4 to 7 players from each of the two teams

    Side effects: writes the model to "FantasyCricket.lp" in the current
    directory and prints the solver status and the objective value.

    Args:
    ------
    player_names: list->list of all unique player names
    features: dict->dict with following features (as keys), each mapping
    player name -> numeric value
    ('credits', 'player_roi', 'quantity', 'teamName_1', 'teamName_2',
               'player_role_ar', 'player_role_bat', 'player_role_bowl', 'player_role_wk')

    returns:
    ------
    optimized LP problem (prob)
    solved variables & values can be accessed as
    for v in prob.variables():
        print(v.varValue)
    """

    # One 0/1 integer decision variable per player: 1 = chosen
    player_chosen = LpVariable.dicts("playerChosen", player_names, 0, 1, cat='Integer')

    # define the LP problem
    prob = LpProblem("Fantasy_Cricket", LpMaximize)

    def weighted_selection(feature_name):
        # Sum of the given feature over the chosen players (read from the
        # `features` argument, not from any notebook-level variable)
        weights = features[feature_name]
        return lpSum([weights[name] * player_chosen[name] for name in player_names])

    # The objective function is added to 'prob' first
    prob += weighted_selection('player_roi'), "MaximizeROI"

    # max credits: credits are used as given (no scaling)
    prob += weighted_selection('credits') <= 100, "MaxCredits"

    # Total
    prob += weighted_selection('quantity') == 11, "Totalselection"

    # Wk
    prob += weighted_selection('player_role_wk') == 1, "Wkequal"

    # (feature, minimum, maximum, constraint-name prefix), added in this order:
    # batsmen, bowlers, all-rounders, team 1, team 2
    bounded_groups = [
        ('player_role_bat', 3, 5, "Batsmen"),
        ('player_role_bowl', 3, 5, "Bowler"),
        ('player_role_ar', 1, 3, "Ar"),
        ('teamName_1', 4, 7, "Team1"),
        ('teamName_2', 4, 7, "Team2"),
    ]
    for feature_name, lowest, highest, label in bounded_groups:
        prob += weighted_selection(feature_name) >= lowest, label + "Minimum"
        prob += weighted_selection(feature_name) <= highest, label + "Maximum"

    # The problem data is written to an .lp file
    prob.writeLP("FantasyCricket.lp")

    prob.solve()
    print("Status:", LpStatus[prob.status])

    # value() returns None when the solver left variables unset
    objective_value = value(prob.objective)
    if objective_value is None:
        print("ROI maximized = not available")
    else:
        print("ROI maximized = {}".format(round(objective_value, 2)))

    return prob

In [6]:
# Build the per-player feature lookups, then solve the team-selection problem
player_names, feat = transform_data(players_df=pdf)
prob = optimize_team(player_names=player_names, features=feat)

Status: Optimal
ROI maximized = 108.68


In [7]:
player_roles = dict(zip(pdf['playerName'], pdf['player_role']))


def _alnum_key(text):
    # Keep only letters and digits: solver variable names carry '_' where the
    # player name has spaces (and possibly other characters), so both sides are
    # compared on their alphanumeric characters only.
    return "".join(ch for ch in str(text) if ch.isalnum())


# alphanumeric key -> original player name (works for names of any word count)
names_by_key = {_alnum_key(name): name for name in player_names}

players_choosen = []
players_credits = []
for v in prob.variables():
    # ignore anything that is not a player decision variable
    if not v.name.startswith("playerChosen_"):
        continue
    # unset values are skipped; > 0.5 tolerates solver round-off on 0/1 variables
    if v.varValue is None or v.varValue <= 0.5:
        continue
    act_name = names_by_key[_alnum_key(v.name[len("playerChosen_"):])]
    credit = feat['credits'][act_name]
    players_choosen.append(act_name)
    players_credits.append(credit)
    print(v.name, ' = ', credit, player_roles[act_name])

print('Total credits:', sum(players_credits))

playerChosen_Brandon_King  =  8.0 bat
playerChosen_Deepak_Chahar  =  8.5 bowl
playerChosen_Fabian_Allen  =  8.0 ar
playerChosen_Jason_Holder  =  8.5 ar
playerChosen_Khary_Pierre  =  8.0 bowl
playerChosen_Kuldeep_Yadav  =  8.5 bowl
playerChosen_Lendl_Simmons  =  8.5 bat
playerChosen_Manish_Pandey  =  8.5 bat
playerChosen_Nicholas_Pooran  =  9.0 wk
playerChosen_Sherfane_Rutherford  =  8.0 ar
playerChosen_Virat_Kohli  =  10.5 bat
Total credits: 94.0


In [8]:
# Keep only the rows of the chosen players and write them out without the index
is_selected = pdf['playerName'].isin(players_choosen)
res_df = pdf.loc[is_selected].reset_index(drop=True)
res_df.to_csv('solution.csv', index=False)

In [9]:
###

In [10]:
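# Disabled experiment (not executed): knapsack-style dynamic programming sketch.
# Goal: maximize total value while keeping total credits within max_credits.
# NOTE(review): the table update stores `ci + ...` although the comparison uses
# `vi + ...` - verify before re-enabling this code.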

# values = df['value'].values.tolist()
# credits = df['credits'].values.tolist()
# max_credits = 100

# def fantasy_dp(values, credits, n_items, max_credit, return_all=False):
    
#     max_credit = 2*max_credit
#     credits = [int(2*i) for i in credits]

#     table = np.zeros((n_items+1, max_credit+1),dtype=np.float32)
#     keep = np.zeros((n_items+1, max_credit+1),dtype=np.float32)
    
#     for i in range(1, n_items+1):
#         for j, c in enumerate(range(max_credit+1)):
#             ci = credits[i-1] # credit of current item
#             vi = values[i-1] # value of current item
#             if (ci <= c) and (vi + table[i-1,c-ci] > table[i-1,c]):
#                 table[i,c] = ci + table[i-1,c-ci]
#                 keep[i,c] = 1
#             else:
#                 table[i,c] = table[i-1,c]

#     picks = []
#     M = max_credit

#     for i in range(n_items,0,-1):
#         if keep[i,M] == 1:
#             picks.append(i)
#             M -= credits[i-1]

#     picks.sort()
#     picks = [x-1 for x in picks] # change to 0-index
    
#     return picks

# n_items = 30
# capacity = 100
# picks = fantasy_dp(values, credits, n_items, capacity)
# print(picks)

# picks_credits = []
# for p in picks:
#     picks_credits.append(credits[p])
# print(sum(picks_credits)), picks_credits