In [1]:
from google.colab import drive
drive.mount('/content/gdrive')  

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
!pip install pulp



In [3]:
import os
from typing import List
import logging
import argparse

import pandas as pd
import numpy as np
from ast import literal_eval
import pulp as p

In [4]:
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
logger = logging.getLogger()

In [5]:
MANDATORY_COLUMNS = {
    "player_name",
    "player_category",
    "team_name",
    "last_5_matches_points",
    "cost",
}

In [6]:
# Dream11 constraints
MAX_PLAYERS = 11
MAX_BATSMEN = 5
MAX_ALLROUNDERS = 3
MAX_BOWLERS = 5
MAX_KEEPERS = 2
MAX_COST = 100
MAX_PLAYER_IN_EACH_TEAM = 7
MANDATORY_INPUT_COLUMNS = {
    "team_name",
    "player_name",
    "cost",
    "player_category",
    "last_5_matches_points",
}

In [7]:
HUMANFRIENDLY_COLUMNS = [
    "player_name",
    "weighted_player_points",
    "is_selected",
    "cost",
    "last_5_matches_points",
]

In [8]:
def read_data(abs_filename: str) -> pd.DataFrame:
    data = pd.read_excel(abs_filename) 
    data['last_5_matches_points'] = data.last_5_matches_points.apply(literal_eval)
    print(data)
    assert set(data.columns) == MANDATORY_COLUMNS

    return data

In [9]:
def compute_weighted_points(points_list: List, alpha: int = 0.20):
    weights = np.exp(
        list(reversed(np.array(range(1, len(points_list) + 1)) * alpha * -1))
    )
    exponential_weighted_average = np.average(np.array(points_list), weights=weights)
    return exponential_weighted_average

In [10]:
def _get_decision_variables(all_data: pd.DataFrame) -> pd.DataFrame:
    # define decision variables for each row in the input dataframe

    decision_variables = []

    for rownum, row in all_data.iterrows():
        variable = str("x_{}".format(str(rownum)))
        variable = p.LpVariable(variable, lowBound=0, upBound=1, cat="Integer")
        decision_variables.append(
            {"player_name": row["player_name"], "pulp_variable": variable}
        )

    return pd.DataFrame(decision_variables)

In [11]:
def _get_optimization_function(player_df: pd.DataFrame) -> p.LpProblem:
    # Create optimization Function
    prob = p.LpProblem("Dreamteam", p.LpMaximize)

    total_points = ""
    for index, row in player_df.iterrows():
        formula = row["weighted_player_points"] * row["pulp_variable"]
        total_points += formula
    prob += total_points
    return prob

In [12]:
def _add_constraints(player_df: pd.DataFrame, optimization_prob: p.LpProblem):
    total_keepers = ""
    total_batsman = ""
    total_allrounder = ""
    total_bowler = ""
    total_players = ""
    total_cost = ""
    total_team2 = ""
    total_team1 = ""

    for rownum, row in player_df.iterrows():
        keeper_formula = row["player_category_wicket_keeper"] * row["pulp_variable"]
        total_keepers += keeper_formula

        batsman_formula = row["player_category_batsman"] * row["pulp_variable"]
        total_batsman += batsman_formula

        allrounder_formula = row["player_category_all_rounder"] * row["pulp_variable"]
        total_allrounder += allrounder_formula

        bowler_formula = row["player_category_bowler"] * row["pulp_variable"]
        total_bowler += bowler_formula

        total_players_formula = row["pulp_variable"]
        total_players += total_players_formula

        total_cost_formula = row["cost"] * row["pulp_variable"]
        total_cost += total_cost_formula

        formula = row["team_name_RR"] * row["pulp_variable"]
        total_team1 += formula

        formula = row["team_name_PKS"] * row["pulp_variable"]
        total_team2 += formula

    
    print(total_team2)
    optimization_prob += total_keepers <= MAX_KEEPERS
    optimization_prob += total_batsman <= MAX_BATSMEN
    optimization_prob += total_allrounder <= MAX_ALLROUNDERS
    optimization_prob += total_bowler <= MAX_BOWLERS
    optimization_prob += total_players == MAX_PLAYERS
    optimization_prob += total_cost <= MAX_COST
    optimization_prob += total_team1 <= MAX_PLAYER_IN_EACH_TEAM
    optimization_prob += total_team2 <= MAX_PLAYER_IN_EACH_TEAM

    print(optimization_prob)
    optimization_prob.writeLP("Dreamteam.lp")
    return optimization_prob

In [13]:
def caller(filepath):
    #filepath: str = os.path.join(".", "data", "dream11_performance_data.csv")
#) -> pd.DataFrame:
    '''
    if not os.path.exists(filepath):
        logger.error(
            f"Filepath {filepath} does not exist. Make sure that the data is available at the desired location"
        )
        raise FileNotFoundError(f"filepath {filepath} does not exist")'''
    print(filepath)

    raw_data = read_data(filepath)
    print(raw_data)
    assert (
        set(raw_data.columns) == MANDATORY_INPUT_COLUMNS
    ), f"Invalid columns. CSV file must contain columns - {MANDATORY_INPUT_COLUMNS}"

    print("after assert")

    processed_player_data = pd.get_dummies(
        raw_data, columns=["player_category", "team_name"]
    )
    
    processed_player_data["weighted_player_points"] = processed_player_data[
        "last_5_matches_points"
    ].apply(compute_weighted_points)

    print("after processed_player_data")
    decision_variables_df = _get_decision_variables(processed_player_data)
    assert len(decision_variables_df) == len(processed_player_data), (
        f"Number of Decision Variables must be equal to the"
        f"number of rows in the input file. Expected {len(processed_player_data)}"
        f"Received {len(decision_variables_df)}"
    )
    print("decision_variables_df")
    merged_processed_players_df = pd.merge(
        processed_player_data, decision_variables_df, on="player_name"
    )
    merged_processed_players_df["pulp_variable_name"] = merged_processed_players_df[
        "pulp_variable"
    ].apply(lambda x: x.name)
    print("after lambda")

    optimization_prob = _get_optimization_function(merged_processed_players_df)
    
    optimization_prob = _add_constraints(merged_processed_players_df, optimization_prob)
    print("optimization_result")
    optimization_result = optimization_prob.solve()
    

    assert optimization_result != p.LpStatusNotSolved

    solution_df = pd.DataFrame(
        [
            {"pulp_variable_name": v.name, "is_selected": v.varValue}
            for v in optimization_prob.variables()
        ]
    )
    optimized_players_df = pd.merge(
        merged_processed_players_df, solution_df, on="pulp_variable_name"
    )

    dream_team_df = optimized_players_df.loc[
        optimized_players_df.is_selected == 1, HUMANFRIENDLY_COLUMNS
    ]

    assert len(dream_team_df) == MAX_PLAYERS, (
        f"there should be {MAX_PLAYERS} in the team but there are "
        f"{len(dream_team_df)} players. Something went wrong."
    )
    logger.debug(
        f"This Team can earn you an estimated of {dream_team_df['weighted_player_points'].sum()}"
    #print("This Team can earn you an estimated of {dream_team_df['weighted_player_points'].sum()}")
    )
    logger.debug(dream_team_df)
    return dream_team_df

In [17]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--player_data_filepath', default="/content/gdrive/My Drive/RR_vs_PKS.xlsx",
                    type=str, help='Path to testfile')

    args = parser.parse_args(args = [])
    dream_team_df =caller(args.player_data_filepath)

    print(dream_team_df)

/content/gdrive/My Drive/RR_vs_PKS.xlsx
    cost   last_5_matches_points player_category        player_name team_name
0    9.0         [0, 0, 0, 0, 0]         batsman         Evin Lewis        RR
1    8.5      [18, 50, 39, 0, 0]         batsman   Yashasvi Jaiswal        RR
2    9.5    [66, 59, 62, 29, 25]   wicket_keeper       Sanju Samson        RR
3    8.5    [21, 13, 20, 37, 35]         batsman        Riyan Parag        RR
4    8.5     [0, 49, 30, 65, 31]     all_rounder       Shivam Dube         RR
5    9.0         [0, 0, 0, 0, 0]     all_rounder   Liam Livingstone        RR
6    9.0      [25, 2, 9, 58, 57]     all_rounder      Rahul Tewatia        RR
7    9.5  [107, 54, 136, 10, 60]     all_rounder       Chris Morris        RR
8    8.5      [4, 14, 37, 4, 83]          bowler    Chetan Sakariya        RR
9    8.0        [29, 0, 0, 0, 0]          bowler       Kartik Tyagi        RR
10   9.0     [95, 35, 33, 4, 41]          bowler  Mustafizur Rahman        RR
11   8.5         [0, 0, 

DEBUG:/usr/local/lib/python3.7/dist-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/ad10e258b45845bfa014e293aa03464d-pulp.mps max branch printingOptions all solution /tmp/ad10e258b45845bfa014e293aa03464d-pulp.sol 
DEBUG:This Team can earn you an estimated of 438.96656687605014
DEBUG:          player_name  weighted_player_points  ...  cost   last_5_matches_points
2        Sanju Samson               43.685194  ...   9.5    [66, 59, 62, 29, 25]
3         Riyan Parag               27.319951  ...   8.5    [21, 13, 20, 37, 35]
4        Shivam Dube                37.628812  ...   8.5     [0, 49, 30, 65, 31]
7        Chris Morris               67.981603  ...   9.5  [107, 54, 136, 10, 60]
8     Chetan Sakariya               34.571502  ...   8.5      [4, 14, 37, 4, 83]
10  Mustafizur Rahman               36.788960  ...   9.0     [95, 35, 33, 4, 41]
24           KL Rahul               50.307171  ...  11.0     [0, 132, 33, 89, 8]
27     Mayank Agarwal               45.166733  ...   9.5    [1

    cost   last_5_matches_points player_category        player_name team_name
0    9.0         [0, 0, 0, 0, 0]         batsman         Evin Lewis        RR
1    8.5      [18, 50, 39, 0, 0]         batsman   Yashasvi Jaiswal        RR
2    9.5    [66, 59, 62, 29, 25]   wicket_keeper       Sanju Samson        RR
3    8.5    [21, 13, 20, 37, 35]         batsman        Riyan Parag        RR
4    8.5     [0, 49, 30, 65, 31]     all_rounder       Shivam Dube         RR
5    9.0         [0, 0, 0, 0, 0]     all_rounder   Liam Livingstone        RR
6    9.0      [25, 2, 9, 58, 57]     all_rounder      Rahul Tewatia        RR
7    9.5  [107, 54, 136, 10, 60]     all_rounder       Chris Morris        RR
8    8.5      [4, 14, 37, 4, 83]          bowler    Chetan Sakariya        RR
9    8.0        [29, 0, 0, 0, 0]          bowler       Kartik Tyagi        RR
10   9.0     [95, 35, 33, 4, 41]          bowler  Mustafizur Rahman        RR
11   8.5         [0, 0, 0, 0, 0]          bowler     Tabraiz Sha

In [15]:
!ls "/content/gdrive/My Drive/RR_vs_PKS.xlsx"

'/content/gdrive/My Drive/RR_vs_PKS.xlsx'
