In [None]:
# Install pinned versions of third-party packages into the running kernel's environment.
# NOTE(review): none of the cells visible here import these packages directly; presumably
# the local `analytics` module depends on them - confirm. pandas is imported in the next
# cell but is not pinned here.
%pip install catboost==1.0.5
%pip install category-encoders==2.6.1
%pip install scikit-learn==0.22.1
%pip install scipy==1.4.1
%pip install statsmodels==0.11.1
%pip install tqdm==4.46.1
%pip install xgboost==1.1.1
%pip install pulp==2.5.1

In [None]:
from analytics import *
import pandas as pd
import os

#### Converting YAML files to CSV

In [None]:
# The two right-hand sides below are template placeholders, not valid Python:
# replace them with quoted path strings before running this cell.
directory = <PATH OF YAML FILES>
odir = <PATH OF OUTPUT DIRECTORY>
# NOTE(review): convertAllYaml2PandasDataframesT20 is not defined in this notebook;
# presumably it comes from `from analytics import *` - confirm its exact input/output layout.
convertAllYaml2PandasDataframesT20(directory,odir)

#### Converting CSV files to each team's battingdetails.csv and bowlingdetails.csv

In [None]:
# Team names to process; each one is passed unchanged to both helper calls in the loop below.
# NOTE(review): presumably these strings must match the team names used in the converted
# match CSVs exactly - confirm against the data.
iplTeams = ["Chennai Super Kings","Deccan Chargers","Delhi Daredevils",
            "Kings XI Punjab", 'Kochi Tuskers Kerala',"Kolkata Knight Riders",
            "Mumbai Indians", "Pune Warriors","Rajasthan Royals",
            "Royal Challengers Bangalore","Sunrisers Hyderabad","Gujarat Lions",
            "Rising Pune Supergiants","Gujarat Titans","Lucknow Supergiants"]

# The three right-hand sides below are template placeholders, not valid Python:
# replace them with quoted path strings before running this cell.
# Note that `directory` is reassigned here and again in later cells.
directory = <PATH OF CSV FILES>
odir_bat = <PATH OF OUTPUT DIRECTORY>
odir_bowl = <PATH OF OUTPUT DIRECTORY>

# One batting call and one bowling call per team, both with save=True and their own
# output directory.
# NOTE(review): both helpers are undefined in this notebook; presumably provided by
# `from analytics import *` - confirm what files they write.
for team in iplTeams:
    getTeamBattingDetails(team, dir=directory, save=True, odir=odir_bat)
    getTeamBowlingDetails(team, dir=directory, save=True, odir=odir_bowl)

#### Dream 11 points calculation for batsmen

In [None]:
def calculate_points(row):
    """
    Compute the Dream 11 batting points for a single innings row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide 'runs', 'balls', '4s' and '6s'. The keys '30s', '50s',
        '100s' and '0s' are written onto ``row`` as 0/1 milestone flags.
        These writes land on the object passed in; the batch helper in this
        notebook sets the same columns on the data frame itself.

    Returns
    -------
    number
        runs + 1 per four + 2 per six
        + milestone bonus (30-49 runs: +4, 50-99: +8, 100 or more: +16)
        - 2 for a score of exactly 0
        + strike-rate adjustment (only when runs > 0 and balls > 0)
        + a flat 4 points.
    """
    runs = row['runs']
    balls_faced = row['balls']
    fours = row['4s']
    sixes = row['6s']

    # Base points: 1 per run, +1 per four, +2 per six.
    total_points = runs + fours + 2 * sixes

    # The four milestone ranges are mutually exclusive.
    is_thirty = 30 <= runs < 50
    is_fifty = 50 <= runs < 100
    is_century = runs >= 100
    is_duck = runs == 0

    if is_thirty:
        total_points += 4
    elif is_fifty:
        total_points += 8
    elif is_century:
        total_points += 16
    elif is_duck:
        total_points -= 2

    row['30s'] = 1 if is_thirty else 0
    row['50s'] = 1 if is_fifty else 0
    row['100s'] = 1 if is_century else 0
    row['0s'] = 1 if is_duck else 0

    # Strike-rate adjustment.
    if balls_faced > 0 and runs > 0:
        # Multiply before dividing so exact band edges (e.g. 6 off 10 -> 60.0)
        # are not disturbed by an intermediate rounding step.
        sr = runs * 100 / balls_faced

        # Contiguous bands, checked from the top down, so every strike rate
        # lands in exactly one band. The previous range checks left holes
        # (sr == 60 and 150 < sr < 150.01 fell through to the -6 penalty) and
        # shadowed the inclusive bounds at 170 and 70.
        if sr > 170:
            total_points += 6
        elif sr > 150:      # (150, 170]
            total_points += 4
        elif sr >= 130:     # [130, 150]
            total_points += 2
        elif sr > 70:       # (70, 130): no adjustment
            pass
        elif sr >= 60:      # [60, 70]
            total_points -= 2
        elif sr >= 50:      # [50, 60)
            total_points -= 4
        else:               # below 50
            total_points -= 6

    # Return total points with an additional 4 points
    return total_points + 4

def calculate_dream11_points_for_batsmen(source_dir, dest_dir):
    """
    Calculate Dream 11 points for all CSV files in the source directory
    and save the updated files to the destination directory with the same name.

    For every ``*.csv`` file in ``source_dir`` the following columns are added
    (in this order) before the file is written to ``dest_dir``:

    - ``team1``: the part of the file name before the first '-'
    - ``MF``: the constant string 'IPL'
    - ``30s``, ``50s``, ``100s``, ``0s``: 0/1 flags derived from ``runs``
    - ``d11``: ``calculate_points`` applied to each row

    Parameters
    ----------
    source_dir : str
        Directory that is scanned (non-recursively) for ``.csv`` files.
    dest_dir : str
        Directory the augmented files are written to; created if missing.
    """
    filenames = os.listdir(source_dir)

    # DataFrame.to_csv does not create missing directories. An empty dest_dir
    # means "current directory", which needs no creation.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    for filename in filenames:
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(source_dir, filename))
        runs = df['runs']

        df["team1"] = filename.split('-')[0]
        df['MF'] = 'IPL'

        # Milestone flags computed once, column-wise.
        df['30s'] = ((runs >= 30) & (runs < 50)).astype('int64')
        df['50s'] = ((runs >= 50) & (runs < 100)).astype('int64')
        df['100s'] = (runs >= 100).astype('int64')
        df['0s'] = (runs == 0).astype('int64')

        if df.empty:
            # With zero rows, apply(axis=1) hands back a DataFrame rather than
            # a Series, which cannot be assigned to a single column.
            df['d11'] = pd.Series(index=df.index, dtype='int64')
        else:
            df['d11'] = df.apply(calculate_points, axis=1)

        df.to_csv(os.path.join(dest_dir, filename), index=False)

#### Feature Engineering Dream 11 points to batting.csv

In [None]:
# The two right-hand sides below are template placeholders, not valid Python:
# replace them with quoted path strings before running this cell.
source_dir = <PATH OF SOURCE DIRECTORY>
dest_dir = <PATH OF DESTINATION DIRECTORY>
# Reads every .csv in source_dir and writes a same-named file with the added columns to dest_dir.
calculate_dream11_points_for_batsmen(source_dir, dest_dir)

#### Adding all the batting csv files into one csv file

In [None]:
# Set the directory path where the CSV files are located
directory = <>

# Initialize an empty dictionary to store the data frames
dfs = {}

# Loop through all the files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Read the CSV file into a pandas data frame
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        df = df.iloc[:, 1:]
        # Loop through all the columns in the data frame
        for col in df.columns:
            # If the column is already in the dictionary, append the values to the existing list
            if col in dfs:
                dfs[col].extend(list(df[col]))
            # Otherwise, add the column and its values to the dictionary
            else:
                dfs[col] = list(df[col])

# Convert the dictionary to a data frame
result = pd.DataFrame(dfs)

# Write the concatenated data frame to a CSV file
result.to_csv(<OUTPUT PATH>, index=False)

#### Dream 11 points calculation for bowlers

In [None]:
def calculate_bowling_points(row):
    """
    Compute the Dream 11 bowling points for a single bowling-figures row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide 'wicket' and 'maidens'. The keys '3_wicket_haul',
        '4_wicket_haul' and '5_wicket_haul' are written onto ``row`` as 0/1
        flags; at most one of them is 1.

    Returns
    -------
    number
        25 per wicket + haul bonus (3 wickets: +4, 4 wickets: +8,
        5 or more: +16) + 12 per maiden over.
    """
    total_points = 0
    wickets = row['wicket']

    # Wicket points scale with the number of wickets; a flat +25 would score
    # a one-wicket spell the same as a two-wicket spell.
    if wickets > 0:
        total_points += 25 * wickets

    # Haul bonus: the three ranges are mutually exclusive.
    three_haul = 1 if 3 <= wickets < 4 else 0
    four_haul = 1 if 4 <= wickets < 5 else 0
    five_haul = 1 if wickets >= 5 else 0
    total_points += 4 * three_haul + 8 * four_haul + 16 * five_haul

    row['3_wicket_haul'] = three_haul
    row['4_wicket_haul'] = four_haul
    row['5_wicket_haul'] = five_haul

    # Maiden points likewise scale with the number of maiden overs.
    maiden_overs = row['maidens']
    if maiden_overs > 0:
        total_points += 12 * maiden_overs

    return total_points

def calculate_dream11_points_for_bowlers(source_dir, dest_dir):
    """
    Calculate Dream 11 points for all CSV files in the source directory
    and save the updated files to the destination directory with the same name.

    For every ``*.csv`` file in ``source_dir`` the following columns are added
    (in this order) before the file is written to ``dest_dir``:

    - ``team1``: the part of the file name before the first '-'
    - ``MF``: the constant string 'IPL'
    - ``3_wicket_haul``, ``4_wicket_haul``, ``5_wicket_haul``: 0/1 flags
      derived from ``wicket``
    - ``d11``: ``calculate_bowling_points`` applied to each row

    Parameters
    ----------
    source_dir : str
        Directory that is scanned (non-recursively) for ``.csv`` files.
    dest_dir : str
        Directory the augmented files are written to; created if missing.
    """
    filenames = os.listdir(source_dir)

    # DataFrame.to_csv does not create missing directories. An empty dest_dir
    # means "current directory", which needs no creation.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    for filename in filenames:
        if not filename.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(source_dir, filename))
        wickets = df['wicket']

        df["team1"] = filename.split('-')[0]
        df['MF'] = 'IPL'

        # Haul flags computed once, column-wise, instead of three extra
        # row-by-row passes over the frame.
        df['3_wicket_haul'] = ((wickets >= 3) & (wickets < 4)).astype('int64')
        df['4_wicket_haul'] = ((wickets >= 4) & (wickets < 5)).astype('int64')
        df['5_wicket_haul'] = (wickets >= 5).astype('int64')

        if df.empty:
            # With zero rows, apply(axis=1) hands back a DataFrame rather than
            # a Series, which cannot be assigned to a single column.
            df['d11'] = pd.Series(index=df.index, dtype='int64')
        else:
            df['d11'] = df.apply(calculate_bowling_points, axis=1)

        df.to_csv(os.path.join(dest_dir, filename), index=False)


#### Feature Engineering Dream 11 points for bowling.csv

In [None]:
# The two right-hand sides below are template placeholders, not valid Python:
# replace them with quoted path strings before running this cell.
source_dir = <PATH OF SOURCE DIRECTORY>
dest_dir = <PATH OF DESTINATION DIRECTORY>
# Reads every .csv in source_dir and writes a same-named file with the added columns to dest_dir.
calculate_dream11_points_for_bowlers(source_dir, dest_dir)

#### Adding all the bowling csv files into one csv file

In [None]:
# Set the directory path where the CSV files are located
directory = <>

# Initialize an empty dictionary to store the data frames
dfs = {}

# Loop through all the files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Read the CSV file into a pandas data frame
        filepath = os.path.join(directory, filename)
        df = pd.read_csv(filepath)
        # Loop through all the columns in the data frame
        for col in df.columns:
            # If the column is already in the dictionary, append the values to the existing list
            if col in dfs:
                dfs[col].extend(list(df[col]))
            # Otherwise, add the column and its values to the dictionary
            else:
                dfs[col] = list(df[col])

# Convert the dictionary to a data frame
result = pd.DataFrame(dfs)

# Write the concatenated data frame to a CSV file
result.to_csv(<OUTPUT PATH>, index=False)