In [None]:
# Description of the baseball data set: which files to read, how they are
# delimited, and which CSV column holds each quantity.
baseballdatainfo = dict(
    # --- files and CSV dialect ---
    masterfile="Master_2016.csv",     # Name of Master CSV file
    battingfile="Batting_2016.csv",   # Name of Batting CSV file
    separator=",",                    # Separator character in CSV files
    quote='"',                        # Quote character in CSV files
    # --- identifying columns ---
    playerid="playerID",              # Player ID field name
    firstname="nameFirst",            # First name field name
    lastname="nameLast",              # Last name field name
    yearid="yearID",                  # Year field name
    # --- batting columns ---
    atbats="AB",                      # At bats field name
    hits="H",                         # Hits field name
    doubles="2B",                     # Doubles field name
    triples="3B",                     # Triples field name
    homeruns="HR",                    # Home runs field name
    walks="BB",                       # Walks field name
    # All batting columns listed above, in the same order
    battingfields=["AB", "H", "2B", "3B", "HR", "BB"],
)

import csv  # imported here because this cell uses csv before any later import cell has run


def _read_csv_rows(filename, separator, quote):
    """
    Inputs:
      filename  - name of the CSV file to read
      separator - field separator character
      quote     - quote character
    Output:
      Returns a list with one dictionary per data row, keyed by the
      column names taken from the file's header line.
    """
    with open(filename, newline='') as csvfile:
        return list(csv.DictReader(csvfile, delimiter=separator, quotechar=quote))


# Batting CSV file extracted as a list of dictionaries.
# The separator and quote character come from baseballdatainfo so the
# reader agrees with the dialect declared there.
statistics = _read_csv_rows(baseballdatainfo['battingfile'],
                            baseballdatainfo['separator'],
                            baseballdatainfo['quote'])

# Master CSV file extracted the same way (one dictionary per row).
player_names = _read_csv_rows(baseballdatainfo['masterfile'],
                              baseballdatainfo['separator'],
                              baseballdatainfo['quote'])

In [None]:

# Typical cutoff used for official statistics
MINIMUM_AB = 500

def batting_average(info, batting_stats, minimum_ab=None):
    """
    Inputs:
      info          - baseball data information dictionary; its "hits" and
                      "atbats" entries name the columns read from batting_stats
      batting_stats - dictionary of batting statistics (values are strings)
      minimum_ab    - optional at-bats cutoff; when omitted the module-level
                      MINIMUM_AB is looked up at call time
    Output:
      Returns the batting average as a float, or 0.0 when the at bats are
      below the cutoff
    """
    if minimum_ab is None:
        minimum_ab = MINIMUM_AB
    hits = float(batting_stats[info["hits"]])
    at_bats = float(batting_stats[info["atbats"]])
    # The second test only matters for a cutoff of 0: it avoids dividing by zero.
    if at_bats < minimum_ab or at_bats == 0:
        return 0.0
    return hits / at_bats

def onbase_percentage(info, batting_stats, minimum_ab=None):
    """
    Inputs:
      info          - baseball data information dictionary; its "hits",
                      "atbats" and "walks" entries name the columns read
                      from batting_stats
      batting_stats - dictionary of batting statistics (values are strings)
      minimum_ab    - optional at-bats cutoff; when omitted the module-level
                      MINIMUM_AB is looked up at call time
    Output:
      Returns the on-base percentage, (hits + walks) / (at bats + walks),
      as a float, or 0.0 when the at bats are below the cutoff
    """
    if minimum_ab is None:
        minimum_ab = MINIMUM_AB
    hits = float(batting_stats[info["hits"]])
    at_bats = float(batting_stats[info["atbats"]])
    walks = float(batting_stats[info["walks"]])
    plate_appearances = at_bats + walks
    # The second test only matters for a cutoff of 0: it avoids dividing by zero.
    if at_bats < minimum_ab or plate_appearances == 0:
        return 0.0
    return (hits + walks) / plate_appearances

def slugging_percentage(info, batting_stats, minimum_ab=None):
    """
    Inputs:
      info          - baseball data information dictionary; its "hits",
                      "doubles", "triples", "homeruns" and "atbats" entries
                      name the columns read from batting_stats
      batting_stats - dictionary of batting statistics (values are strings)
      minimum_ab    - optional at-bats cutoff; when omitted the module-level
                      MINIMUM_AB is looked up at call time
    Output:
      Returns the slugging percentage (total bases / at bats) as a float,
      or 0.0 when the at bats are below the cutoff
    """
    if minimum_ab is None:
        minimum_ab = MINIMUM_AB
    hits = float(batting_stats[info["hits"]])
    doubles = float(batting_stats[info["doubles"]])
    triples = float(batting_stats[info["triples"]])
    home_runs = float(batting_stats[info["homeruns"]])
    at_bats = float(batting_stats[info["atbats"]])
    # Hits that are not extra-base hits count as singles.
    singles = hits - doubles - triples - home_runs
    # The second test only matters for a cutoff of 0: it avoids dividing by zero.
    if at_bats < minimum_ab or at_bats == 0:
        return 0.0
    return (singles + 2 * doubles + 3 * triples + 4 * home_runs) / at_bats

# 1st function

In [None]:

import csv

def filter_by_year(statistics, year, yearid):
    """
    Inputs:
      statistics - list of batting statistics dictionaries
      year       - year to keep, as an integer
      yearid     - name of the column holding the year
    Output:
      Returns a new list containing, in their original order, the rows
      whose year column converts to the requested year
    """
    return [record for record in statistics if year == int(record[yearid])]

# Example call: keep only the batting rows whose "yearID" column is 1935.
filter_by_year(statistics, 1935, "yearID") # the author noted 513 matching rows for 1935



## 2nd function

In [100]:
MINIMUM_AB = 500


def _slugging_for_row(info, row):
    """Return the slugging percentage of one batting row, or None when its at bats are below MINIMUM_AB."""
    hits = float(row[info["hits"]])
    doubles = float(row[info["doubles"]])
    triples = float(row[info["triples"]])
    home_runs = float(row[info["homeruns"]])
    # Hits that are not extra-base hits count as singles.
    singles = hits - doubles - triples - home_runs
    at_bats = float(row[info["atbats"]])

    if at_bats < MINIMUM_AB:
        return None
    return float((singles + 2 * doubles + 3 * triples + 4 * home_runs) / at_bats)


def slugging_percentage(info, batting_stats):
    """
    Inputs:
      info          - baseball data information dictionary
      batting_stats - either ONE dictionary of batting statistics, or a
                      list of such dictionaries (values are strings)
    Output:
      For a single dictionary: the slugging percentage as a float, or 0.0
      when the at bats are below MINIMUM_AB.
      For a list of dictionaries: a list with the slugging percentage of
      every row that reaches MINIMUM_AB at bats (other rows are left out).

    Both forms are accepted because this definition replaces an earlier
    per-row function of the same name. Callers that pass a single row
    would otherwise iterate over the row's keys and fail with
    "string indices must be integers".
    """
    if isinstance(batting_stats, dict):
        value = _slugging_for_row(info, batting_stats)
        return 0.0 if value is None else value

    stats_list = []
    for row in batting_stats:
        value = _slugging_for_row(info, row)
        if value is not None:
            stats_list.append(value)
    return stats_list
       
def top_player_ids(info, statistics, onbase_percentage, numplayers):
    """
    Inputs:
      info              - baseball data information dictionary; its
                          "playerid" entry names the player ID column
      statistics        - list of batting statistics dictionaries
      onbase_percentage - function computing the statistic to rank by; it
                          is called as onbase_percentage(info, row) for
                          every row, so any formula with that signature
                          can be passed
      numplayers        - number of top entries to return
    Output:
      Returns a list of (player ID, statistic) tuples sorted from the
      highest statistic to the lowest. The list holds at most numplayers
      entries; it is shorter when statistics has fewer rows.
    """
    # Pair every row's player ID with its computed statistic.
    scored_players = [(row[info['playerid']], onbase_percentage(info, row))
                      for row in statistics]

    # Highest statistic first; ties keep their original relative order.
    scored_players.sort(key=lambda pair: pair[1], reverse=True)

    # Slicing never runs past the end of the list; max() keeps a negative
    # request from being read as "all but the last few".
    return scored_players[:max(numplayers, 0)]

# Rank every row of `statistics` by on-base percentage and keep the ten
# highest (player ID, value) pairs.
top_ids_and_stats = top_player_ids(baseballdatainfo, statistics, onbase_percentage,  10)       


TypeError: string indices must be integers

# 3rd function

In [None]:

def lookup_player_names(info, top_ids_and_stats):
    """
    Inputs:
      info              - baseball data information dictionary; supplies
                          the master file name, the CSV separator and
                          quote characters, and the player ID, first name
                          and last name column names
      top_ids_and_stats - list of (player ID, statistic) pairs
    Output:
      Returns a list of strings of the form "x.xxx --- FirstName LastName",
      one per pair and in the same order as top_ids_and_stats. Pairs whose
      player ID does not appear in the master file are left out.
    """
    wanted_ids = {entry[0] for entry in top_ids_and_stats}

    # Read the master file once, remembering only the names that are needed.
    names_by_id = {}
    with open(info['masterfile'], newline='') as master_file:
        master_reader = csv.DictReader(master_file,
                                       delimiter=info['separator'],
                                       quotechar=info['quote'])
        for row in master_reader:
            player_id = row[info['playerid']]
            if player_id in wanted_ids:
                names_by_id[player_id] = row[info['firstname']] + " " + row[info['lastname']]

    # Build the output in ranking order rather than in master-file order.
    player_names = []
    for entry in top_ids_and_stats:
        if entry[0] in names_by_id:
            player_names.append(str(f'{entry[1]:.3f}') + " --- " + names_by_id[entry[0]])
    return player_names
# Show the formatted "statistic --- name" lines for the pairs held in top_ids_and_stats.
print(lookup_player_names(baseballdatainfo, top_ids_and_stats))

# 4th function

In [None]:
def compute_top_stats_year(info, formula, numplayers, year):
    """
    Inputs:
      info       - baseball data information dictionary
      formula    - function computing the statistic to rank by; called as
                   formula(info, row) with a single batting row
      numplayers - number of top players to return
      year       - year to restrict the batting rows to, as an integer
    Output:
      Returns a list of strings of the form "x.xxxxx --- FirstName LastName"
      for the rows of the given year with the highest statistic, best
      first. At most numplayers strings are returned; rows whose player ID
      is missing from the master file are left out.

    Reads the module-level `statistics` list and the master file named by
    info['masterfile'].
    """
    year_rows = filter_by_year(statistics, year, info['yearid'])

    # Never ask for more entries than there are rows for that year.
    wanted_count = max(min(numplayers, len(year_rows)), 0)
    top_pairs = top_player_ids(info, year_rows, formula, wanted_count)
    wanted_ids = {player_id for player_id, _ in top_pairs}

    # Read the master file once instead of once per batting row.
    names_by_id = {}
    with open(info['masterfile'], newline='') as master_file:
        master_reader = csv.DictReader(master_file,
                                       delimiter=info['separator'],
                                       quotechar=info['quote'])
        for master_row in master_reader:
            player_id = master_row[info['playerid']]
            if player_id in wanted_ids:
                names_by_id[player_id] = (master_row[info['firstname']] + " "
                                          + master_row[info['lastname']])

    list_of_top_players_by_year = []
    for player_id, stat in top_pairs:
        if player_id in names_by_id:
            list_of_top_players_by_year.append(f'{stat:.5f}' + " --- " + names_by_id[player_id])

    return list_of_top_players_by_year

xyz = compute_top_stats_year(baseballdatainfo, slugging_percentage, 20, 1936)

# 5th function

In [None]:
def aggregate_by_player_id(statistics, playerid, fields, info=None):
    """
    Inputs:
      statistics - list of batting statistics dictionaries (values are
                   strings or numbers)
      playerid   - ID of the player whose rows are summed
      fields     - keys of the info dictionary (for example 'atbats' or
                   'hits') naming the columns to sum
      info       - optional baseball data information dictionary; when
                   omitted the module-level baseballdatainfo is used
    Output:
      Returns {playerid: {column name: total}}. The inner dictionary
      always has 'AB', 'H', '2B', '3B', 'HR' and 'BB' entries, plus one
      entry for any other column named through fields. A column stays 0
      when no row of that player was summed into it.
    """
    if info is None:
        info = baseballdatainfo

    # Translate the info keys to CSV column names once, outside the row loop.
    columns = [info[field] for field in fields]
    id_column = info.get('playerid', 'playerID')

    totals = {'AB': 0, 'H': 0, '2B': 0, '3B': 0, 'HR': 0, 'BB': 0}
    # Columns outside the six defaults get their own zeroed entry so that
    # summing them does not raise KeyError.
    for column in columns:
        totals.setdefault(column, 0)

    for row in statistics:
        if row[id_column] == playerid:
            for column in columns:
                totals[column] = float(totals[column]) + float(row[column])

    return {playerid: totals}
# Example: career totals for the player of the first batting row.
# The arguments are defined here so the cell does not depend on names
# that only exist after later cells have run.
example_fields = ['atbats', 'hits', 'doubles', 'triples', 'homeruns', 'walks']
example_player_id = statistics[0][baseballdatainfo['playerid']]
print(aggregate_by_player_id(statistics, example_player_id, example_fields))

# 6th function

In [None]:
def compute_top_stats_career(info, formula, numplayers):
    """
    Inputs:
      info       - baseball data information dictionary
      formula    - function computing the statistic to rank by; called as
                   formula(info, totals) with one dictionary of career
                   totals per player
      numplayers - number of top players to return
    Output:
      Returns a list of [player ID, statistic] pairs for the players with
      the highest career statistic, best first, at most numplayers long.
      Nothing is printed here; the caller decides what to display.

    Reads the module-level `statistics` list. Each player's at bats, hits,
    doubles, triples, home runs and walks are summed over all rows before
    the formula is applied to the totals.
    """
    stat_columns = [info[key] for key in
                    ('atbats', 'hits', 'doubles', 'triples', 'homeruns', 'walks')]

    # Single pass over the batting rows: {player ID: {column: career total}}
    career_totals = {}
    for row in statistics:
        playerid = row[info['playerid']]
        if playerid not in career_totals:
            career_totals[playerid] = {column: 0.0 for column in stat_columns}
        totals = career_totals[playerid]
        for column in stat_columns:
            # A blank cell counts as zero instead of raising ValueError.
            totals[column] += float(row[column] or 0)

    list_career_high = [[playerid, formula(info, totals)]
                        for playerid, totals in career_totals.items()]
    # Highest career statistic first.
    list_career_high.sort(key=lambda pair: pair[1], reverse=True)
    return list_career_high[:max(numplayers, 0)]

print(compute_top_stats_career(baseballdatainfo, slugging_percentage, 20))

# Heavy lifting handled here

In [None]:
# Info-dictionary keys of the columns that are summed per player
fields = ['atbats','hits', 'doubles', 'triples', 'homeruns','walks']
aggregate_fields = {} # {playerid: {'AB': total, 'H': total, '2B': total, '3B': total, 'HR': total, 'BB': total}}

# Single pass over the batting rows. A player's totals are created only
# the first time the player is seen, so every later row of that player is
# added to the running totals rather than replacing them.
for row in statistics:
    playerid = row['playerID']
    if playerid not in aggregate_fields:
        aggregate_fields[playerid] = {'AB':0, 'H':0, '2B':0, '3B':0, 'HR':0, 'BB':0}
    for field in fields:
        column = baseballdatainfo[field]
        aggregate_fields[playerid][column] = float(aggregate_fields[playerid][column]) + float(row[column])

# Every distinct player ID, in order of first appearance
unique_ids = list(aggregate_fields)

print(len(aggregate_fields))

# Write one line per player: the player ID followed by the totals in
# baseballdatainfo['battingfields'] order.
with open('ok4.csv', 'w', newline = '') as ok_file:
    writer = csv.writer(ok_file, delimiter=',', quotechar = "'")
    for row in aggregate_fields:
        list_to_join = [row]
        for field in baseballdatainfo['battingfields']:
            list_to_join.append(aggregate_fields[row][field])
        writer.writerow(list_to_join)

In [99]:
# Export the per-player totals held in aggregate_fields: one CSV line per
# player, made of the player ID followed by the totals in the order given
# by baseballdatainfo['battingfields'].
with open('ok4.csv', 'w', newline = '') as ok_file:
    writer = csv.writer(ok_file, delimiter=',', quotechar = "'")
    writer.writerows(
        [player_id] + [aggregate_fields[player_id][column]
                       for column in baseballdatainfo['battingfields']]
        for player_id in aggregate_fields
    )

KeyError: 'playerID'

# Testing