In [116]:
baseballdatainfo = {
    "masterfile": "Master_2016.csv",   # Name of Master CSV file
    "battingfile": "Batting_2016.csv", # Name of Batting CSV file
    "separator": ",",                  # Separator character in CSV files
    "quote": '"',                      # Quote character in CSV files
    "playerid": "playerID",            # Player ID field name
    "firstname": "nameFirst",          # First name field name
    "lastname": "nameLast",            # Last name field name
    "yearid": "yearID",                # Year field name
    "atbats": "AB",                    # At bats field name
    "hits": "H",                       # Hits field name
    "doubles": "2B",                   # Doubles field name
    "triples": "3B",                   # Triples field name
    "homeruns": "HR",                  # Home runs field name
    "walks": "BB",                     # Walks field name
    "battingfields": ["AB", "H", "2B", "3B", "HR", "BB"]
}

statistics = []  # batting_2016.csv file extracted as list_of_dictionaries
with open(baseballdatainfo['battingfile'], newline='') as csvfile:
    csvreader = csv.DictReader(csvfile, delimiter=',', quotechar="'")
    for row in csvreader:
        statistics.append(row)
        
        
player_names = []
with open(baseballdatainfo['masterfile'], newline = '') as master_file:
    master_reader = csv.DictReader(master_file, delimiter=',', quotechar = "'")
    for row in master_reader:
        player_names.append(row) 

In [117]:

# Typical cutoff used for official statistics
MINIMUM_AB = 500

def batting_average(info, batting_stats):
    """
    Inputs:
      batting_stats - dictionary of batting statistics (values are strings)
    Output:
      Returns the batting average as a float
    """
    hits = float(batting_stats[info["hits"]])
    at_bats = float(batting_stats[info["atbats"]])
    if at_bats >= MINIMUM_AB:
        return hits / at_bats
    else:
        return 0

def onbase_percentage(info, batting_stats):
    """
    Inputs:
      batting_stats - dictionary of batting statistics (values are strings)
    Output:
      Returns the on-base percentage as a float
    """
    hits = float(batting_stats[info["hits"]])
    at_bats = float(batting_stats[info["atbats"]])
    walks = float(batting_stats[info["walks"]])
    if at_bats >= MINIMUM_AB:
        return (hits + walks) / (at_bats + walks)
    else:
        return 0

def slugging_percentage(info, batting_stats):
    """
    Inputs:
      batting_stats - dictionary of batting statistics (values are strings)
    Output:
      Returns the slugging percentage as a float
    """
    hits = float(batting_stats[info["hits"]])
    doubles = float(batting_stats[info["doubles"]])
    triples = float(batting_stats[info["triples"]])
    home_runs = float(batting_stats[info["homeruns"]])
    singles = hits - doubles - triples - home_runs
    at_bats = float(batting_stats[info["atbats"]])
    if at_bats >= MINIMUM_AB:
        return (singles + 2 * doubles + 3 * triples + 4 * home_runs) / at_bats
    else:
        return 0

# 1st function

In [118]:

import csv

def filter_by_year(statistics, year, yearid):
    filtered_stats = []
    for row in statistics:
        if year == int(row[yearid]): # Comparing every row's year to the input year value
            filtered_stats.append(row) # Appending row, belonging to the input year
    return filtered_stats       # list of dictionaries

# filter_by_year(statistics, 1935, "yearID") # filters list of dictionaries are 513 for year 1935



## 2nd function

In [119]:
def top_player_ids(info, statistics, formula, numplayers):
    # preparing list of playerid and list of stats
    playerid_list = []
    stats_list = []
    for row in statistics:
        playerid_list.append(row[info['playerid']])
        if formula == slugging_percentage:
            stats_list.append(slugging_percentage(info, row))
        elif formula == onbase_percentage:
            stats_list.append(onbase_percentage(info, row))
        else:
            stats_list.append(batting_average(info, row))

    # building the list of tuples
    player_list_of_tuple = list(map(lambda player, stats : (player, stats), playerid_list, stats_list)) 

    # reversing the obtained list of tuples based on the stats value
    player_list_of_tuple.sort(key = lambda x:x[1], reverse=True)   
    
    # extracting the top 10 players from the obtained list of tuples
    top_ids_and_stats = []
    for x in range(numplayers):
        top_ids_and_stats.append(player_list_of_tuple[x])
        
    return top_ids_and_stats # list of tuples

# top_ids_and_stats = top_player_ids(baseballdatainfo, statistics, onbase_percentage,  10)  
# print(top_ids_and_stats)


# 3rd function

In [120]:

def lookup_player_names(info, top_ids_and_stats):
    player_names = []
    with open("Master_2016.csv", newline = '') as master_file:
        master_reader = csv.DictReader(master_file, delimiter=',', quotechar = "'")
        
        """Reading the row of master file, then comparing the playerID from row to the playerID from tuple."""
        for row in master_reader:
            for id in top_ids_and_stats:
                'if both the ids match up, then we add the required values from specific to the new list'
                if id[0] == row[info['playerid']]:
                    player_names.append(str(f'{id[1]:.3f}')+" --- " + row[info['firstname']]+ " " + row[info['lastname']])  
    
    return player_names # list of strings
# print(lookup_player_names(baseballdatainfo, top_ids_and_stats))

# 4th function

In [121]:
def compute_top_stats_year(info, formula, numplayers, year):
    
    yearbased_list_of_dictionary = filter_by_year(statistics, year, 'yearID')
    
    top_ids_and_stats = top_player_ids(info, statistics, formula, numplayers)
    
    player_names =  lookup_player_names(info, top_ids_and_stats)
    
    return player_names  

# xyz = compute_top_stats_year(baseballdatainfo, slugging_percentage, 20, 1936)

In [122]:
print (xyz)

['0.749 --- Jimmie Foxx', '0.704 --- Jimmie Foxx', '0.703 --- Jimmie Foxx', '0.765 --- Lou Gehrig', '0.721 --- Lou Gehrig', '0.706 --- Lou Gehrig', '0.756 --- Rogers Hornsby', '0.722 --- Rogers Hornsby', '0.705 --- Mickey Mantle', '0.752 --- Mark McGwire', '0.846 --- Babe Ruth', '0.772 --- Babe Ruth', '0.764 --- Babe Ruth', '0.739 --- Babe Ruth', '0.732 --- Babe Ruth', '0.709 --- Babe Ruth', '0.708 --- Al Simmons', '0.737 --- Sammy Sosa', '0.720 --- Larry Walker', '0.723 --- Hack Wilson']


# 5th function

In [123]:
def aggregate_by_player_id(statistics, playerid, fields):
    
    aggregate_fields = {playerid:{'AB':0, 'H':0, '2B':0, '3B':0, 'HR':0, 'BB':0}}

    for row in statistics:
        if row['playerID'] == playerid:            
            for field in fields:
                   aggregate_fields[playerid][field] = float(aggregate_fields[playerid][field]) + float(row[field])
                       
    return aggregate_fields

# print(aggregate_by_player_id(statistics, playerid, fields))

In [135]:
def unique_work(info, statistics):
    unique_ids = []
    for row in statistics:
        unique_ids.append(row['playerID'])
    unique_ids = list(set(unique_ids))  # 18915 unique ids
   
    aggregate_fields = {} # {playerid:{'AB':0, 'H':0, '2B':0, '3B':0, 'HR':0, 'BB':0}}
    unique_agg_stat = []
    for playerid in unique_ids:
        for row in statistics:
            if row['playerID'] == playerid:
                aggregate_fields[playerid] = {'AB':0, 'H':0, '2B':0, '3B':0, 'HR':0, 'BB':0}  # creating new dict inside a dict
                for field in info['battingfields']:
                       aggregate_fields[playerid][field] = float(aggregate_fields[playerid][field]) + float(row[field])
                aggregate_fields[playerid]['playerID'] = playerid
                unique_agg_stat.append(aggregate_fields[playerid])
                
    return unique_agg_stat

# 6th function

In [136]:
def compute_top_stats_career(info, formula, numplayers):
    
    # aggregating players statistics to decrase the workload
    unique_stats = unique_work(baseballdatainfo, statistics)
    
    # calculating top players by their career score
    top_ids_and_stats = top_player_ids(info, unique_stats, formula, numplayers) 
    
    # extracting top player names
    print(lookup_player_names(info, top_ids_and_stats))

compute_top_stats_career(baseballdatainfo, slugging_percentage, 20)

['0.749 --- Jimmie Foxx', '0.704 --- Jimmie Foxx', '0.703 --- Jimmie Foxx', '0.765 --- Lou Gehrig', '0.721 --- Lou Gehrig', '0.706 --- Lou Gehrig', '0.756 --- Rogers Hornsby', '0.722 --- Rogers Hornsby', '0.705 --- Mickey Mantle', '0.752 --- Mark McGwire', '0.846 --- Babe Ruth', '0.772 --- Babe Ruth', '0.764 --- Babe Ruth', '0.739 --- Babe Ruth', '0.732 --- Babe Ruth', '0.709 --- Babe Ruth', '0.708 --- Al Simmons', '0.737 --- Sammy Sosa', '0.720 --- Larry Walker', '0.723 --- Hack Wilson']


# Testing

In [None]:
def test_baseball_statistics():
    """
    Simple testing code.
    """c
    print("Top 5 batting averages in 1923")
    top_batting_average_1923 = compute_top_stats_year(baseballdatainfo, batting_average, 5, 1923)
    for player in top_batting_average_1923:
        print(player)
    print("")

    print("Top 10 batting averages in 2010")
    top_batting_average_2010 = compute_top_stats_year(baseballdatainfo, batting_average, 10, 2010)
    for player in top_batting_average_2010:
        print(player)
    print("")

    print("Top 10 on-base percentage in 2010")
    top_onbase_2010 = compute_top_stats_year(baseballdatainfo, onbase_percentage, 10, 2010)
    for player in top_onbase_2010:
        print(player)
    print("")

    print("Top 10 slugging percentage in 2010")
    top_slugging_2010 = compute_top_stats_year(baseballdatainfo, slugging_percentage, 10, 2010)
    for player in top_slugging_2010:
        print(player)
    print("")

    # You can also use lambdas for the formula
    #  This one computes onbase plus slugging percentage
    print("Top 10 OPS in 2010")
    top_ops_2010 = compute_top_stats_year(baseballdatainfo,
                                          lambda info, stats: (onbase_percentage(info, stats) +
                                                               slugging_percentage(info, stats)),
                                          10, 2010)
    for player in top_ops_2010:
        print(player)
    print("")

    print("Top 20 career batting averages")
    top_batting_average_career = compute_top_stats_career(baseballdatainfo, batting_average, 20)
    for player in top_batting_average_career:
        print(player)
    print("")


# Make sure the following call to test_baseball_statistics is
# commented out when submitting to OwlTest/CourseraTest.

test_baseball_statistics()