In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the MLB 2023 Standard Batting page
base_url = 'https://www.baseball-reference.com'
main_url = f'{base_url}/leagues/majors/2023-standard-batting.shtml'

# Request the page. The timeout keeps a stalled connection from hanging the
# script, and raise_for_status() stops an HTTP error page (e.g. a rate-limit
# response) from being parsed as if it were the stats table.
response = requests.get(main_url, timeout=30)
response.raise_for_status()

# Parse the raw bytes as UTF-8 instead of using response.text: when the
# Content-Type header carries no charset, requests decodes text as ISO-8859-1,
# which turns accented names into mojibake ("Acuña" -> "AcuÃ±a").
soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

# Target the specific div for Player Standard Batting
batting_div = soup.find('div', id='div_players_standard_batting')

# Output column name -> value of the cell's data-stat attribute.
stat_by_column = {
    'Player Name': 'name_display',
    'Age': 'age',
    'Team': 'team_name_abbr',
    'At Bats': 'b_ab',
    'Home Runs': 'b_hr',
}


def _is_combined_team(team_abbr):
    """Return True for a multi-team season total such as '2TM' or '3TM'."""
    return team_abbr.endswith('TM') and team_abbr[:-2].isdigit()


# One record per (player name, age); insertion order follows the page order.
unique_players = {}

if batting_div is None:
    # Without the div there is nothing to collect; the DataFrame below is
    # still created with its columns so later column lookups do not fail.
    print('Warning: div_players_standard_batting not found; no players collected')
    table_rows = []
else:
    table_rows = batting_div.find_all('tr')[1:]  # Skip the header row

for row in table_rows:
    cells = {
        column: row.find('td', {'data-stat': stat})
        for column, stat in stat_by_column.items()
    }

    # Rows lacking any of the required cells are skipped.
    if any(cell is None for cell in cells.values()):
        continue

    player_name = cells['Player Name'].text.strip()
    age = cells['Age'].text.strip()
    team_name = cells['Team'].text.strip()
    at_bats = cells['At Bats'].text.strip()
    home_runs = cells['Home Runs'].text.strip()

    # Skip rows with blank counting stats and the league summary row. The
    # summary check runs before int() so that row can never break conversion.
    if not at_bats or not home_runs:
        continue
    if "League Average" in player_name:
        continue

    record = {
        'Player Name': player_name,
        'Age': age,
        'Team': team_name,
        'At Bats': int(at_bats),
        'Home Runs': int(home_runs),
    }

    # Create a unique key based on player name and age
    player_key = (player_name, age)
    existing = unique_players.get(player_key)

    # Keep the first row seen for a player, unless a later row is the combined
    # multi-team total ('2TM', '3TM', ...) and the stored one is a single-team
    # row; the combined total then replaces it.
    if existing is None or (
        _is_combined_team(team_name) and not _is_combined_team(existing['Team'])
    ):
        unique_players[player_key] = record

# Explicit columns keep the frame's schema stable even when no rows matched.
df_players = pd.DataFrame(
    list(unique_players.values()),
    columns=list(stat_by_column),
)

# Show the de-duplicated player table before any derived column is added
print(df_players)

# Home-run rate per player: home runs divided by at-bats, element-wise.
# Rows with zero at-bats do not raise; pandas yields NaN/inf for them.
hr_rate = df_players['Home Runs'].div(df_players['At Bats'])
df_players['Home Runs per At-Bat'] = hr_rate

# Order players from most to fewest at-bats, then keep the first 200 rows
ranked_by_at_bats = df_players.sort_values(by='At Bats', ascending=False)
top_200_players = ranked_by_at_bats.head(200)

print(top_200_players)


           Player Name Age Team  At Bats  Home Runs
0        Marcus Semien  32  TEX      670         29
1    Ronald AcuÃ±a Jr.  25  ATL      643         41
2     Freddie Freeman*  33  LAD      637         29
3         Alex Bregman  29  HOU      622         25
4      Nathaniel Lowe*  27  TEX      623         17
..                 ...  ..  ...      ...        ...
760    Hayden Wesneski  25  CHC        0          0
761   Garrett Whitlock  27  BOS        0          0
762      Nick Wittgren  32  KCR        0          0
763     Ryan Yarbrough  31  LAD        0          0
764        Alex Young*  29  CIN        0          0

[765 rows x 5 columns]
           Player Name Age Team  At Bats  Home Runs  Home Runs per At-Bat
0        Marcus Semien  32  TEX      670         29              0.043284
9     Julio RodrÃ­guez  22  SEA      654         32              0.048930
1    Ronald AcuÃ±a Jr.  25  ATL      643         41              0.063764
12      Bobby Witt Jr.  23  KCR      641         30     