<a href="https://colab.research.google.com/github/JDavila29/The-_Berry_Fairy/blob/main/Women's_Lax_(Jesse)_Finished.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime, timedelta

# Function to get game results for a specific date
def get_game_results(year, month, day):
    url = f"https://www.ncaa.com/scoreboard/lacrosse-women/d1/{year}/{month:02d}/{day:02d}/all-conf"
    print(f"Fetching data from: {url}")

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to retrieve data for {year}-{month:02d}-{day:02d}: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    games = []

    # Find game containers
    for game in soup.find_all("div", class_="gamePod"):
        try:
            date = f"{year}-{month:02d}-{day:02d}"
            status = game.find("div", class_="gamePod-status").text.strip()  # e.g., "FINAL"
            round_info = game.find("span", class_="game-round")
            round_name = round_info.text.strip() if round_info else "Unknown Round"

            teams = game.find_all("span", class_="gamePod-game-team-name")
            scores = game.find_all("span", class_="gamePod-game-team-score")
            ranks = game.find_all("span", class_="gamePod-game-team-rank")

            team1 = teams[0].text.strip() if len(teams) > 0 else "Unknown"
            team2 = teams[1].text.strip() if len(teams) > 1 else "Unknown"
            score1 = scores[0].text.strip() if len(scores) > 0 else "N/A"
            score2 = scores[1].text.strip() if len(scores) > 1 else "N/A"
            rank1 = ranks[0].text.strip() if len(ranks) > 0 else "N/A"
            rank2 = ranks[1].text.strip() if len(ranks) > 1 else "N/A"

            winner_class = game.find_all("li", class_="winner")
            winner = winner_class[0].find("span", class_="gamePod-game-team-name").text.strip() if winner_class else "N/A"

            games.append([date, round_name, status, team1, rank1, score1, team2, rank2, score2, winner])

        except Exception as e:
            print(f"Error parsing game data for {date}: {e}")

    return games

# Set the range of years
START_YEAR = 2014
END_YEAR = 2024

# Loop through each year (only August–December)
for year in range(START_YEAR, END_YEAR + 1):
    all_games = []
    start_date = datetime(year, 1, 1)  # September 1st
    end_date = datetime(year, 5, 30)  # March 31st
    current_date = start_date

    while current_date <= end_date:
        month, day = current_date.month, current_date.day
        games_data = get_game_results(year, month, day)

        if games_data:
            all_games.extend(games_data)

        # Respectful scraping: delay between requests
        time.sleep(1.5)

        # Move to the next day
        current_date += timedelta(days=1)

    # Save data for the year (if any games were found)
    if all_games:
        df = pd.DataFrame(all_games, columns=["Date", "Round", "Status", "Team 1", "Rank 1", "Score 1", "Team 2", "Rank 2", "Score 2", "Winner"])
        filename = f"ncaa_womens_lacrosse_results_{year}.csv"
        df.to_csv(filename, index=False)
        print(f"Data for {year} (Aug–Dec) saved to {filename}")
    else:
        print(f"No games found for {year} (Aug–Dec)")

print("Scraping completed for the last 10 seasons (August–December).")

Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/01/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/02/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/03/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/04/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/05/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/06/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/07/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/08/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/09/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/10/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/11/all-conf
Fetching d

# New Section

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import math

class EloRatingSystem:
    def __init__(self, k=32, initial_rating=1500):
        self.k = k  # Standard Elo adjustment factor
        self.ratings = {}  # Dictionary to store team ratings
        self.initial_rating = initial_rating  # Default initial Elo rating


    def expected_score(self, rating1, rating2):
        return 1 / (1 + 10 ** ((rating2 - rating1) / 400))

    def update_ratings(self, team1, team2, score1, score2):
        if team1 not in self.ratings:
            self.ratings[team1] = self.initial_rating
        if team2 not in self.ratings:
            self.ratings[team2] = self.initial_rating

        rating1, rating2 = self.ratings[team1], self.ratings[team2]
        exp_score1 = self.expected_score(rating1, rating2)
        exp_score2 = self.expected_score(rating2, rating1)

        actual_score1 = 1 if score1 > score2 else 0.5 if score1 == score2 else 0
        actual_score2 = 1 - actual_score1

        # Margin of Victory Multiplier
        mov = abs(score1 - score2)
        mov_multiplier = math.log(mov + 1) * (2.2 / (1 + 0.001 * abs(rating1 - rating2)))

        # Update ratings
        self.ratings[team1] += self.k * mov_multiplier * (actual_score1 - exp_score1)
        self.ratings[team2] += self.k * mov_multiplier * (actual_score2 - exp_score2)

    def normalize_end_of_season(self):
        """Applies regression to the mean to avoid long-term inflation/deflation."""
        for team in self.ratings:
            if self.ratings[team] > self.initial_rating:
                self.ratings[team] -= (self.ratings[team] - self.initial_rating) * 0.25
            else:
                self.ratings[team] += (self.initial_rating - self.ratings[team]) * 0.25

    def get_ratings(self):
        return self.ratings




In [None]:
from google.colab import files

# This will prompt you to upload files
uploaded = files.upload()

# Displaying the names of the uploaded files
for filename in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=filename, length=len(uploaded[filename])))




Saving ncaa_womens_lacrosse_results_2014.csv to ncaa_womens_lacrosse_results_2014.csv
Saving ncaa_womens_lacrosse_results_2015.csv to ncaa_womens_lacrosse_results_2015.csv
Saving ncaa_womens_lacrosse_results_2016.csv to ncaa_womens_lacrosse_results_2016.csv
Saving ncaa_womens_lacrosse_results_2017.csv to ncaa_womens_lacrosse_results_2017.csv
Saving ncaa_womens_lacrosse_results_2018.csv to ncaa_womens_lacrosse_results_2018.csv
Saving ncaa_womens_lacrosse_results_2019.csv to ncaa_womens_lacrosse_results_2019.csv
Saving ncaa_womens_lacrosse_results_2020.csv to ncaa_womens_lacrosse_results_2020.csv
Saving ncaa_womens_lacrosse_results_2021.csv to ncaa_womens_lacrosse_results_2021.csv
Saving ncaa_womens_lacrosse_results_2022.csv to ncaa_womens_lacrosse_results_2022.csv
Saving ncaa_womens_lacrosse_results_2023.csv to ncaa_womens_lacrosse_results_2023.csv
Saving ncaa_womens_lacrosse_results_2024.csv to ncaa_womens_lacrosse_results_2024.csv
User uploaded file "ncaa_womens_lacrosse_results_2014.

In [None]:
elo_system = EloRatingSystem()


In [None]:
import pandas as pd
import io

# Assuming 'uploaded' is the dictionary returned by files.upload() containing the content of the files
all_data = []
for filename, content in uploaded.items():
    df = pd.read_csv(io.BytesIO(content))  # Read each file from memory
    all_data.append(df)

# Concatenate all the DataFrames into one DataFrame
combined_data = pd.concat(all_data, ignore_index=True)
combined_data['Date'] = pd.to_datetime(combined_data['Date'])  # Convert 'Date' to datetime
combined_data.sort_values('Date', inplace=True)  # Sort the data by date

# Show the first few rows to confirm
print(combined_data.head())


        Date          Round Status      Team 1  Rank 1  Score 1        Team 2  \
0 2014-02-07  Unknown Round  Final     Liberty     NaN      7.0      Richmond   
1 2014-02-07  Unknown Round  Final     Detroit     NaN      3.0      Ohio St.   
2 2014-02-07  Unknown Round    NaN  Vanderbilt     NaN      0.0  Kennesaw St.   
3 2014-02-07  Unknown Round    NaN        Elon     NaN      0.0          Duke   
4 2014-02-08  Unknown Round  Final        Navy     NaN     14.0      Longwood   

   Rank 2  Score 2    Winner  
0     NaN     16.0  Richmond  
1     NaN     18.0  Ohio St.  
2     NaN      0.0       NaN  
3     NaN      0.0       NaN  
4     NaN      8.0      Navy  


In [None]:
# Update ratings based on the game results
for index, row in combined_data.iterrows():
    if pd.notna(row['Score 1']) and pd.notna(row['Score 2']):
        score1, score2 = int(row['Score 1']), int(row['Score 2'])
        elo_system.update_ratings(row['Team 1'], row['Team 2'], score1, score2)

# Extract and display the final ratings
final_ratings = pd.DataFrame(list(elo_system.get_ratings().items()), columns=['Team', 'Rating']).sort_values(by='Rating', ascending=False)
print(final_ratings)



                  Team       Rating
16        Northwestern  2792.148862
24      Boston College  2775.587251
51            Syracuse  2589.991278
10             Florida  2552.488156
96              Denver  2379.829725
..                 ...          ...
160                FDU   634.227427
111       Gardner-Webb   628.813519
82   Central Conn. St.   582.588260
44        Delaware St.   485.179824
64              Howard   205.930474

[172 rows x 2 columns]


In [None]:
def display_elo_ratings(elo_system):
    # Ensure the ratings are sorted by Elo rating value. This sorts from highest to lowest by default.
    sorted_ratings = sorted(elo_system.ratings.items(), key=lambda x: x[1], reverse=True)

    # Print the sorted ratings
    print("Team Names and Their Elo Ratings:")
    for team, rating in sorted_ratings:
        print(f"{team}: {rating:.2f}")


display_elo_ratings(elo_system)


Team Names and Their Elo Ratings:
Northwestern: 2792.15
Boston College: 2775.59
Syracuse: 2589.99
Florida: 2552.49
Denver: 2379.83
Maryland: 2377.53
Michigan: 2354.06
Notre Dame: 2343.30
North Carolina: 2339.23
Loyola Maryland: 2324.57
Penn: 2317.60
Stony Brook: 2313.27
Yale: 2300.23
Virginia: 2289.49
Johns Hopkins: 2209.16
James Madison: 2150.64
Princeton: 2140.11
Stanford: 2137.11
Loyola (Md.): 2112.20
Navy: 2098.35
Penn St.: 2074.92
USC: 2045.82
Southern California: 2027.05
Clemson: 2023.24
Fairfield: 2021.74
Duke: 2020.49
Massachusetts: 1976.75
Colorado: 1959.01
Drexel: 1953.51
Richmond: 1935.89
Harvard: 1918.98
Rutgers: 1908.42
Louisville: 1891.18
Brown: 1885.69
UConn: 1860.01
Army West Point: 1835.20
Temple: 1818.17
Jacksonville: 1805.63
Albany (NY): 1804.86
Virginia Tech: 1797.53
Coastal Carolina: 1785.89
Niagara: 1771.32
UAlbany: 1757.66
Hofstra: 1753.51
Mercer: 1750.18
Towson: 1748.58
Saint Joseph's: 1744.23
Arizona St.: 1729.45
Vanderbilt: 1727.24
Ohio St.: 1719.02
Dartmouth:

In [None]:
def calculate_win_probability(elo_system, team1, team2):
    if team1 in elo_system.ratings and team2 in elo_system.ratings:
        rating1 = elo_system.ratings[team1]
        rating2 = elo_system.ratings[team2]
        expected_score_team1 = 1 / (1 + 10 ** ((rating2 - rating1) / 400))
        expected_score_team2 = 1 - expected_score_team1
        return expected_score_team1, expected_score_team2
    else:
        return None  # Handle case where one or both teams are not in the ratings




In [None]:
team1 = input("Enter the name of Team 1: ")
team2 = input("Enter the name of Team 2: ")

probabilities = calculate_win_probability(elo_system, team1, team2)
if probabilities:
    print(f"Probability of {team1} winning: {probabilities[0] * 100:.2f}%")
    print(f"Probability of {team2} winning: {probabilities[1] * 100:.2f}%")
else:
    print("One or both of the teams are not found in the current Elo ratings.")


Enter the name of Team 1: Notre Dame
Enter the name of Team 2: Northwestern
Probability of Notre Dame winning: 7.02%
Probability of Northwestern winning: 92.98%
