In [None]:
def calculate_course_handicap(handicap_index, course_rating, slope_rating, par=None):
    """
    Calculate the course handicap based on the golfer's handicap index,
    course rating, slope rating, and optional par.

    Args:
        handicap_index (float): The golfer's handicap index.
        course_rating (float): The course rating. Only used when ``par`` is given.
        slope_rating (int): The slope rating of the course.
        par (int, optional): Par for the course. When provided, the
            ``course_rating - par`` adjustment is added to the slope-scaled index;
            when omitted, only the slope-scaled index is used.

    Returns:
        int: The course handicap rounded to the nearest integer, with exact
        halves rounded upward (2.5 -> 3, -2.5 -> -2).
    """
    import math  # local import so this cell does not depend on another cell's imports

    course_handicap = handicap_index * (slope_rating / 113)
    if par is not None:
        # Full formula: add the course-rating-versus-par adjustment
        course_handicap += course_rating - par

    # Built-in round() sends exact halves to the nearest even integer
    # (round(2.5) == 2, round(10.5) == 10); handicap rounding sends .5 upward.
    return math.floor(course_handicap + 0.5)

In [None]:
# Positional arguments are (handicap_index, course_rating, slope_rating); no par is passed.
cr = calculate_course_handicap(12.7,31.3,99)
print(cr)  # Output: 11 with the definition in the cell above (12.7 * 99 / 113 ≈ 11.13)

In [None]:
import pandas as pd

# Read the posted-report workbook as-is
xls_path = "/home/justin/JustInternetAI/Calcutta/Data/DirtyData/Posted_Report_2025V2.xlsx"
df_raw = pd.read_excel(xls_path)

# Output schema: the first two fields are carried forward from the most recent
# row that names a golfer; the remaining fields are copied from each sheet row.
columns_to_keep = [
    "Golfer Name",
    "Handicap Index",
    "Date Played",
    "AGS",
    "Holes Played",
    "Course Rating",
    "Slope Rating",
    "Differential",
    "Score H.I.",
    "NSD",
    "Course Played",
]

parsed_rows = []
current_name = None
current_index = None

for _, row in df_raw.iterrows():
    # A populated "Golfer Name" cell switches the golfer context
    if pd.notnull(row.get("Golfer Name")):
        current_name, current_index = row["Golfer Name"], row["Handicap Index"]

    # Rows seen before any golfer has been named are dropped
    if current_name:
        parsed_row = {"Golfer Name": current_name, "Handicap Index": current_index}
        # Remaining fields come straight from the sheet (None when the column is absent)
        parsed_row.update({col: row.get(col, None) for col in columns_to_keep[2:]})
        parsed_rows.append(parsed_row)

# Assemble all records in one go
df_parsed = pd.DataFrame(parsed_rows)

# Quick look at the result
print(f"✅ Parsed {len(df_parsed)} rounds")
display(df_parsed.head())

In [None]:
def remove_duplicate_name_date_rows(parsed_rows, name_col=1, date_col=23, total_col=22):
    """
    Remove repeated rounds, keeping only the first occurrence of each.

    Two rows count as duplicates when their golfer name, date played AND total
    score all match, so two different rounds by the same golfer on the same day
    are both kept as long as their totals differ.

    Args:
        parsed_rows (list): Sequence of indexable rows (lists/tuples).
        name_col (int): Position of the golfer name within a row.
        date_col (int): Position of the date played within a row.
        total_col (int): Position of the total score within a row.
            The defaults match the row layout built by the hole-by-hole parsing
            cell of this notebook (Event, Player, Handicap, Tee, 18 hole scores,
            Total, Date, ...).

    Returns:
        list: The rows of ``parsed_rows`` in their original order, without duplicates.
    """
    def _key_part(value):
        # NaN never equals itself, so two rows whose total is missing would
        # otherwise always look different and never be de-duplicated.
        if isinstance(value, float) and value != value:
            return None
        return value

    key_columns = (name_col, date_col, total_col)
    seen = set()
    filtered = []

    for row in parsed_rows:
        key = tuple(_key_part(row[col]) for col in key_columns)
        if key not in seen:
            seen.add(key)
            filtered.append(row)

    return filtered

In [None]:
import pandas as pd
from datetime import datetime
from pathlib import Path

# Function to calculate Course Handicap
# NOTE: this re-defines (and, once this cell runs, replaces) the function of the
# same name defined in an earlier cell of this notebook.
def calculate_course_handicap(handicap_index, course_rating, slope_rating, par=72):
    """
    Calculate the course handicap.

    Args:
        handicap_index (float): The golfer's handicap index.
        course_rating (float): The course rating.
        slope_rating (float): The slope rating of the course.
        par (int, optional): Par for the course. Defaults to 72, the value this
            function previously hard-coded. Pass ``None`` to skip the
            ``course_rating - par`` adjustment and use the slope-scaled index only.

    Returns:
        int: The course handicap rounded to the nearest integer, with exact
        halves rounded upward (10.5 -> 11).
    """
    import math  # local import so this definition works regardless of cell imports

    raw = handicap_index * slope_rating / 113
    if par is not None:
        raw = raw + (course_rating - par)

    # Built-in round() rounds exact halves to the nearest even integer
    # (round(10.5) == 10); handicap rounding sends .5 upward.
    return math.floor(raw + 0.5)

# Ingest function for "Posted Report 2024.xlsx"
def parse_posted_report(file_path):
    """
    Flatten a posted-score report workbook into one record per round.

    A row with a non-null 'Golfer Name' starts a new golfer; that name and the
    row's 'Handicap Index' are carried forward to the rows that follow. Only
    rows with a non-null 'Date Played' (seen after a golfer has been named)
    produce an output record. Problems are reported with print() rather than
    raised; row numbers in messages are ``idx + 2`` (one for the header row,
    one for 1-based numbering).

    Args:
        file_path: Path of the workbook, passed to ``pandas.read_excel``.

    Returns:
        pandas.DataFrame: One row per parsed round with the columns
        'Golfer Name', 'Handicap Index', 'Date Played', 'AGS', 'Holes Played',
        'Course Rating', 'Slope Rating', 'Course Handicap', 'Differential',
        'Score H.I.', 'NSD' and 'Course Played'. Fields that are missing or
        cannot be converted are None.
    """
    df_raw = pd.read_excel(file_path)

    parsed_rows = []
    current_name = None
    current_index = None

    for idx in range(len(df_raw)):
        row = df_raw.iloc[idx]

        # A populated name cell switches the golfer context for following rows
        if pd.notnull(row['Golfer Name']):
            current_name = row['Golfer Name']
            current_index = row['Handicap Index']

        if pd.notnull(row['Date Played']) and current_name is not None:
            # Without a usable handicap index the whole round is dropped
            try:
                hi = float(current_index)
            except (ValueError, TypeError):
                print(f"❌ Skipping row {idx+2}: Invalid Handicap Index '{current_index}' for golfer {current_name}")
                continue

            # Reset before converting: if a conversion below raises, the record
            # must not silently reuse the previous row's rating/slope values.
            cr = sr = ch = None
            try:
                cr = float(row['Course Rating']) if pd.notnull(row['Course Rating']) else None
                sr = int(row['Slope Rating']) if pd.notnull(row['Slope Rating']) else None
                ch = int(calculate_course_handicap(hi, cr, sr)) if cr is not None and sr is not None else None
            except Exception as e:
                print(f"⚠️ Skipping Course Handicap calculation in row {idx+2}: {e}")
                ch = None

            # 'NH' in the Score H.I. column is treated as "no value"
            try:
                score_hi = float(row['Score H.I.']) if pd.notnull(row['Score H.I.']) and row['Score H.I.'] != 'NH' else None
            except (ValueError, TypeError):
                # TypeError covers non-numeric objects (e.g. a date cell); only this field is blanked
                print(f"❌ Skipping row {idx+2}: Invalid Score H.I. '{row['Score H.I.']}'")
                score_hi = None

            try:
                nsd = float(row['NSD']) if pd.notnull(row['NSD']) else None
            except (ValueError, TypeError):
                print(f"❌ Skipping row {idx+2}: Invalid NSD '{row['NSD']}'")
                nsd = None

            # Remaining conversions happen inline; any failure drops just this round
            try:
                parsed_rows.append({
                    'Golfer Name': current_name,
                    'Handicap Index': hi,
                    'Date Played': row['Date Played'].date() if isinstance(row['Date Played'], datetime) else row['Date Played'],
                    'AGS': int(row['AGS']) if pd.notnull(row['AGS']) else None,
                    'Holes Played': int(row['Holes Played']) if pd.notnull(row['Holes Played']) else None,
                    'Course Rating': cr,
                    'Slope Rating': sr,
                    'Course Handicap': ch,
                    'Differential': float(row['Differential']) if pd.notnull(row['Differential']) else None,
                    'Score H.I.': score_hi,
                    'NSD': nsd,
                    'Course Played': row['Course Played'] if 'Course Played' in row else None
                })
            except Exception as e:
                print(f"❌ Failed to parse row {idx+2} for golfer {current_name}: {e}")

    df_parsed = pd.DataFrame(parsed_rows)
    return df_parsed

In [None]:
# Parse the 2025 posted report, then write the cleaned frame back out
# (index=False keeps the DataFrame index out of the sheet).
df_parsed = parse_posted_report(
    "/home/justin/JustInternetAI/Calcutta/Data/DirtyData/Posted_Report_2025V2.xlsx"
)
df_parsed.to_excel(
    "/home/justin/JustInternetAI/Calcutta/Data/DirtyData/Cleaned_Posted_Report_2025V2.xlsx",
    index=False,
)

In [None]:
# Parse GolfData spreadsheet in notebook cell
import pandas as pd
from pathlib import Path
from datetime import datetime

# Path to your file
file_path = Path("/home/justin/JustInternetAI/Calcutta/Data/DirtyData/Hole2025V2.xlsx")  # Replace if needed

# Load the raw Excel data. header=None keeps every sheet row as data, so the
# columns are addressed by integer position below.
df_raw = pd.read_excel(file_path, header=None)

event = "individual"

# Columns to extract in output
output_columns = [
    "Event", "Player", "Handicap", "Tee"
] + [str(i) for i in range(1, 19)] + ["Total"] + ["Date"] + ["Index"] + ["CR", "SR"]

# Placeholder for cleaned rows
parsed_rows = []
current_name = None
current_tee = None
# Per-golfer state is initialised here so the loop never depends on values
# left over from another cell (an earlier cell also assigns a global `cr`)
# and so the error message below can always be formatted.
handicap_index = None
cr = None
sr = None

# Iterate over rows, skipping the first (header) row
for idx in range(1, len(df_raw)):
    row = df_raw.iloc[idx]
    name = row[1]

    # If there's a new golfer name in this row, update context only
    if pd.notnull(name) and isinstance(name, str):
        current_name = name.strip()
        current_tee = row[6]
        # Forget the previous golfer's values so they cannot leak into this
        # golfer's rounds when this row lacks a rating/slope or index.
        handicap_index = None
        cr = None
        sr = None
        if pd.notnull(row[8]) and pd.notnull(row[9]):
            cr = float(row[8])
            sr = float(row[9])
        else:
            print(f"⚠️ Missing course rating or slope rating in row {idx + 2}")
        continue  # Name rows carry context only, never a round

    # If no current name is set, skip (invalid row before name row)
    if not current_name:
        continue

    # Extract relevant columns from this row
    try:
        date_played = pd.to_datetime(row[2]).date() if pd.notnull(row[2]) else None
        # A blank index cell reuses the most recent index seen for this golfer
        if pd.notnull(row[3]):
            handicap_index = float(row[3])
        if handicap_index is None or cr is None or sr is None:
            # Reported through the handler below instead of computing with missing inputs
            raise ValueError("missing handicap index, course rating or slope rating")
        course_handicap = calculate_course_handicap(handicap_index,cr,sr)
        # Holes 1–18; columns 19 and 29 are skipped (presumably front/back nine
        # subtotals — confirm against the sheet layout)
        hole_scores = row[10:19].tolist() + row[20:29].tolist()
        total = row[30]

        parsed_rows.append([
            event,
            current_name,
            course_handicap,
            current_tee
        ] + hole_scores + [total] + [date_played] + [handicap_index] + [cr, sr])

    except Exception as e:
        print(f"⚠️ Skipping row {idx+1}:  handicap_index ={handicap_index}, cr = {cr}, sr = {sr}    {e}")


# Drop repeated rounds before building the frame
parsed_rows = remove_duplicate_name_date_rows(parsed_rows)

# Create cleaned DataFrame
df_cleaned = pd.DataFrame(parsed_rows, columns=output_columns)

# Save to Excel
output_path = Path("/home/justin/JustInternetAI/Calcutta/Data/DirtyData/cleaned_hole_by_hole_2025.xlsx")
df_cleaned.to_excel(output_path, index=False)

print(f"✅ Parsed and saved to: {output_path}")

In [None]:
# Example call with keyword arguments.
# NOTE: when the notebook is run top to bottom, this name resolves to the later
# three-argument definition of calculate_course_handicap, which always adds
# (course_rating - 72) — not to the slope-only branch of the first definition.
hcp = calculate_course_handicap(handicap_index=14.4, course_rating=70.5, slope_rating=128)
print(f"Course Handicap: {hcp}")
