# Apport! - Uitslagen app
Voor het in batch schrapen van wedstrijduitslagen van uitslagen.nl.

**Handleiding:**

Start het programma en volg de instructies. Kies of je een CSV-bestand met namen wilt uploaden of de namen handmatig wilt invoeren. Elke regel bevat de *volledige* naam van één atleet, optioneel gevolgd door een komma en het geboortejaar.


Voorbeeld:
```
Pieter PAC, 1895,
Jan Janssen,
Teddy Veenlijk, 2014
Hans Abonnement
```


Het ophalen van uitslagen duurt 10-15 seconden per atleet. Na afloop worden de resultaten automatisch gedownload.

In [None]:
import logging
from datetime import datetime
from urllib.parse import urlencode

import numpy as np
import pandas as pd
from google.colab import files
from tqdm import tqdm

# Configure the root logger so the INFO-level progress messages emitted by
# the functions below are shown.
# NOTE(review): basicConfig() does nothing when the root logger already has
# handlers, which is common in notebook runtimes; if the INFO messages do not
# appear, force=True may be needed — confirm in the target environment.
logging.basicConfig(level=logging.INFO)

def get_input_method():
    """Ask the user how the athletes will be supplied.

    Prints a two-item menu and keeps prompting until the answer, stripped of
    surrounding whitespace, is exactly "1" (upload a CSV file) or "2" (type
    the athletes in by hand).

    Returns:
        The string "1" or "2".
    """
    for menu_line in (
        "Kies een methode:",
        "1. Upload CSV-bestand",
        "2. Handmatig invoeren",
    ):
        print(menu_line)

    valid_answers = ("1", "2")
    answer = input("Voor je keuze in: ").strip()
    while answer not in valid_answers:
        # Anything else is rejected and the question is asked again.
        print("Kies 1 of 2.")
        answer = input("Voor je keuze in: ").strip()
    return answer

def upload_csv():
    """Let the user upload a CSV file and load it as a table of strings.

    Calls ``files.upload()`` and reads the first uploaded entry by its file
    name. The file is read without a header row and every cell is kept as
    text.

    Returns:
        The DataFrame read from the first uploaded file.
    """
    uploaded = files.upload()
    # Only the first uploaded file is used; any others are ignored.
    # NOTE(review): reading by name presumes the upload was also written to
    # the working directory under that name — confirm for the runtime used.
    first_name = list(uploaded)[0]
    return pd.read_csv(first_name, header=None, dtype=str)

def _parse_athlete_line(line):
    """Split one "<name>, <year>" line into a [name, year_of_birth] pair."""
    fields = [field.strip() for field in line.split(",")]
    name = fields[0]
    # Only the second field is the year. Splitting on every comma means a
    # trailing comma ("Pieter PAC, 1895,") ends up in a third, ignored field
    # instead of being glued onto the year.
    year_of_birth = fields[1] if len(fields) > 1 and fields[1] else None
    return [name, year_of_birth]


def input_csv():
    """Read athletes typed in by the user, one "<name>, <year>" per line.

    Lines are read from standard input until the first empty line. The year
    is optional; a missing or empty year is stored as None.

    Returns:
        A DataFrame with column 0 holding the name and column 1 holding the
        year of birth as a string (or None). The two columns are present
        even when no athlete was entered.
    """
    print("Één atleet per regel <naam, geboortejaar>")
    print("Druk twee keer op enter om door te gaan.")

    # iter() with a sentinel stops at the first line that is empty after
    # stripping, which is how the user signals the end of the list.
    athletes = [
        _parse_athlete_line(line)
        for line in iter(lambda: input().strip(), "")
    ]

    return pd.DataFrame(athletes, columns=[0, 1])

def get_athlete_data():
    """Obtain the athlete table via the input method the user selects.

    Returns:
        The result of ``upload_csv()`` when the user picks "1", the result
        of ``input_csv()`` when the user picks "2", and None for any other
        value.
    """
    method = get_input_method()

    if method == "1":
        return upload_csv()
    if method == "2":
        return input_csv()
    return None

def parser(athlete_data):
    """Build the search URL for every athlete in the input table.

    Args:
        athlete_data: DataFrame whose first column (by position) holds the
            athlete's full name and whose optional second column holds the
            year of birth. The frame is not modified.

    Returns:
        A list with one ``[name, year, url]`` entry per athlete. ``name`` is
        the name stripped of surrounding whitespace, ``year`` is the
        stripped year as a string or None when it is missing or empty, and
        ``url`` is the search URL. Rows without a name are skipped.
    """
    base_url = 'https://uitslagen.nl/zoek.html'
    # A names-only input has a single column; treat the year as missing
    # instead of indexing past the end of the row.
    has_year_column = athlete_data.shape[1] > 1

    athlete_list = []
    for row in athlete_data.itertuples(index=False, name=None):
        raw_name = row[0]
        if pd.isna(raw_name) or not str(raw_name).strip():
            logging.warning("Skipping row without a name: %s", list(row))
            continue
        name = str(raw_name).strip()

        raw_year = row[1] if has_year_column else None
        year = '' if pd.isna(raw_year) else str(raw_year).strip()

        # urlencode turns spaces into '+' and percent-encodes characters
        # such as '&' or accented letters that would otherwise corrupt the
        # query string.
        # NOTE(review): non-ASCII names are encoded as UTF-8 — confirm the
        # site's search expects that encoding.
        query = urlencode({'naam': name, 'exct': 'j', 'gbjr': year})
        athlete_list.append([name, year or None, f'{base_url}?{query}'])

    logging.info("Created urls for %d athletes", len(athlete_list))

    return athlete_list

def scrape(athlete_list):
    """Fetch the race results of every athlete and combine them in one table.

    Args:
        athlete_list: Rows as produced by ``parser``; element 0 of each row
            is the athlete's name and element 2 is the search URL to fetch.

    Returns:
        A DataFrame with the columns listed in ``headers`` and one row per
        race found. An athlete without results, or whose page could not be
        read, gets a single row with only 'Naam' filled in. The unit
        suffixes are removed from the 'Snelheid' and 'Tempo' columns.
    """
    headers = ['Datum', 'Evenement', 'Plaats', 'Categorie', 'Afstand', 'Positie', 'Naam', 'Woonplaats/vereniging',
               'Bruto', 'Netto', 'Snelheid', 'Tempo']

    # Per-athlete frames are collected and concatenated once at the end,
    # instead of growing the output frame inside the loop.
    athlete_frames = []

    for ath in tqdm(athlete_list, desc="Uitslagen ophalen"):
        logging.info("Scraping for athlete %s", ath[0])
        try:
            scrape_results = pd.read_html(ath[2])
        except (ValueError, OSError) as err:
            # read_html raises ValueError when the page contains no tables,
            # and network failures typically surface as OSError subclasses.
            # One failing athlete must not throw away the results already
            # collected for the others, so it is reported as "no results".
            logging.warning("Could not read results for %s: %s", ath[0], err)
            scrape_results = []

        # NOTE(review): presumably the first four tables and the last one are
        # page layout rather than results — confirm against the live site.
        scrape_clean = scrape_results[4:-1]
        logging.info("%d results found for %s", len(scrape_clean), ath[0])

        if scrape_clean:
            # One output row per result table found for this athlete.
            race_results = pd.DataFrame(index=range(len(scrape_clean)), columns=headers)

            for i, table in enumerate(scrape_clean):
                # Date, event and place are taken from the second column
                # header, formatted as "<date> <event>,<place>".
                # NOTE(review): only the first word after the date is kept as
                # the event name — confirm this is intended for multi-word
                # event names.
                dat_event_loc = str(table.columns[1]).split(",")
                dat_event = dat_event_loc[0].split(" ")
                race_results.loc[i, 'Datum'] = dat_event[0]
                race_results.loc[i, 'Evenement'] = dat_event[1]
                race_results.loc[i, 'Plaats'] = dat_event_loc[1]

                # Category and distance share one comma-separated cell.
                cat_dist = str(table.iloc[1, 0]).split(",")
                race_results.loc[i, 'Categorie'] = cat_dist[0]
                race_results.loc[i, 'Afstand'] = cat_dist[1]

                # The fourth table row fills the remaining output columns
                # ('Positie' through 'Tempo').
                race_results.iloc[i, 5:] = table.iloc[3, :]

        else:
            # No race results: keep a row carrying only the athlete's name.
            race_results = pd.DataFrame(index=[0], columns=headers)
            race_results['Naam'] = ath[0]

        athlete_frames.append(race_results)

    if not athlete_frames:
        # pd.concat rejects an empty list; return the empty table instead.
        return pd.DataFrame(columns=headers)

    tot_results = pd.concat(athlete_frames, ignore_index=True)

    # Strip the unit suffixes once, after all athletes have been combined.
    tot_results['Snelheid'] = tot_results['Snelheid'].str.replace('\xa0km/u', '', regex=False)
    tot_results['Tempo'] = tot_results['Tempo'].str.replace('\xa0min/km', '', regex=False)

    return tot_results

def main():
    """Run the full workflow: collect athletes, scrape, save and download.

    The athlete table is turned into search URLs and scraped, and the
    combined results are written to ``uitslagen_<YYYY-MM-DD>.csv`` without
    the index column. The file name is then printed and passed to
    ``files.download``.
    """
    athlete_frame = get_athlete_data()
    results = scrape(parser(athlete_frame))

    # The date stamp is taken after scraping, so it reflects the moment the
    # file is written.
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    output_file = f"uitslagen_{date_stamp}.csv"
    results.to_csv(output_file, index=False)

    print(f"\nKlaar! Uitslagen zijn opgeslagen in {output_file}.")
    files.download(output_file)

# Start the interactive workflow only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
