In [2]:
import os
import pandas as pd
import csv
from io import StringIO

In [9]:
def get_event_type(event):
    """Translate a tournament category code into its full name.

    Leading/trailing parentheses are removed and the code is upper-cased
    before the lookup, so ``'(gp)'`` and ``'GP'`` are treated the same.

    Args:
        event: Category code, optionally wrapped in parentheses.

    Returns:
        The human-readable event type for the code.

    Raises:
        ValueError: If the normalised code is not one of the known codes.
    """
    names_by_code = {
        'GP': 'Grand Prix',
        'JO': 'Olympics',
        'SA': 'Satellite',
        'A': 'World Cup',
        'CHZ': 'Zonal Championship',
        'CHM': 'World Championship',
    }

    code = event.strip('()').upper()
    label = names_by_code.get(code)
    if label is None:
        # The message reports the normalised code, not the raw input.
        raise ValueError(f'Unknown event type: {code}')
    return label


def get_tournament_data(tournament_data):
    """Break a tournament header cell into date, event name and event type.

    The text is split on whitespace: the first token is returned as the
    date, the last token is resolved through ``get_event_type``, and every
    token in between is re-joined with single spaces to form the event name.

    Args:
        tournament_data: Header text for one tournament column
            (presumably of the form ``'<date> <city> (<code>)'`` -- confirm
            against the source files).

    Returns:
        A ``(date, event_name, event_type)`` tuple.

    Raises:
        IndexError: If the text contains no tokens.
        ValueError: If the last token is not a known event code.
    """
    tokens = tournament_data.split()
    first, middle, last = tokens[0], tokens[1:-1], tokens[-1]
    return first, ' '.join(middle), get_event_type(last)

### Iterate through each data file

In [10]:
# Layout of each results file (0-based line / column positions).
HEADER_LINE = 3           # line whose cells hold the tournament titles
FIRST_ROW_LINE = 4        # first data row comes right after the header
MAX_ROWS = 500            # at most this many data rows are read per file
POINTS_START_COLUMN = 3   # rank, name and nationality precede the points

current_dir = os.getcwd()
results_dir = os.path.join(current_dir, "results")

# os.path.join never touches the filesystem; a missing directory only
# surfaces when it is listed, so that is the call that must be guarded.
try:
    # listdir order is arbitrary. Sorting makes the row order reproducible,
    # which matters because the later de-duplication keeps the last occurrence.
    result_files = sorted(os.listdir(results_dir))
except FileNotFoundError:
    print("No results directory found.")
    raise  # stop the notebook run instead of relying on exit() in a kernel

data = []

for results in result_files:
    if not results.endswith('.csv'):
        continue

    results_path = os.path.join(results_dir, results)
    with open(results_path, 'r') as file:
        lines = file.readlines()

    # Tournament titles are the header cells between the first two and the last.
    reader = csv.reader(StringIO(lines[HEADER_LINE].strip()))
    tournaments = list(reader)[0][2:-1]

    for line in lines[FIRST_ROW_LINE:FIRST_ROW_LINE + MAX_ROWS]:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines inside the row window

        # Parse rows with the csv module (like the header) so quoted fields
        # containing commas do not shift the columns.
        columns = next(csv.reader(StringIO(line)))
        current_rank = columns[0]
        fencer_name = columns[1]
        nationality = columns[2]

        for points_index, tournament in enumerate(tournaments, start=POINTS_START_COLUMN):
            points = columns[points_index]

            if points:
                # abs() drops the sign of the stored value -- presumably
                # negative entries carry a marker in the export; TODO confirm.
                points = abs(float(points))
                date, event_name, event_type = get_tournament_data(tournament)
                data.append([current_rank, fencer_name, nationality, event_name, event_type, points, date])

### Store data in a pandas DataFrame

In [11]:
# One row per (fencer, tournament) result. When the same fencer/event/date
# combination was collected more than once, only the last-seen row is kept.
df = (
    pd.DataFrame(
        data,
        columns=[
            'current_rank',
            'fencer_name',
            'fencer_nationality',
            'event_name',
            'event_type',
            'points',
            'date',
        ],
    )
    .drop_duplicates(subset=['fencer_name', 'event_name', 'date'], keep='last')
)
df

Unnamed: 0,current_rank,fencer_name,fencer_nationality,event_name,event_type,points,date
0,1,MASSIALAS Alexander,USA,Bonn,World Cup,2.0,11.11.22
1,1,MASSIALAS Alexander,USA,Tokyo,World Cup,14.0,09.12.22
2,1,MASSIALAS Alexander,USA,Paris,World Cup,32.0,12.01.23
3,1,MASSIALAS Alexander,USA,Turin,Grand Prix,21.0,11.02.23
4,1,MASSIALAS Alexander,USA,Cairo,World Cup,32.0,23.02.23
...,...,...,...,...,...,...,...
14732,496,EL AKKAD Ahmed,MAR,Casablanca,Zonal Championship,0.0,08.06.24
14733,497,LACHIRI Anouar,MAR,Casablanca,Zonal Championship,0.0,08.06.24
14734,498,AL MARZOOQI Khaled,UAE,Tashkent,Satellite,0.0,14.10.23
14735,499,NURUMOV Emir,KAZ,Tashkent,Satellite,0.0,14.10.23
