In [4]:
import pandas as pd
import numpy as np
import glob
import os
import re
from datetime import datetime, timedelta

### Load data

In [5]:
def load_and_concat_csv(folder_path, chunksize=None):
    all_files = glob.glob(os.path.join(folder_path, "*.csv"))
    df_list = []

    for filename in all_files:
        try:
            # Read the CSV file, potentially in chunks
            df_chunks = pd.read_csv(filename, chunksize=chunksize, 
                                    low_memory=False, encoding='utf-8')
            
            if chunksize:
                # If reading in chunks, concatenate the chunks
                df = pd.concat(df_chunks, ignore_index=True)
            else:
                df = next(df_chunks)
            
            df['source_file'] = os.path.basename(filename)
            df_list.append(df)
        except Exception as e:
            print(f"Error reading file {filename}: {str(e)}")

    combined_df = pd.concat(df_list, ignore_index=True, sort=False)
    
    # Handle missing values
    #combined_df = combined_df.fillna('Unknown')  # or use another appropriate method
    
    return combined_df

In [3]:
folder_path = "../output/"
result_df = load_and_concat_csv(folder_path, chunksize=100000)

print(result_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8009673 entries, 0 to 8009672
Data columns (total 23 columns):
 #   Column                             Dtype  
---  ------                             -----  
 0   Rank                               object 
 1   Performance                        object 
 2   Surname, first name                object 
 3   Club                               object 
 4   Nat.                               object 
 5   YOB                                object 
 6   M/F                                object 
 7   Rank M/F                           float64
 8   Cat                                object 
 9   Cat. Rank                          float64
 10  Avg.Speed km/h                     object 
 11  Age graded performance             object 
 12  Runner ID                          object 
 13  Event                              object 
 14  Date                               object 
 15  Distance                           object 
 16  Finishers         

In [21]:
def sample_csv(input_file, output_file, n, random_state=None):
    """
    Sample n rows from a CSV file and save to a new CSV file.
    
    :param input_file: Path to the input CSV file
    :param output_file: Path to save the output CSV file
    :param n: Number of rows to sample
    :param random_state: Seed for random number generator (optional)
    """
    # Read the CSV file
    df = pd.read_csv(input_file)
    
    # Sample n rows
    sampled_df = df.sample(n=n, random_state=random_state)
    
    # Save the sampled DataFrame to a new CSV file
    sampled_df.to_csv(output_file, index=False)
    
    print(f"Sampled {n} rows from {input_file} and saved to {output_file}")

# Usage
input_file = '../output/all_events_data_2024.csv'
output_file = '2024_sample.csv'
sample_size = 500 

sample_csv(input_file, output_file, sample_size)

Sampled 500 rows from /Users/riz/Projects/duvdata/output/all_events_data_2024.csv and saved to 2024_sample.csv


### Clean data/Feature eng

In [83]:
df = pd.read_csv("../output/all_events_data_2024.csv")
#df[df['Event ID']==105226]

In [87]:
def split_runner_name(data):
    # Initialize 'Surname' and 'First Name' columns with empty strings
    data['Surname'] = ''
    data['First Name'] = ''
    
    # Check 'Surname, first name' column first
    if 'Surname, first name' in data.columns:
        mask = data['Surname, first name'].notna()
        temp = data.loc[mask, 'Surname, first name'].str.split(',', expand=True)
        data.loc[mask, 'Surname'] = temp[0].str.strip()
        data.loc[mask, 'First Name'] = temp[1].str.strip() if temp.shape[1] > 1 else ''
    
    # Check 'Original name\nSurname, first name' column if 'Surname' is still empty
    original_name_column = 'Original name\nSurname, first name'
    if original_name_column in data.columns:
        mask = (data['Surname'] == '') & (data[original_name_column].notna())
        temp = data.loc[mask, original_name_column].str.split(',', expand=True)
        data.loc[mask, 'Surname'] = temp[0].str.strip()
        data.loc[mask, 'First Name'] = temp[1].str.strip() if temp.shape[1] > 1 else ''
    
    return data
def parse_performance(data, column_name, time_column, distance_column):
    time_regex = re.compile(r'(?:(\d+)d )?(\d{1,2}):(\d{2}):(\d{2}) h')
    distance_regex = re.compile(r'(\d+\.?\d*) km')
    
    def parse_entry(entry):
        if 'km' in entry:
            match = distance_regex.search(entry)
            return (None, float(match.group(1)) if match else None)
        else:
            match = time_regex.search(entry)
            if match:
                days, hours, minutes, seconds = match.groups(default='0')
                total_seconds = timedelta(days=int(days), hours=int(hours), minutes=int(minutes), seconds=int(seconds)).total_seconds()
                return (total_seconds, None)
        return (None, None)
    
    data[time_column], data[distance_column] = zip(*data[column_name].apply(parse_entry))
    return data

def split_distance_column(df):
    distance_pattern = re.compile(r'(\d+\.?\d*)\s*(km|mi|h)')
    race_type_pattern = re.compile(r'(\d+\.?\d*\s*(km|mi|h))\s*(.*)')
    
    def split_distance(entry):
        distance_match = distance_pattern.search(entry)
        race_type_match = race_type_pattern.search(entry)
        
        if distance_match:
            distance = distance_match.group(0)
            unit = distance_match.group(2)
            race_type = 'Time' if unit == 'h' else 'Distance'
        else:
            distance = None
            race_type = None
        
        terrain = race_type_match.group(3).strip() if race_type_match and len(race_type_match.groups()) > 2 else None
        
        return distance, terrain, race_type
    
    df['Distance/Time'], df['Terrain'], df['Event Type'] = zip(*df['Distance'].apply(split_distance))
    return df

def convert_miles_to_km(entry):
    if pd.isna(entry):
        return None
    match = re.match(r'(\d+\.?\d*)(mi|km)', str(entry))
    if match:
        distance, unit = float(match.group(1)), match.group(2)
        return distance * 1.6 if unit == 'mi' else distance
    return None

def standardize_terrain(terrain):
    terrain = str(terrain).lower()
    if 'trail' in terrain:
        return 'trail'
    elif 'road' in terrain:
        return 'road'
    elif 'track' in terrain:
        return 'track'
    else:
        return 'other'


def parse_date_range(date_str):
    if pd.isna(date_str):
        return None
    
    date_str = str(date_str).strip()
    
    # If it's already in the correct format, return as is
    if re.match(r'\d{2}\.\d{2}\.\d{4}', date_str):
        return date_str
    
    # If it's a single date without year, add the current year
    if re.match(r'\d{1,2}\.\d{1,2}\.?$', date_str):
        return f"{date_str.rstrip('.')}1970"  # Using 1970 as a placeholder year
    
    # If it's a date range
    if '-' in date_str:
        start, end = date_str.split('-')
        start = start.strip()
        end = end.strip()
        
        # Extract day, month, and year components
        start_parts = re.findall(r'\d+', start)
        end_parts = re.findall(r'\d+', end)
        
        # Ensure we have at least day and month for start date
        if len(start_parts) < 2:
            return None  # Return None if format is unexpected
        
        # Get year from end date if available, otherwise use current year
        year = end_parts[-1] if len(end_parts) == 3 else str(datetime.now().year)
        
        # Construct the full start date
        return f"{start_parts[0].zfill(2)}.{start_parts[1].zfill(2)}.{year}"
    
    # If it's a single date with year
    if re.match(r'\d{1,2}\.\d{1,2}\.\d{4}', date_str):
        parts = date_str.split('.')
        return f"{parts[0].zfill(2)}.{parts[1].zfill(2)}.{parts[2]}"
    
    # If we can't parse the date, return None
    return None


def extract_location(event):
    match = re.search(r'\((\w+)\)$', event)
    return match.group(1) if match else 'Unknown'

def categorize_age_group(age):
    if pd.isna(age):
        return 'Unknown'
    elif age < 20:
        return 'Under 20'
    elif 20 <= age < 30:
        return '20-29'
    elif 30 <= age < 40:
        return '30-39'
    elif 40 <= age < 50:
        return '40-49'
    elif 50 <= age < 60:
        return '50-59'
    elif 60 <= age < 70:
        return '60-69'
    else:
        return '70+'

def add_age_group(df):
    df['Age Group'] = df['Age'].apply(categorize_age_group)
    return df

def add_runner_statistics(df):
    # Sort the dataframe by Runner ID and Date
    df = df.sort_values(['Runner ID', 'Date'])
    
    # Group by Runner ID
    grouped = df.groupby('Runner ID')
    
    # Number of races (Experience Level) - subtract 1 to exclude current race
    df['Race Count'] = grouped.cumcount()
    
    # Determine the distance to use for cumulative calculation
    df['Distance For Cumulative'] = df.apply(
        lambda row: row['Distance Finish'] if row['Event Type'] == 'Time' else row['Distance KM'],
        axis=1
    )
    
    # Cumulative sum of Distance (excluding current race)
    df['Cumulative Distance KM'] = grouped['Distance For Cumulative'].transform(
        lambda x: x.shift().cumsum()
    )
    
    # Rolling average of Winner Percentage (excluding current race)
    df['Avg Winner Percentage'] = grouped['Winner Percentage'].transform(
        lambda x: x.shift().expanding().mean()
    )
    
    # Remove the temporary column
    df = df.drop('Distance For Cumulative', axis=1)
    
    # Replace NaN values with 0 for first race of each runner
    df['Race Count'] = df['Race Count'].fillna(0)
    df['Cumulative Distance KM'] = df['Cumulative Distance KM'].fillna(0)
    df['Avg Winner Percentage'] = df['Avg Winner Percentage'].fillna(0)
    
    return df

def extract_finishers(df):
    def parse_finishers(finishers_str):
        match = re.match(r'(\d+)\s*\((\d+)\s*M,\s*(\d+)\s*F\)', finishers_str)
        if match:
            total = int(match.group(1))
            male = int(match.group(2))
            female = int(match.group(3))
            return total, male, female
        else:
            return None, None, None

    df[['Total Finishers', 'Male Finishers', 'Female Finishers']] = df['Finishers'].apply(parse_finishers).apply(pd.Series)
    return df


def add_elevation_gain_per_km(df):
    # Calculate elevation gain per km
    df['Elevation Gain per KM'] = df.apply(
        lambda row: row['Elevation Gain'] / row['Distance KM'] 
        if pd.notnull(row['Elevation Gain']) and pd.notnull(row['Distance KM']) and row['Distance KM'] != 0 
        else np.nan, 
        axis=1
    )
    
    # Calculate the median elevation gain per km for each terrain type
    median_elevation_by_terrain = df.groupby('Terrain')['Elevation Gain per KM'].median()
    
    # Fill NaN values with the median for the corresponding terrain type
    df['Elevation Gain per KM'] = df.apply(
        lambda row: median_elevation_by_terrain[row['Terrain']] 
        if pd.isnull(row['Elevation Gain per KM']) and row['Terrain'] in median_elevation_by_terrain
        else row['Elevation Gain per KM'],
        axis=1
    )
    
    return df

def calculate_winner_percentage(df):
    # For distance races (where Event Type is 'Distance')
    distance_mask = df['Event Type'] == 'Distance'
    df.loc[distance_mask, 'Winner Percentage'] = (1 - df.loc[distance_mask, 'Time Seconds Winner'] / df.loc[distance_mask, 'Time Seconds Finish']).round(2)

    # For time races (where Event Type is 'Time')
    time_mask = df['Event Type'] == 'Time'
    df.loc[time_mask, 'Winner Percentage'] = (1 - df.loc[time_mask, 'Distance Finish'] / df.loc[time_mask, 'Distance Winner']).round(2)

    return df


In [88]:
def clean_data(df):
    df = split_runner_name(df)
    df = split_distance_column(df)
    df = parse_performance(df, 'Performance', 'Time Seconds Finish', 'Distance Finish')
    df = parse_performance(df, 'Winner Time', 'Time Seconds Winner', 'Distance Winner')
    df = calculate_winner_percentage(df)
    df = extract_finishers(df)
    
    df['Terrain'] = df['Terrain'].apply(standardize_terrain)
    df['Distance KM'] = df['Distance/Time'].apply(convert_miles_to_km).round(0)
    df['Finish Percentage'] = (df['Rank'] / df['Total Finishers']).round(2)
    df['Distance KM'] = df['Distance KM'].replace(0, pd.NA)
    df['Average Speed'] = df['Time Seconds Finish'] / df['Distance KM']
    df['Race Location'] = df['Event'].apply(extract_location)
    df['Gender'] = df['M/F']

    df['Date'] = pd.to_datetime(df['Date'].apply(parse_date_range), format='%d.%m.%Y')
    df['YOB'] = pd.to_numeric(df['YOB'], errors='coerce').astype('Int64')
    df['Avg.Speed km/h'] = df['Avg.Speed km/h'].astype(float)
    df['Elevation Gain'] = pd.to_numeric(df['Elevation Gain'].replace({'Hm': '', 'm': ''}, regex=True), errors='coerce')

    df = df.fillna({'M/F': 'Unknown', 'Cat': 'Unknown'})
    df['Club'] = df['Club'].str.strip().str.replace(r'[^\w\s]', '', regex=True)
    df['Nat.'] = df['Nat.'].str.strip().str.upper()
    df['Age'] = df['Date'].dt.year - df['YOB']
    df = add_age_group(df)
    
    df = add_runner_statistics(df)
    df = add_elevation_gain_per_km(df)
    
    df = df.sort_values(by=['Date', 'Race Location', 'Event', 'Rank'])

    return df

In [89]:
# Apply the cleaning function to your DataFrame
df_clean = clean_data(df)
df_clean.head()

Unnamed: 0,Rank,Performance,"Surname, first name",Club,Nat.,YOB,M/F,Rank M/F,Cat,Cat. Rank,...,Finish Percentage,Average Speed,Race Location,Gender,Age,Age Group,Race Count,Cumulative Distance KM,Avg Winner Percentage,Elevation Gain per KM
419506,1,5:27:45 h,,,CHN,,M,1,Unknown,1.0,...,0.02,371.037736,CHN,M,,Unknown,0,0.0,0.0,12.0
419507,2,5:52:49 h,,,CHN,1981.0,M,2,M40,1.0,...,0.03,399.415094,CHN,M,43.0,40-49,0,0.0,0.0,12.0
419508,3,6:47:54 h,,,CHN,1982.0,M,3,M40,2.0,...,0.05,461.773585,CHN,M,42.0,40-49,0,0.0,0.0,12.0
419509,4,7:08:28 h,,,CHN,,M,4,Unknown,2.0,...,0.06,485.056604,CHN,M,,Unknown,0,0.0,0.0,12.0
419510,5,7:10:26 h,,,CHN,,F,1,Unknown,3.0,...,0.08,487.283019,CHN,F,,Unknown,0,0.0,0.0,12.0


In [90]:
columns_to_keep = ['Runner ID','First Name','Surname','Nat.','Gender','Age','Age Group','Cat','YOB',
                   'Race Count','Cumulative Distance KM','Avg Winner Percentage',
                   'Event ID','Event','Event Type','Date','Race Location','Elevation Gain','Elevation Gain per KM',
                   'Total Finishers','Male Finishers','Female Finishers',
                   'Rank','Rank M/F','Cat. Rank','Finish Percentage','Winner Percentage',
                   'Distance/Time','Distance KM','Terrain',
                   'Time Seconds Finish','Distance Finish','Average Speed','Avg.Speed km/h']
df_clean = df_clean[columns_to_keep]

In [92]:
df_clean.to_csv('../tmp_clean.csv',index=False)

In [91]:
df_clean.head()

Unnamed: 0,Runner ID,First Name,Surname,Nat.,Gender,Age,Age Group,Cat,YOB,Race Count,...,Cat. Rank,Finish Percentage,Winner Percentage,Distance/Time,Distance KM,Terrain,Time Seconds Finish,Distance Finish,Average Speed,Avg.Speed km/h
419506,2152651,Shu-Bin (金树滨),Jin,CHN,M,,Unknown,Unknown,,0,...,1.0,0.02,0.0,53km,53.0,road,19665.0,,371.037736,9.703
419507,1546671,Xin-Hua (钟新华),Zhong,CHN,M,43.0,40-49,M40,1981.0,0,...,1.0,0.03,0.07,53km,53.0,road,21169.0,,399.415094,9.013
419508,1564224,Xin-Hui (王心晖),Wang,CHN,M,42.0,40-49,M40,1982.0,0,...,2.0,0.05,0.2,53km,53.0,road,24474.0,,461.773585,7.796
419509,2153509,Qi-Tao (余琦涛),Yu,CHN,M,,Unknown,Unknown,,0,...,2.0,0.06,0.24,53km,53.0,road,25708.0,,485.056604,7.422
419510,2153079,Wei-Wei (陶维维),Tao,CHN,F,,Unknown,Unknown,,0,...,3.0,0.08,0.24,53km,53.0,road,25826.0,,487.283019,7.388


In [95]:
df_clean.sort_values(by=['Average Speed'])

Unnamed: 0,Runner ID,First Name,Surname,Nat.,Gender,Age,Age Group,Cat,YOB,Race Count,...,Cat. Rank,Finish Percentage,Winner Percentage,Distance/Time,Distance KM,Terrain,Time Seconds Finish,Distance Finish,Average Speed,Avg.Speed km/h
112850,2237537,Michael Riis,Jacobsen,NOR,M,49.0,40-49,M45,1975,0,...,2.0,0.94,-inf,63km,63.0,trail,0.0,,0.0,6.889
112845,2237535,Hanna,Løvås,NOR,F,25.0,20-29,W23,1999,0,...,3.0,0.84,-inf,63km,63.0,trail,0.0,,0.0,6.889
112853,1880838,Sebastian Edmund Pedersen,Wood,NOR,M,31.0,30-39,M23,1993,1,...,17.0,1.00,-inf,63km,63.0,trail,0.0,,0.0,6.889
112852,1879629,Magnus Romstad,Stavne,NOR,M,31.0,30-39,M23,1993,0,...,17.0,0.98,-inf,63km,63.0,trail,0.0,,0.0,6.889
112846,1638497,Christine,Marcussen,NOR,F,36.0,30-39,W35,1988,1,...,3.0,0.86,-inf,63km,63.0,trail,0.0,,0.0,6.889
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401199,2023874,Samson,Gondwe,ZAM,M,,Unknown,Unknown,,0,...,6.0,0.78,0.50,16h,,other,,53.648,,
401200,2163991,Christine,Coppinger,ZAM,F,,Unknown,Unknown,,0,...,6.0,0.83,0.50,16h,,other,,53.648,,
401201,2163992,Sandra,Benthe,ZAM,F,,Unknown,Unknown,,0,...,11.0,0.89,0.56,16h,,other,,46.942,,
401202,2163993,Juliet,Munro,ZAM,F,,Unknown,Unknown,,0,...,11.0,0.94,0.56,16h,,other,,46.942,,


### Exclude erroneous entries
- where 0 time is recorded
- where speed is greater than x
- 

In [100]:
df_clean[df_clean['Avg.Speed km/h']>15].sort_values(by=['Average Speed'])

Unnamed: 0,Runner ID,First Name,Surname,Nat.,Gender,Age,Age Group,Cat,YOB,Race Count,...,Cat. Rank,Finish Percentage,Winner Percentage,Distance/Time,Distance KM,Terrain,Time Seconds Finish,Distance Finish,Average Speed,Avg.Speed km/h
231523,1862637,Tonny,Skink,ZIM,M,,Unknown,Unknown,,0,...,1.0,0.00,0.00,50km,50.0,road,10011.0,,200.22,17.980
231524,2185177,Puseletso,Mofokeng,RSA,M,,Unknown,Unknown,,0,...,2.0,0.00,0.00,50km,50.0,road,10021.0,,200.42,17.962
231525,393683,Lutendo,Mapoto,RSA,M,36.0,30-39,M35,1988,1,...,1.0,0.00,0.00,50km,50.0,road,10028.0,,200.56,17.950
372363,1875143,Pule,Sibeko,RSA,M,26.0,20-29,M23,1998,0,...,1.0,0.00,0.00,50km,50.0,road,10029.0,,200.58,17.948
372364,1644528,Guillaume,Ruel,FRA,M,27.0,20-29,M23,1997,0,...,2.0,0.01,0.00,50km,50.0,road,10052.0,,201.04,17.907
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330496,2024927,Benjamin,Becker,GER,M,39.0,30-39,M35,1985,1,...,2.0,0.04,0.12,50km,50.0,road,11998.0,,239.96,15.003
330312,2024927,Benjamin,Becker,GER,M,39.0,30-39,M35,1985,0,...,2.0,0.03,0.12,50km,50.0,road,11998.0,,239.96,15.003
383062,1703815,Laurent,Couderc,FRA,M,32.0,30-39,M23,1992,0,...,1.0,1.00,0.00,3h,,road,,45.310,,15.103
265113,1063539,Dariusz,Nozynski,POL,M,44.0,40-49,M40,1980,1,...,1.0,0.02,0.00,6h,,track,,93.670,,15.612


In [108]:
df[df['Performance']=="0:00:00 h"]

Unnamed: 0,Rank,Performance,"Surname, first name",Club,Nat.,YOB,M/F,Rank M/F,Cat,Cat. Rank,...,Distance Winner,Winner Percentage,Total Finishers,Male Finishers,Female Finishers,Distance KM,Finish Percentage,Average Speed,Race Location,Gender
112845,43,0:00:00 h,"Løvås, Hanna",*Trondheim,NOR,1999,F,7,W23,3.0,...,,-inf,51,43,8,63.0,0.84,0.0,NOR,F
112846,44,0:00:00 h,"Marcussen, Christine",*Oslo,NOR,1988,F,7,W35,3.0,...,,-inf,51,43,8,63.0,0.86,0.0,NOR,F
112847,45,0:00:00 h,"Finkenhagen, Noa",*Drammen,NOR,2002,M,37,MU23,4.0,...,,-inf,51,43,8,63.0,0.88,0.0,NOR,M
112848,46,0:00:00 h,"Galaaen, Øistein Schmidt",*Oslo,NOR,1976,M,37,M45,2.0,...,,-inf,51,43,8,63.0,0.9,0.0,NOR,M
112849,47,0:00:00 h,"Hye-Knudsen, William",*Malé,DEN,1995,M,37,M23,17.0,...,,-inf,51,43,8,63.0,0.92,0.0,NOR,M
112850,48,0:00:00 h,"Jacobsen, Michael Riis",*Asker,NOR,1975,M,37,M45,2.0,...,,-inf,51,43,8,63.0,0.94,0.0,NOR,M
112851,49,0:00:00 h,"Østergaard, Kim Andre",*Oslo,NOR,1991,M,37,M23,17.0,...,,-inf,51,43,8,63.0,0.96,0.0,NOR,M
112852,50,0:00:00 h,"Stavne, Magnus Romstad",CTR,NOR,1993,M,37,M23,17.0,...,,-inf,51,43,8,63.0,0.98,0.0,NOR,M
112853,51,0:00:00 h,"Wood, Sebastian Edmund Pedersen",Club Tjommis,NOR,1993,M,37,M23,17.0,...,,-inf,51,43,8,63.0,1.0,0.0,NOR,M


In [114]:
df_clean.sort_values(by='Distance Finish', ascending=False)

Unnamed: 0,Runner ID,First Name,Surname,Nat.,Gender,Age,Age Group,Cat,YOB,Race Count,...,Cat. Rank,Finish Percentage,Winner Percentage,Distance/Time,Distance KM,Terrain,Time Seconds Finish,Distance Finish,Average Speed,Avg.Speed km/h
279555,530030,Budjargal (Буджаргал Бямбаа),Byambaa,MGL,M,,Unknown,M40,1982,0,...,1.0,0.04,,,,other,,1315.509,,5.481
279556,5307,Andrea,Marcato,ITA,M,,Unknown,M40,1982,7,...,2.0,0.08,,,,other,,1196.355,,4.985
235555,5307,Andrea,Marcato,ITA,M,,Unknown,M40,1982,6,...,1.0,0.05,,,,other,,1172.005,,4.883
279557,1236609,Chia-Hung (魏嘉鴻),Wei,TPE,M,,Unknown,M50,1973,0,...,1.0,0.12,,,,other,,1140.990,,4.754
235556,683763,Lu-Cong (耿鲁聪),Geng,CHN,M,,Unknown,M50,1969,16,...,1.0,0.10,,,,other,,1112.345,,4.635
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
342009,2177470,Hai-Dang,Hoang,VIE,M,,Unknown,Unknown,,0,...,135.0,0.98,0.56,75km,75.0,trail,68364.0,,911.520000,3.949
342010,2130290,Ngoc-Tuyen,Tran,VIE,M,,Unknown,M40,1983,0,...,8.0,0.99,0.56,75km,75.0,trail,68365.0,,911.533333,3.949
342011,2133025,Hong-Nhut,Le,VIE,M,,Unknown,M45,1975,0,...,3.0,0.99,0.56,75km,75.0,trail,68366.0,,911.546667,3.949
342012,1830799,Thi-Thuy-Tuyen,Vo,VIE,F,,Unknown,W23,1992,0,...,9.0,1.00,0.56,75km,75.0,trail,68367.0,,911.560000,3.949
