In [1]:
"""IMPORTANT: MUST RUN CELLS IN ORDER!!
    2017 North Islands
"""
import os
import json
from requests.exceptions import HTTPError
import time

import boto3
import pandas as pd

from lifter_api import LifterAPI

In [2]:
# authenticate
#
# Exactly one client is built: against the local API when URL is set to
# LOCAL_URL, otherwise against the live API. Each target reads its token from
# its own environment variable.

LOCAL_URL = "http://localhost:8000"
LIVE_URL = "https://api.lifter.shivan.xyz"

URL = None
# URL = LOCAL_URL  # uncomment to target a locally running API

if URL == LOCAL_URL:
    auth_token = os.getenv("LOCAL_API_TOKEN")
    api = LifterAPI(url=URL, auth_token=auth_token)
    print("connected locally")
else:
    auth_token = os.getenv("API_TOKEN")
    api = LifterAPI(url=LIVE_URL, auth_token=auth_token)
    print("connected LIVE")


# Create competition

def _page_number(page_url):
    """Return the ``page`` query parameter of a paginated-listing URL as an int."""
    # Imported here so this cell does not depend on an edit to the import cell.
    from urllib.parse import parse_qs, urlparse

    return int(parse_qs(urlparse(page_url).query)["page"][0])


def create_competition(date_start, date_end, competition_name, location):
    """Create a competition unless one with the same name already exists.

    Every page of ``api.competitions`` is read to build a name -> reference_id
    map. Only the competition name is compared; dates and location are not.

    Args:
        date_start: Start date, passed through to ``api.create_competition``.
        date_end: End date, passed through to ``api.create_competition``.
        competition_name: Name used both for the duplicate check and creation.
        location: Location, passed through to ``api.create_competition``.

    Returns:
        The response of ``api.create_competition`` for a new competition, or
        the response of ``api.get_competition`` for the existing one.
    """
    existing = {}
    page = 1
    while page is not None:
        listing = api.competitions(page=page)
        for entry in listing['results']:
            existing[entry['competition_name']] = entry['reference_id']
        # 'next' holds the URL of the following page and is falsy on the last
        # one. The page number is read from its query string, so additional
        # query parameters in that URL do not break the parsing.
        next_url = listing['next']
        page = _page_number(next_url) if next_url else None

    if competition_name in existing:
        print(f"ERROR: {competition_name} already exists!")
        return api.get_competition(competition_id=existing[competition_name])

    created = api.create_competition(date_start=date_start,
                                     date_end=date_end,
                                     location=location,
                                     competition_name=competition_name)
    # Reported only once the call has returned, so a failed request is not
    # announced as a success.
    print(f"{competition_name} created.")
    return created

    
# Create Sessions

def create_session(
    competition_id,
    session_datetime,
    referee_first,
    referee_second,
    referee_third,
    technical_controller,
    jury,
    marshall,
    announcer,
    timekeeper,
):
    """Create a session unless the competition already has one at that time.

    Existing sessions are matched on ``session_datetime`` by exact equality
    with the value the API returns, so the timestamp has to be supplied in the
    same textual form the API reports it in.

    Args:
        competition_id: Reference id of the competition the session belongs to.
        session_datetime: Start timestamp; also the duplicate-detection key.
        referee_first, referee_second, referee_third, technical_controller,
        jury, marshall, announcer, timekeeper: Official names, passed through
            unchanged to ``api.create_session``.

    Returns:
        The response of ``api.create_session`` for a new session, or the
        response of ``api.get_session`` for the one that already exists.
    """
    # NOTE(review): only this single listing response is inspected; if the
    # sessions endpoint paginates, later pages are not checked - confirm.
    listing = api.sessions(competition_id=competition_id)
    existing = {
        entry['session_datetime']: entry['reference_id']
        for entry in listing['results']
    }

    if session_datetime in existing:
        print(f"ERROR: Session at {session_datetime} already exists.")
        return api.get_session(
            competition_id=competition_id,
            session_id=existing[session_datetime],
        )

    created = api.create_session(
        competition_id=competition_id,
        session_datetime=session_datetime,
        referee_first=referee_first,
        referee_second=referee_second,
        referee_third=referee_third,
        technical_controller=technical_controller,
        jury=jury,
        marshall=marshall,
        announcer=announcer,
        timekeeper=timekeeper,
    )
    # Reported only once the call has returned, so a failed request is not
    # announced as a success.
    print(f"Session at {session_datetime} created.")
    return created

def create_athlete(first_name: str, last_name: str, yearborn: int):
    """Create an athlete unless a search for their full name finds someone.

    The duplicate check is a search for "<first_name> <last_name>"; the year
    of birth is not part of it, so two athletes sharing a name are treated as
    the same person.

    Args:
        first_name: Athlete's first name.
        last_name: Athlete's last name.
        yearborn: Year of birth, passed through to ``api.create_athlete``.

    Returns:
        The response of ``api.create_athlete`` when a new athlete was created,
        otherwise the first search hit (``None`` if the search reported a
        non-zero count but returned no results).
    """
    matches = api.find_athlete(search=f"{first_name} {last_name}")
    if matches["count"] == 0:
        created = api.create_athlete(
            first_name=first_name, last_name=last_name, yearborn=yearborn
        )
        print(f"athlete created {created['first_name']} {created['last_name']}")
        return created

    print("athlete already exists")
    hits = matches.get("results") or []
    return hits[0] if hits else None
    
def parse_lift(lift: str) -> tuple[str, int]:
    """Translate one attempt cell of the results sheet into (status, weight).

    * ``"-"`` alone   -> ``("DNA", 0)``: no attempt was recorded.
    * ``"-<weight>"`` -> ``("NOLIFT", weight)``: the attempt was missed.
    * ``"<weight>"``  -> ``("LIFT", weight)``: the attempt was made.

    Raises:
        IndexError: if ``lift`` is an empty string.
        ValueError: if the weight part is not an integer.
    """
    if lift == "-":
        return "DNA", 0

    missed = lift[0] == "-"
    status = "NOLIFT" if missed else "LIFT"
    weight_text = lift[1:] if missed else lift
    return status, int(weight_text)

def create_lift(competition_id: str, session_id: str, lift: dict[str, str | int | float]):
    """Record one athlete's six attempts for a session.

    The athlete is looked up by name and the lift is only submitted when the
    search returns exactly one match.

    Args:
        competition_id: Reference id of the competition.
        session_id: Reference id of the session within that competition.
        lift: Row data with the keys ``athlete_name``, ``snatch_first``,
            ``snatch_second``, ``snatch_third``, ``cnj_first``,
            ``cnj_second``, ``cnj_third`` (attempt cells understood by
            ``parse_lift``), ``bodyweight``, ``weight_category``, ``team``
            and ``lottery_number``.

    Returns:
        None. The outcome is reported with ``print``.
    """
    print(lift)
    # Only the first two space-separated words of the name are searched for,
    # as before; a one-word name is searched as-is instead of raising.
    search = " ".join(lift["athlete_name"].split(" ")[:2])
    athlete = api.find_athlete(search=search)

    if athlete['count'] == 1:
        # Parse every attempt cell once into its status and weight arguments.
        attempts = {}
        for attempt in (
            "snatch_first", "snatch_second", "snatch_third",
            "cnj_first", "cnj_second", "cnj_third",
        ):
            status, weight = parse_lift(lift[attempt])
            attempts[attempt] = status
            attempts[f"{attempt}_weight"] = weight

        try:
            api.create_lift(
                competition_id=competition_id,
                session_id=session_id,
                athlete_id=athlete['results'][0]['reference_id'],
                bodyweight=lift['bodyweight'],
                weight_category=lift['weight_category'],
                team=lift['team'],
                lottery_number=lift['lottery_number'],
                **attempts,
            )
        except HTTPError as error:
            # Every HTTP failure lands here, not only duplicates, so the
            # underlying error is shown rather than assumed.
            print(f"Lift not created, it may already exist: {error}")
        else:
            print("Lift created successfully")
    elif athlete['count'] > 1:
        print("Multiple athlete with the name query exist")
    else:
        print(f"No athlete of the name, {lift['athlete_name']} this name exist")


def analyse_pdf(file_path, bucket, object_name=None, cache_path='result.json'):
    """Run Textract table analysis on a PDF, caching the response pages on disk.

    When ``cache_path`` already exists its content is returned and no AWS call
    is made. Otherwise the file is uploaded to S3, a Textract document
    analysis job is started and polled every five seconds, every page of the
    result is collected, and the list of pages is written to ``cache_path``.

    Args:
        file_path: Local path of the document to analyse.
        bucket: S3 bucket the document is uploaded to.
        object_name: S3 key for the upload; defaults to the file's base name.
        cache_path: JSON file the response pages are cached in. The cache is
            not keyed on ``file_path``, so use a different path per document.

    Returns:
        A list of ``get_document_analysis`` response dicts, one per page.

    Raises:
        RuntimeError: if Textract reports the job as ``FAILED``; nothing is
            cached in that case.
    """
    if os.path.exists(cache_path):
        print("results already exist")
        with open(cache_path, 'r', encoding='utf-8') as file:
            cached_text = file.read()
        try:
            return json.loads(cached_text)
        except json.JSONDecodeError:
            # Fallback for a cache that was saved with single-quoted strings.
            # Swapping the quotes up front would corrupt valid JSON whose
            # text contains an apostrophe, so it is only tried second.
            return json.loads(cached_text.replace("'", '"'))

    if object_name is None:
        object_name = os.path.basename(file_path)

    s3_client = boto3.client('s3')
    s3_client.upload_file(file_path, bucket, object_name)

    client = boto3.client('textract', region_name='ap-southeast-2')
    response = client.start_document_analysis(
        DocumentLocation={'S3Object': {'Bucket': bucket, 'Name': object_name}},
        FeatureTypes=["TABLES"],
    )
    job_id = response['JobId']

    # Poll until the job leaves IN_PROGRESS.
    while True:
        result = client.get_document_analysis(JobId=job_id)
        print(result['JobStatus'])
        if result["JobStatus"] != "IN_PROGRESS":
            break
        time.sleep(5)

    if result["JobStatus"] == "FAILED":
        # Raising here keeps a failed job out of the cache, which would
        # otherwise be returned by every later call.
        raise RuntimeError(
            f"Textract analysis of {object_name} failed: "
            f"{result.get('StatusMessage')}"
        )

    # The first response carries the first page; follow NextToken for the rest.
    results = [result]
    while result.get("NextToken", None):
        result = client.get_document_analysis(JobId=job_id, NextToken=result['NextToken'])
        results.append(result)

    # Serialise before opening the file so a serialisation error cannot leave
    # a truncated cache behind.
    payload = json.dumps(results)
    with open(cache_path, 'w', encoding='utf-8') as file:
        file.write(payload)
    print('results saved')
    return results

connected LIVE


In [3]:
# Results sheet to analyse, and the S3 bucket analyse_pdf uploads it to before
# starting the Textract job.
FILE_PATH = "./2017.north_island_championships.pdf"
BUCKET = "reading-optom-notes"
# analyse_pdf returns the content of result.json when that file already
# exists, without contacting AWS; otherwise it runs the analysis and saves it.
results = analyse_pdf(file_path=FILE_PATH, bucket=BUCKET)

results already exist


In [4]:
# One row per block, gathered from the "Blocks" list of every response page.
block_frames = [pd.json_normalize(data=frame["Blocks"]) for frame in results]
df = pd.concat(block_frames)

# Geometry, table-cell and selection columns are not used further on.
unused_columns = [
    'Geometry.BoundingBox.Width',
    'Geometry.BoundingBox.Height',
    'Geometry.BoundingBox.Left',
    'Geometry.BoundingBox.Top',
    'Geometry.Polygon',
    'RowIndex',
    'ColumnIndex',
    'RowSpan',
    'ColumnSpan',
    'EntityTypes',
    'SelectionStatus',
]
df = df.drop(columns=unused_columns)

# Keep only the individual words.
is_word = df["BlockType"] == "WORD"
words = df.loc[is_word]

In [5]:
# The session timestamps assigned further down in this notebook all fall on
# 2017-06-10 and 2017-06-11, so the competition ends on 11 June (the end date
# previously read "2017-07-11").
# NOTE(review): create_competition returns the existing record when the name
# is already taken, so a record created earlier with the old end date has to
# be corrected separately.
competition = create_competition(
    date_start="2017-06-10",
    date_end="2017-06-11",
    location="Kolmar, Papatoetoe, Auckland",
    competition_name="2017 North Island Championships",
)

ERROR: 2017 North Island Championships already exists!


In [6]:
# Text of every WORD block, in row order, as a plain Python list.
cleaned_words = words["Text"].tolist()

In [7]:
# Positions, counted in words from a "REFEREE" marker, of the words that make
# up each official's entry. Listed in the order the keys are stored.
# NOTE(review): the offsets presumably match the fixed layout of the officials
# block printed under each results table - confirm against the PDF.
_OFFICIAL_WORD_OFFSETS = {
    "referee_first": (6, 7),
    "referee_second": (8, 9),
    "referee_third": (10, 11),
    "technical_controller": (26, 27),
    "jury": (16, 17, 18, 19, 20, 21),
    "marshall": (34, 35),
    "timekeeper": (28, 29),
    "announcer": (30, 31),
}

sessions = []
collecting = False
for position, word in enumerate(cleaned_words):
    if "Place" in word:
        # A word containing "Place" opens a new table: start a fresh session.
        collecting = True
        lifts_information = []
        session = {}
    if "REFEREE" in word and cleaned_words[position + 1] == "1":
        # "REFEREE" followed by "1" closes the table: attach the collected
        # words and the officials, then store the session.
        session["lifts_information"] = lifts_information
        for role, offsets in _OFFICIAL_WORD_OFFSETS.items():
            session[role] = " ".join(
                str(cleaned_words[position + offset]) for offset in offsets
            )
        sessions.append(session)
        collecting = False
    if collecting and word != "Place":
        # Everything between the two markers, except the bare word "Place",
        # is table content.
        lifts_information.append(word)

sessions

[{'lifts_information': ['1',
   'Morgan',
   'Bowler-Parkin',
   'F',
   '1997',
   'INP',
   '47.60',
   '-29',
   '29',
   '-31',
   '43',
   '45',
   '-47',
   '29',
   '45',
   '74',
   '118.079',
   '2',
   '2',
   'Jaime',
   'Watson',
   'F',
   '2006',
   'EPS',
   '32.50',
   '23',
   '-26',
   '-26',
   '25',
   '28',
   '31',
   '23',
   '31',
   '54',
   '122.749',
   '3',
   '3',
   'Danielle',
   'Watson',
   'F',
   '2003',
   'EPS',
   '43.50',
   '34',
   '37',
   '-42',
   '43',
   '47',
   '-52',
   '37',
   '47',
   '84',
   '144.416',
   '1',
   '4',
   'Pip',
   'Patterson',
   'F',
   '1984',
   'FSO',
   '52.80',
   '70',
   '72',
   '74',
   '80',
   '84',
   '88',
   '74',
   '88',
   '162',
   '238.856',
   '1',
   '5',
   'Charlotte',
   'Moss',
   'F',
   '1995',
   'NSO',
   '51.60',
   '62',
   '-65',
   '65',
   '73',
   '77',
   '80',
   '65',
   '80',
   '145',
   '217.433',
   '2',
   '6',
   'Natassia',
   'Suares',
   'F',
   '1982',
   'FSO',
   '5

In [8]:
# Start time of each parsed session, listed in the same order as `sessions`.
session_start_times = [
    "2017-06-10T08:00:00+12:00",
    "2017-06-10T10:00:00+12:00",
    "2017-06-10T13:00:00+12:00",
    "2017-06-10T15:00:00+12:00",
    "2017-06-10T17:00:00+12:00",
    "2017-06-11T08:00:00+12:00",
    "2017-06-11T10:00:00+12:00",
    "2017-06-11T13:00:00+12:00",
    "2017-06-11T15:00:00+12:00",
]
# Indexing (rather than zip) keeps the IndexError when fewer sessions were
# parsed than there are start times.
for session_number, start_time in enumerate(session_start_times):
    sessions[session_number]['session_datetime'] = start_time

In [9]:
# Reference id of the record returned by create_competition; the sessions and
# lifts created below are attached to it.
competition_id = competition["reference_id"]

def give_weight_category(sex: str, body_weight: str) -> str:
    """Map a sex code and a body weight to a weight-category label.

    ``body_weight`` is converted with ``float`` first. An athlete belongs to
    the lightest category whose upper limit is not below their body weight;
    anyone above the last limit gets the "+" category.

    Returns:
        A label such as ``"W58"`` or ``"M105+"``, or ``None`` when ``sex`` is
        neither ``"F"`` nor ``"M"``.
    """
    kilos = float(body_weight)

    if sex == "F":
        limits = (
            (48.0, "W48"), (53.0, "W53"), (58.0, "W58"), (63.0, "W63"),
            (69.0, "W69"), (75.0, "W75"), (90.0, "W90"),
        )
        open_category = "W90+"
    elif sex == "M":
        limits = (
            (56.0, "M56"), (62.0, "M62"), (69.0, "M69"), (77.0, "M77"),
            (85.0, "M85"), (94.0, "M94"), (105.0, "M105"),
        )
        open_category = "M105+"
    else:
        return None

    for upper_limit, label in limits:
        if kilos <= upper_limit:
            return label
    return open_category

# Each athlete occupies this many consecutive words in 'lifts_information'.
# The first 13 are used below; judging by the parsed sample, the remaining
# five look like best snatch, best clean and jerk, total, a points score and
# the placing - NOTE(review): confirm against the PDF.
ROW_WIDTH = 18

for session in sessions:
    created_session = create_session(
        competition_id=competition_id,
        session_datetime=session["session_datetime"],
        referee_first=session["referee_first"],
        referee_second=session["referee_second"],
        referee_third=session["referee_third"],
        technical_controller=session["technical_controller"],
        jury=session["jury"],
        marshall=session["marshall"],
        announcer=session["announcer"],
        timekeeper=session["timekeeper"],
    )

    session_words = session['lifts_information']
    # Floor division: a trailing partial row is ignored.
    for row_number in range(len(session_words) // ROW_WIDTH):
        row_start = row_number * ROW_WIDTH
        (lottery_number, first_name, last_name, sex, yearborn, team,
         bodyweight, snatch_first, snatch_second, snatch_third,
         cnj_first, cnj_second, cnj_third) = session_words[row_start:row_start + 13]

        # Convert the numeric cells before any request is sent, so a
        # misaligned row fails here instead of after the athlete was created.
        lift = {
            "athlete_name": f"{first_name} {last_name}",
            "snatch_first": snatch_first,
            "snatch_second": snatch_second,
            "snatch_third": snatch_third,
            "cnj_first": cnj_first,
            "cnj_second": cnj_second,
            "cnj_third": cnj_third,
            "bodyweight": float(bodyweight),
            "weight_category": give_weight_category(sex, bodyweight),
            "team": team,
            "lottery_number": int(lottery_number),
        }
        year_of_birth = int(yearborn)

        # Pause between athletes - presumably to go easy on the API; confirm.
        time.sleep(0.5)
        create_athlete(
            first_name=first_name,
            last_name=last_name,
            yearborn=year_of_birth,
        )
        create_lift(
            competition_id=competition_id,
            session_id=created_session["reference_id"],
            lift=lift,
        )

ERROR: Session at 2017-06-10T08:00:00+12:00 already exists.
athlete already exists
{'athlete_name': 'Morgan Bowler-Parkin', 'snatch_first': '-29', 'snatch_second': '29', 'snatch_third': '-31', 'cnj_first': '43', 'cnj_second': '45', 'cnj_third': '-47', 'bodyweight': 47.6, 'weight_category': 'W48', 'team': 'INP', 'lottery_number': 1}
Lift already exists
athlete already exists
{'athlete_name': 'Jaime Watson', 'snatch_first': '23', 'snatch_second': '-26', 'snatch_third': '-26', 'cnj_first': '25', 'cnj_second': '28', 'cnj_third': '31', 'bodyweight': 32.5, 'weight_category': 'W48', 'team': 'EPS', 'lottery_number': 2}
Lift already exists
athlete already exists
{'athlete_name': 'Danielle Watson', 'snatch_first': '34', 'snatch_second': '37', 'snatch_third': '-42', 'cnj_first': '43', 'cnj_second': '47', 'cnj_third': '-52', 'bodyweight': 43.5, 'weight_category': 'W48', 'team': 'EPS', 'lottery_number': 3}
Lift already exists
athlete already exists
{'athlete_name': 'Pip Patterson', 'snatch_first': 

KeyboardInterrupt: 

In [None]:
# problem with the second session's rows
# NOTE(review): the original note stopped after "problem with" and does not
# say what the problem is. The slice below shows words 80-99 of that session.
sessions[1]['lifts_information'][80:100]

['78',
 '81',
 '95',
 '100',
 '105',
 '81',
 '105',
 '186',
 '250.387',
 '1',
 '6',
 'Barry',
 'Lee',
 'M',
 '1971',
 'INP',
 '68.90',
 '70',
 '75',
 '-78']