In [3]:
import os
import re
import sys

import pandas as pd

%run 'Code data_extraction.ipynb'
%run 'Code arcs.ipynb'

# Pattern applied with re.search() to a recording directory path; the matched
# digits that directly follow a capital 'P' are converted to the person number.
# '+' (instead of '*') is required: with '*' the search succeeds with an empty
# match at the first 'P' that is NOT followed by digits (e.g. "Projects/P7"),
# and converting that empty string to int raises ValueError.
REGEX = r'(?<=P)[0-9]+'
# Column holding the person number / the per-person exercise number.
PNUM_COLUMN_NAME = 'pNum'
ENUM_COLUMN_NAME = 'eNum'

# Folder that is scanned for raw recordings, and folder that receives every
# intermediate CSV file listed below.
INPUT_FOLDER = r'new_recording/'
OUTPUT_FOLDER = r"new_data/"

# File names (inside OUTPUT_FOLDER) written by the successive pipeline stages.
COMBINED_DATA_FILE = r'combined.csv'
CLEANED_DATA_FILE = r'cleaned.csv'
CUT_SIDED_DATA_FILE = r'cut_sided.csv'
ROTATED_DATA_FILE = r'rotated.csv'
ARCS_CALCULATED_FILE = r'arcs_calculated.csv'

def _update_stage(source_file, target_file, transform, decimal='.'):
    """Incrementally bring one derived CSV file up to date.

    Reads ``source_file`` and ``target_file`` from OUTPUT_FOLDER, applies
    ``transform`` to the rows of the source whose pNum is not yet present in
    the target, and rewrites the target only when ``transform`` produced rows.

    Parameters:
        source_file: name of the CSV file produced by the previous stage.
        target_file: name of the CSV file this stage maintains.
        transform: callable taking a DataFrame and returning a DataFrame.
        decimal: decimal separator used when reading both files.
    """
    source_data = try_read_csv(OUTPUT_FOLDER + source_file, decimal=decimal)
    if source_data.empty:
        return

    current = try_read_csv(OUTPUT_FOLDER + target_file, decimal=decimal)
    pending = get_dataframe_delta(current, source_data)
    produced = transform(pending)
    if not produced.empty:
        merged = append_avoiding_duplicating(current, produced)
        merged.to_csv(OUTPUT_FOLDER + target_file)


def run():
    """Run the whole pipeline: combine, clean, cut/side, rotate, calculate arcs.

    Every stage persists its result as a CSV file in OUTPUT_FOLDER and only
    processes persons (pNum) that are not yet present in its own output file,
    so repeated calls only handle newly recorded persons.
    """
    # Combining part: pick up recordings of persons not yet in combined.csv.
    combined_path = OUTPUT_FOLDER + COMBINED_DATA_FILE
    current_combined = try_read_csv(combined_path)
    combined, newly_added = combine_data(INPUT_FOLDER, current_combined)
    if len(combined) > len(current_combined):
        combined.to_csv(combined_path)

    # Cleaning part: only the freshly combined rows are cleaned.
    if not newly_added.empty:
        cleaned_path = OUTPUT_FOLDER + CLEANED_DATA_FILE
        current_cleaned = try_read_csv(cleaned_path)
        new_cleaned = clean_data(newly_added)
        cleaned = append_avoiding_duplicating(current_cleaned, new_cleaned)
        cleaned.to_csv(cleaned_path)

    # Cut siding part.
    # NOTE(review): this stage reads its files with decimal=',' while every
    # other read (including the cleaned.csv read above) uses the default '.'.
    # Kept as in the original; confirm that this is intentional.
    _update_stage(CLEANED_DATA_FILE, CUT_SIDED_DATA_FILE,
                  cut_frames_add_moving_sides, decimal=',')

    # Rotating part.
    _update_stage(CUT_SIDED_DATA_FILE, ROTATED_DATA_FILE, rotate_data)

    # Calculation part.
    _update_stage(ROTATED_DATA_FILE, ARCS_CALCULATED_FILE, calculate_angles)
    
def try_read_csv(fileName, decimal='.'):
    """Read a CSV file, falling back to an empty DataFrame if it is unreadable.

    Parameters:
        fileName: path of the CSV file; its first column is used as the index.
        decimal: character recognised as the decimal separator.

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` when the file is
        missing, inaccessible, empty or cannot be parsed.
    """
    try:
        return pd.read_csv(fileName, index_col=0, decimal=decimal)
    except (OSError, ValueError):
        # OSError covers missing/inaccessible files; ValueError covers pandas'
        # EmptyDataError and ParserError as well as decoding errors.  Anything
        # else (KeyboardInterrupt, programming errors, ...) is not swallowed.
        return pd.DataFrame()

def append_avoiding_duplicating(current_data, new_data):
    """Append to ``current_data`` the rows of ``new_data`` for unseen persons.

    Rows of ``new_data`` whose ``pNum`` already occurs in ``current_data`` are
    dropped before appending.  Row indices are kept as they are.

    Parameters:
        current_data: DataFrame with the data collected so far (may be empty).
        new_data: DataFrame with candidate rows; must have a ``pNum`` column
            when ``current_data`` is not empty.

    Returns:
        ``new_data`` itself when ``current_data`` is empty, otherwise a new
        DataFrame with the non-duplicate rows appended.
    """
    if current_data.empty:
        return new_data

    persons_current = current_data['pNum'].unique()
    valid_new_data = new_data[~new_data['pNum'].isin(persons_current)]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([current_data, valid_new_data])

def get_dataframe_delta(current_data, containing_new_data):
    """Return the rows of ``containing_new_data`` for persons not yet known.

    A person counts as known when its ``pNum`` occurs in ``current_data``.
    When either frame is empty there is nothing to filter and
    ``containing_new_data`` is returned unchanged.
    """
    if current_data.empty or containing_new_data.empty:
        return containing_new_data

    known_persons = current_data.pNum.unique()
    already_known = containing_new_data.pNum.isin(known_persons)
    return containing_new_data[~already_known]

def combine_data(input_folder, current_combined):
    """Collect the recordings of persons that are not yet in ``current_combined``.

    Walks ``input_folder``; for every directory whose path yields a person
    number via REGEX and whose person is not already present in
    ``current_combined``, each file whose name ends in 'csv' is read, tagged
    with the person number and a per-directory exercise counter (starting at
    1, in sorted file-name order) and collected.

    Parameters:
        input_folder: root folder that is scanned recursively.
        current_combined: DataFrame with the already combined data (may be
            empty); needs a ``pNum`` column when it is not empty.

    Returns:
        Tuple ``(combined_data, new_combined)``: the full data set including
        the new rows, and only the newly read rows (an empty DataFrame when
        nothing new was found).

    Raises:
        FileNotFoundError: if ``input_folder`` does not exist.
    """
    if len(current_combined) > 0:
        currentPersons = current_combined.pNum.unique()
    else:
        currentPersons = []

    if not os.path.exists(input_folder):
        raise FileNotFoundError(
            "Input folder not found: {0}".format(input_folder))

    new_frames = []
    for root, dirs, files in os.walk(input_folder, topdown=True):
        # Sorting in place fixes the traversal order and thereby the exercise
        # numbering (topdown=True lets os.walk honour the sorted dirs list).
        dirs.sort()
        files.sort()

        match = re.search(REGEX, root)
        # Skip directories without a person number; an empty match would make
        # int() fail.
        if match is None or not match.group(0):
            continue
        personNumber = int(match.group(0))
        if personNumber in currentPersons:
            continue

        fileCounter = 1
        for filename in files:
            if not filename.endswith('csv'):
                continue
            new_data = pd.read_csv(os.path.join(root, filename))
            new_data[PNUM_COLUMN_NAME] = personNumber
            new_data[ENUM_COLUMN_NAME] = fileCounter
            new_frames.append(new_data)
            sys.stdout.write("Added Person {0} exercise {1}   ".format(personNumber, fileCounter) + '\r')
            fileCounter += 1

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0,
    # and quadratic when used in a loop).
    new_combined = pd.concat(new_frames) if new_frames else pd.DataFrame()

    if not new_combined.empty:
        # Leave out an empty current frame so it cannot influence the dtypes.
        parts = [part for part in (current_combined, new_combined) if not part.empty]
        combined_data = pd.concat(parts)
        print("Combining data finished, persons added: {0}".format(new_combined.pNum.unique()))
    else:
        combined_data = current_combined
        print("Combining data finished, no new persons detected")
    return combined_data, new_combined
      
def clean_data(dataToClean):
    """Keep, for every person and exercise, only the longest tracked body.

    The rows are grouped by ``pNum``, then ``eNum``, then ``trackingId``; of
    the tracking groups of one exercise the one with the most rows is kept
    (on a tie the first group in groupby order wins).  The kept groups are
    concatenated in person/exercise order and the cleaned person numbers are
    printed.
    """
    longest_tracks = []
    for _, person_rows in dataToClean.groupby(dataToClean.pNum):
        for _, exercise_rows in person_rows.groupby(person_rows.eNum):
            candidates = [
                track
                for _, track in exercise_rows.groupby(exercise_rows.trackingId)
            ]
            longest_tracks.append(max(candidates, key=len))

    single_bodies = pd.concat(longest_tracks)
    cleaned_persons = single_bodies.pNum.unique()
    print("Cleaning finished, persons cleaned: {0}".format(cleaned_persons))
    return single_bodies
    
def cut_frames_add_moving_sides(dataToCut):
    """Run ``cleancsv`` and ``WrapperGetPart`` on every exercise of each person.

    Only persons that have exactly three exercises (distinct ``eNum`` values)
    are processed.  Each exercise is re-indexed from 0, passed through
    ``cleancsv`` and then ``WrapperGetPart``; the results are concatenated.
    An exercise that raises is reported on stdout and skipped.

    Parameters:
        dataToCut: DataFrame with at least ``pNum`` and ``eNum`` columns.

    Returns:
        DataFrame with the concatenated results, or an empty DataFrame when
        nothing was produced.
        (Assumes ``WrapperGetPart`` returns a DataFrame - TODO confirm.)
    """
    cut_pieces = []
    for _, person in dataToCut.groupby(dataToCut.pNum):
        per_enum = [x for _, x in person.groupby(person['eNum'])]
        if len(per_enum) != 3:
            continue
        for exercise in per_enum:
            try:
                sys.stdout.write("Cutting Person {0} exercise {1}  ".format(person.iloc[0].pNum, exercise.iloc[0].eNum) + '\r')
                enum_ = exercise.iloc[0].eNum
                exercise = exercise.reset_index(drop=True)
                cut_exercise = cleancsv(exercise, enum_)
                cut_exercise = cut_exercise.reset_index(drop=True)
                cut_exercise = WrapperGetPart(cut_exercise, enum_)
                cut_pieces.append(cut_exercise)
            except Exception:
                # Best effort: one broken exercise must not stop the batch.
                # Not a bare "except:", so Ctrl-C still interrupts the run.
                print("EXCEPTION while cutting:  Person {0} exercise {1}".format(person.iloc[0].pNum, exercise.iloc[0].eNum))

    # DataFrame.append was removed in pandas 2.0 (and, inside the try block,
    # that failure would have been reported as a cutting exception for every
    # exercise); collect the pieces and concatenate once instead.
    cut_with_sides = pd.concat(cut_pieces) if cut_pieces else pd.DataFrame()

    if not cut_with_sides.empty:
        print("Cutting frames finished, person cut and sided: {0}".format(cut_with_sides.pNum.unique()))
    else:
        print("Cutting frames finished, no new persons detected")
    return cut_with_sides

def rotate_data(dataToRotate):
    """Apply ``rotate_body`` to every exercise of every person.

    The rows are grouped by ``pNum`` and then ``eNum``; each group is passed
    to ``rotate_body`` and the results are concatenated.  An exercise that
    raises is reported on stdout and skipped.

    Parameters:
        dataToRotate: DataFrame with at least ``pNum`` and ``eNum`` columns.

    Returns:
        DataFrame with the concatenated results, or an empty DataFrame when
        nothing was produced.
        (Assumes ``rotate_body`` returns a DataFrame - TODO confirm.)
    """
    rotated_pieces = []
    for _, person in dataToRotate.groupby(dataToRotate.pNum):
        for _, exercise in person.groupby(person['eNum']):
            try:
                sys.stdout.write("Rotating Person {0} exercise {1}  ".format(person.iloc[0].pNum, exercise.iloc[0].eNum) + '\r')
                rotated_pieces.append(rotate_body(exercise))
            except Exception:
                # Best effort: one broken exercise must not stop the batch.
                # Not a bare "except:", so Ctrl-C still interrupts the run.
                print("EXCEPTION while rotating:  Person {0} exercise {1}".format(person.iloc[0].pNum, exercise.iloc[0].eNum))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # appending inside the loop.
    rotated = pd.concat(rotated_pieces) if rotated_pieces else pd.DataFrame()

    if not rotated.empty:
        print("Rotating data finished, person rotated: {0}".format(rotated.pNum.unique()))
    else:
        print("Rotating data finished, no new persons detected")
    return rotated
    
def calculate_angles(dataToProcess):
    """Calculate arcs with ``get_arcs`` per person, exercise and side.

    The rows are grouped by ``pNum``, ``eNum`` and ``Side``.  Groups whose
    ``Side`` is the empty string are skipped; for every other group
    ``get_arcs`` is called and its result is tagged with ``pNum``, ``eNum``
    and ``Side`` before being collected.  An exercise that raises is reported
    on stdout; sides of that exercise collected before the error are kept.

    Parameters:
        dataToProcess: DataFrame with at least ``pNum``, ``eNum`` and
            ``Side`` columns.

    Returns:
        DataFrame with the concatenated arcs, or an empty DataFrame when
        nothing was produced.
        (Assumes ``get_arcs`` returns a DataFrame - TODO confirm.)
    """
    arc_pieces = []
    for _, person in dataToProcess.groupby(dataToProcess.pNum):
        pnum = person['pNum'].iloc[0]
        for _, exercise in person.groupby(person.eNum):
            try:
                sys.stdout.write("Calculating angle for Person {0} exercise {1}  ".format(person['pNum'].iloc[0], exercise['eNum'].iloc[0]) + '\r')
                enum_ = exercise['eNum'].iloc[0]
                for _, sided in exercise.groupby(exercise.Side):
                    Side = sided['Side'].iloc[0]
                    if Side == '':
                        continue
                    arcs = get_arcs(sided, pnum, enum_, Side)
                    arcs['pNum'] = pnum
                    arcs['eNum'] = enum_
                    arcs['Side'] = Side
                    arc_pieces.append(arcs)
            except Exception:
                # Best effort: one broken exercise must not stop the batch.
                # Not a bare "except:", so Ctrl-C still interrupts the run.
                print("EXCEPTION while calculating angles:  Person {0} exercise {1}".format(person['pNum'].iloc[0], exercise['eNum'].iloc[0]))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # appending inside the loop.
    all_arcs = pd.concat(arc_pieces) if arc_pieces else pd.DataFrame()

    if not all_arcs.empty:
        print("Calculating angles finished, arcs calculated for: {0}".format(all_arcs.pNum.unique()))
    else:
        print("Calculating angles finished, no new persons detected")
    return all_arcs
    

