Takes files like number 1 and creates files like number 2 in *file_process*. Iterates over all files in a directory and and corrects the time values by adding an increasing value each time there was a "reset" in the time-series. 

In [1]:
import os
import pandas as pd

In [7]:
def adjust_time_series(input_path, output, constant = None):
    """
    Function to adjust files containing time series where the time steps decrease at times. This is done in order to allow interpolation over the entirety of the files without the need to separate 
    the time series based on instances where the difference between time steps is negative. The files provided contain 5 columns: time, x position, y position, z position, difference between consecutive 
    time steps and sign of the difference.
    path is the files directory, here we will be working with .xlsx (Excel files),
    output is the path to the direcotry where the newly generated file should be stored,
    constant is an optional variable that allows for specifying the value added to the time value when a negative time difference occurs.
    """

    df = pd.read_excel(input_path)

    #For simplicity renames the columns 
    #df.columns = ["time", "x", "y", "z", "time_delta", "diff_sign"]
    df.columns = ["time", "x", "y", "z"]

    #Calculates the difference between consecutive time steps
    df["time_diff"] = df["time"].diff()

    #Looks for indexes where time difference is negative
    negative_time_diff_indexes = df[df["time_diff"] < 0].index

    #Defines constant if it was not provided
    if constant is None:
        constant = df["time_diff"].max() * 2

    #Creates a cumulative adjustment variable
    cumulative = 0

    #Iterates through identified negative time difference instances and adjusts time values
    for index in negative_time_diff_indexes:
        cumulative += (df.loc[(index - 1), "time"] + constant) - df.loc[index, "time"]
        df.loc[index:, "time"] += cumulative

    #Defines output file path
    output_path = os.path.join(output, os.path.basename(input_path))

    #Saves the adjusted data frame to a new Exel file
    df.to_excel(output_path, index = False)

    print(f"Adjusted file saved to {output_path}")
    

In [5]:
#Directories containing Excel files
read_directory = "/Users/maks/Documents/MSc_project/data/coords_extracted"
write_directory = "/Users/maks/Documents/MSc_project/data/interpolation_prep"

In [10]:
#Iterates over files in read_driecotry and runs adjust_time_series
for filename in os.listdir(read_directory):
    if filename.startswith("."): #ignores hidden files
        continue
    elif filename.endswith(".xlsx"): #allows only Excel files
        print(filename) #shows which file is worked on 
        input_path = os.path.join(read_directory, filename)
        adjust_time_series(input_path, write_directory)

green68.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/green68.xlsx
red82.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/red82.xlsx
red70.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/red70.xlsx
yellow85.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/yellow85.xlsx
untagged2.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/untagged2.xlsx
whiter73.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/whiter73.xlsx
yellow66.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/yellow66.xlsx
blue41.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/blue41.xlsx
yellow71.xlsx
Adjusted file saved to /Users/maks/Documents/MSc_project/data/interpolation_prep/yellow71.xlsx
untagged3.xlsx
Adjusted file saved 