In [1]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go

#import csv
# Disable pandas' row-display limit: with max_rows=None a displayed DataFrame
# is rendered in full instead of being truncated.
pd.set_option('display.max_rows', None)  # Display all rows


In [2]:
# Make sure the "images" output folder exists. exist_ok=True creates it only
# when missing and avoids the race of a separate exists()-then-mkdir() check.
os.makedirs("images", exist_ok=True)

# Selection of the recording to analyse. These three values determine the data
# folder name and the date strings built below.
day = "26"
month = "09"
dataset = "rfid"

date = f"{day}_{month}"  # change date accordingly
date_year = f"2023-{month}-{day}"  # ISO-style date (year is fixed to 2023)

# Folder holding the CSV files of the selected recording, e.g. "rfid_26_09/".
folder_path = f"{dataset}_{date}/"


def process_file(file_number, date, folder=None):
    """Load one RFID CSV file, clean it, write it back and return the result.

    The file read is ``<folder>/rfid_<date>_<file_number>.csv``. Its ``ts``
    column is parsed as datetimes.

    Cleaning steps:
      * if the file has exactly three columns, the first one is dropped
        (presumably a leftover index column from an earlier export --
        TODO confirm against the raw files);
      * rows whose ``gabarit`` equals 0 are removed.

    NOTE: the cleaned data overwrites the source CSV on disk, so the raw rows
    with ``gabarit == 0`` are not recoverable from that file afterwards.

    Parameters
    ----------
    file_number : int or str
        Numeric suffix of the file name.
    date : str
        Date part of the file name (e.g. ``"26_09"``).
    folder : str or path-like, optional
        Directory that contains the file. When omitted, the module-level
        ``folder_path`` is used, which is what the function always did before
        this parameter existed.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows with a fresh 0..n-1 index.
    """
    if folder is None:
        folder = folder_path

    file_path = os.path.join(folder, f'rfid_{date}_{file_number}.csv')
    df = pd.read_csv(file_path, parse_dates=['ts'])

    # Remove first empty column if it exists
    if df.shape[1] == 3:
        df = df.drop(columns=df.columns[0])

    # Remove entries with 'gabarit' equal to 0 and renumber the rows
    df = df[df['gabarit'] != 0].reset_index(drop=True)

    # Write the cleaned DataFrame back to the CSV file (index is not stored)
    df.to_csv(file_path, index=False)

    return df

# Load and clean the three CSV files (file numbers 1-3) of the selected date.
df1 = process_file(file_number=1, date=date)
df2 = process_file(file_number=2, date=date)
df3 = process_file(file_number=3, date=date)



In [11]:

def time_in_station_fun(df):
    """Add a ``time_in_station`` column holding the duration of each run.

    A "run" is a maximal block of consecutive rows that share the same
    ``gabarit`` value. For every run the duration, in minutes, is written on
    the run's LAST row; all other rows keep 0.0:

      * a run of a single row gets 1/60 (one second);
      * a longer run gets ``(ts_last - ts_first).total_seconds() + 1`` seconds,
        converted to minutes.

    Rows are addressed by position, so the frame may carry any index (for
    example the gapped index left behind by a boolean filter). A label-based
    ``df.loc[i]`` walk over ``range(len(df))`` only works on a 0..n-1 index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``gabarit`` and, for runs longer than one row, a
        datetime-like ``ts`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the column is added in place.
    """
    df['time_in_station'] = 0.0
    if len(df) == 0:
        return df

    gabarit = df['gabarit'].to_numpy()

    # changed[k] is True when row k+1 holds a different gabarit than row k.
    changed = np.asarray(gabarit[1:] != gabarit[:-1], dtype=bool)
    starts_run = np.concatenate(([True], changed))  # first row of a run
    ends_run = np.concatenate((changed, [True]))    # last row of a run

    # For every row, the position of the first row of the run it belongs to.
    first_pos = np.flatnonzero(starts_run)[np.cumsum(starts_run) - 1]

    # Every run end starts out with the one-second minimum; single-row runs
    # keep it without their timestamp ever being read.
    minutes = np.where(ends_run, 1 / 60, 0.0)

    multi_row_end = ends_run & ~starts_run
    if multi_row_end.any():
        end_pos = np.flatnonzero(multi_row_end)
        ts = df['ts']
        # reset_index makes the subtraction positional rather than label-aligned.
        ts_last = ts.iloc[end_pos].reset_index(drop=True)
        ts_first = ts.iloc[first_pos[end_pos]].reset_index(drop=True)
        elapsed = ts_last - ts_first
        minutes[end_pos] = ((elapsed.dt.total_seconds() + 1) / 60).to_numpy()

    df['time_in_station'] = minutes
    return df


def calc_time_in_station(df, index_i, index_f):
    """Compute the duration of one run and store it on the run's last row.

    ``index_i`` and ``index_f`` are the index labels of the first and last row
    of the run. The duration, in minutes, is written to
    ``df.loc[index_f, 'time_in_station']``; nothing is returned.

    When both labels are the same the run has a single row and the duration is
    fixed at one second (1/60 minute). Otherwise it is the difference of the
    two ``ts`` values in seconds, plus one second, expressed in minutes.
    """
    span = index_f - index_i

    if span == 0:
        minutes = 1/60
    else:
        elapsed = df.loc[index_f, 'ts'] - df.loc[index_i, 'ts']
        minutes = (elapsed.total_seconds() + 1)/60

    df.loc[index_f, 'time_in_station'] = minutes


def plot_time_in_station(df, station):
    """Show a scatter plot of ``time_in_station`` against ``ts`` for one station.

    Rows whose ``time_in_station`` is 0 are left out of the plot. The x-axis is
    fixed to 09:00-18:00 on the module-level ``date_year`` and the y-axis to
    [-0.2, 3], so points outside those ranges are not visible in the initial
    view.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``ts`` and ``time_in_station``.
    station : int
        Station number used to pick the plot title. 1, 2 and 3 have dedicated
        titles; any other value falls back to a generic ``Station <value>``
        label instead of failing on an unassigned title variable.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    station_titles = {
        1: 'Beginning of Stage 3',
        2: 'End of Stage 3',
        3: 'Beginning of Stage 7',
    }
    title_plot_var = station_titles.get(station, f'Station {station}')
    title_plot = 'Plot of Time in ' + title_plot_var + ' over Time'

    # Only rows that carry a computed duration are plotted.
    df = df[df['time_in_station'] != 0]

    ts_values = np.array(df['ts'])

    # Create a scatter plot using go.Scatter
    scatter = go.Scatter(x=ts_values, y=df['time_in_station'], mode='markers',
                         name='Scatter Plot', marker=dict(color='blue'))

    # Create a layout
    layout = go.Layout(
        title=title_plot,
        xaxis=dict(title='Timestamp', range=[date_year + ' 09:00:00', date_year + ' 18:00:00']),  # Set the range for the x-axis
        yaxis=dict(title='Time in Station', range=[-0.2, 3])  # Set the range for the y-axis
    )

    # Create a figure and add the scatter trace
    fig = go.Figure(data=[scatter], layout=layout)

    # Show the plot
    fig.show()
    # Static export is currently disabled:
    #fig.write_image("images/fig_" + dataset + "_" + day + "_" + month + "_" + str(station) + ".jpg")



In [12]:
def remove_outliers(df, max_time):
    """Keep only the rows whose ``time_in_station`` does not exceed ``max_time``.

    If the frame has no ``time_in_station`` column, a message is printed and
    the frame is returned unchanged. The returned rows keep their original
    index labels (the index is not renumbered).
    """
    column = 'time_in_station'

    if column in df.columns:
        within_limit = df[column] <= max_time
        return df[within_limit]

    print("Error: 'time_in_station' column not found.")
    return df

In [13]:

# For each station: compute the time-in-station column, drop rows above
# 5 (the max_time passed to remove_outliers), then plot the result.

# Station 1
df1 = remove_outliers(time_in_station_fun(df1), 5)
plot_time_in_station(df1, 1)

# Station 2
df2 = remove_outliers(time_in_station_fun(df2), 5)
plot_time_in_station(df2, 2)

# Station 3
df3 = remove_outliers(time_in_station_fun(df3), 5)
plot_time_in_station(df3, 3)