In [34]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go

#import csv
pd.set_option('display.max_rows', None)  # Display all rows


In [35]:
#if not os.path.exists("images"):
#    os.mkdir("images")

dataset = "rfid"

def process_file(file_number, date):
    folder_path = dataset + '_' + date + '/'
    file_path = os.path.join(folder_path, f'rfid_{date}_{file_number}.csv')
    if os.path.exists(file_path):
        #print(file_path)
        if file_number == "final":
            #file_path = os.path.join("rfid_fabrica", f'rfid_final.csv')
            df = pd.read_csv(file_path, parse_dates=['Stamp'])
            if df.shape[1] == 6:
                df = df.drop(df.columns[0], axis=1)
                df = df.drop(df.columns[1], axis=1)
                df = df.drop(df.columns[2], axis=1)


            if df.shape[1] == 4:
                df = df.drop(df.columns[3], axis=1)
                df = df.drop(df.columns[1], axis=1)
            df.to_csv(file_path, index = False)

        else:
            df = pd.read_csv(file_path, parse_dates=['ts'])

            # Remove first empty column if it exists
            if df.shape[1] == 3:
                df = df.drop(df.columns[0], axis=1)
            # Remove entries with 'gabarit' equal to 0
            df= df[df['gabarit'] != 0]

            # Write the updated DataFrame back to the CSV file
            df.to_csv(file_path, index = False)
            # Reset the index of the DataFrame
            df = df.reset_index(drop = True) 
    else:
        return pd.DataFrame

    return df

day = "09"
month = "10"
date = day + "_" + month #change date accordingly
date_year = "2023-" + month + "-" + day

df1 = process_file("1",date)
df2 = process_file("2",date)
df3 = process_file("3",date)
df4 = process_file("final",date)




In [36]:
def time_in_station_fun(df):
    """
    Calculates and records the time spent in a station for each occurrence of a 'gabarit' in a DataFrame in seconds.

    Parameters:
        - df: DataFrame containing timestamps and 'gabarit' values.

    Returns:
        DataFrame with an added 'time_in_station' column representing the time
        spent in the station for each occurrence (in seconds).
    """
    
    curr_gabarit = 0
    index_i = 0  # Initialize index_i outside the loop
    index_f = 0  # Initialize index_f outside the loop
    cnt = 0
    df_in = pd.DataFrame(columns=['ts', 'gabarit', 'time_in_station'])
    #df_in['ts'] = pd.to_datetime(df['ts'])

    for i in range(df.shape[0]): #0 to 129
        if i != 0: # not first index
            if ((df.loc[i, 'gabarit'] != curr_gabarit)):
                # routine to present final result
                df_in, cnt = calc_time_in_station(df, df_in, index_i, index_f, cnt)
            
          
            # new gabarit is found, i = f
            if df.loc[i, 'gabarit'] != df.loc[i-1, 'gabarit']:
                curr_gabarit = df.loc[i, 'gabarit']
                index_i = i # starts the interval
                index_f = i
        else: # for first entry, i = f
            curr_gabarit = df.loc[i, 'gabarit']
            index_i = i
            index_f = i

        
        if i != df.shape[0] - 1: # not last index 
            if df.loc[i, 'gabarit'] == df.loc[i+1, 'gabarit']:
                index_f = i+1
        else:
            index_f = i           
            df_in, cnt = calc_time_in_station(df, df_in, index_i, index_f, cnt)

    return df_in


# calculates time and writes it in dataframe
def calc_time_in_station(df, df_in, index_i, index_f, cnt):
    if index_f - index_i == 0:
        time_in_station = 1
    else: 
        time_in_station = ((df.loc[index_f, 'ts'] - df.loc[index_i, 'ts']).total_seconds() + 1)

    df_in.loc[cnt,'ts'] = df.loc[index_i, 'ts']
    df_in.loc[cnt,'gabarit'] = df.loc[index_i, 'gabarit']
    df_in.loc[cnt,'time_in_station'] = time_in_station

    cnt += 1

    return df_in, cnt


def plot_time_in_station(df, station):
    df = df[df['time_in_station'] != 0]
    # Create a scatter plot using Plotly Express
    if station == 1:
        title_plot_var = 'Begginning of Stage 3'
    elif station == 2:
        title_plot_var = 'End of Stage 3'
    elif station == 3:
        title_plot_var = 'Begginning of Stage 7'

    title_plot = 'Plot of Time in ' + title_plot_var + ' over Time'

    ts_values = np.array(df['ts'])
    df['time_in_station'] = df['time_in_station'] / 60

    # Create a scatter plot using go.Scatter
    scatter = go.Scatter(x=ts_values, y=df['time_in_station'], mode='markers', 
                        name='Scatter Plot', marker=dict(color='blue'))

    # Create a layout
    layout = go.Layout(
        title=title_plot,
        xaxis=dict(title='Timestamp', range=[date_year + ' 09:00:00', date_year + ' 18:00:00']),  # Set the range for the x-axis
        yaxis=dict(title='Time in Station', range=[-0.2, 3])  # Set the range for the y-axis
    )

    # Create a figure and add the scatter trace
    fig = go.Figure(data=[scatter], layout=layout)

    # Show the plot
    fig.show()
    fig.write_image("images/fig_" + dataset + "_" + day + "_" + month + "_" + str(station) + ".svg")



In [37]:
plot = 0
display_df = 0
df_in_1 = time_in_station_fun(df1)
df_in_2 = time_in_station_fun(df2)
df_in_3 = time_in_station_fun(df3)

if display_df:
    display(df_in_1)
    display(df_in_2)
    display(df_in_3)

if plot:
    plot_time_in_station(df_in_1,1)
    plot_time_in_station(df_in_2,2)
    plot_time_in_station(df_in_3,3)



In [38]:


def time_between_station(df1, df2, station1, station2, max_time):
    """
    Finds and records the time difference between occurrences of the same 'gabarit' in two DataFrames,
    df1 and df2, within a specified time window.

    Parameters:
        - df1: DataFrame containing timestamps and 'gabarit' values for station1.
        - df2: DataFrame containing timestamps and 'gabarit' values for station2.
        - station1: Identifier for the first station.
        - station2: Identifier for the second station.
        - max_time: Maximum time window to search for matching occurrences (in minutes).
    
    Returns:
        DataFrame with columns 'gabarit', 'ts' (timestamp), and 'time_bet_station1_station2'
        representing the time difference (in minutes) between corresponding occurrences in
        station1 and station2 within the specified time window.
    """
    
    df_bet = pd.DataFrame(columns=['gabarit', f'time_bet_{station1}_{station2}'])
    df_bet['ts'] = pd.NaT  # initialize as datetime type

 
    if (station1 != 3 and station2 != 4): ##1 2 or 2 3 
        column_order = ['ts', 'gabarit', f'time_bet_{station1}_{station2}']
        df_bet = df_bet[column_order]
        df_bet = find_matching_gabarit(df1, df2, station1, station2, max_time, df_bet)
    else:
        df_bet['model'] = ''
        df_bet['state'] = ''
        column_order = ['ts', 'gabarit', 'model', f'time_bet_{station1}_{station2}','state']
        df_bet = df_bet[column_order]
        df_bet = find_matching_gabarit_final(df1, df2, df_bet)

    return df_bet


def find_matching_gabarit(df1,df2,station1, station2, max_time, df_bet):

    cnt = 0
            
    for index in range(df1.shape[0]):
        start_time = df1.loc[index, 'ts']
        end_time = start_time + pd.Timedelta(minutes=max_time)
        matching_rows = df2[(df2['ts'] >= start_time) & 
                            (df2['ts'] <= end_time) 
                                & (df2['gabarit'] == df1.loc[index,'gabarit'])]
        if not matching_rows.empty:
            first_occurrence = matching_rows.iloc[0]
            
            # df1: initial time stamp, time_in_station_1
            # df2: initial time stamp, time_in_station_2
            if (station1 == 1 and station2 == 2):
                time_bet = (first_occurrence['time_in_station'] +
                        (first_occurrence['ts'] - df1.loc[index, 'ts']).total_seconds()) # in seconds
            else:
                time_bet = ((first_occurrence['ts'] - df1.loc[index, 'ts'] # in seconds
                            - pd.Timedelta(seconds = df1.loc[index, 'time_in_station'])).total_seconds())

                #df1: 10:06:00 17
                #     10:07:30 17
                #df2: 10:07:10 17
            #    time_bet = (df1.loc[index, 'time_in_station'] + first_occurrence['time_in_station'])

            df_bet.loc[cnt, 'ts'] = df1.loc[index, 'ts']  # Copy the timestamp for reference
            df_bet.loc[cnt, 'gabarit'] = first_occurrence['gabarit']
            df_bet.loc[cnt, f'time_bet_{station1}_{station2}'] = time_bet #round,2
            cnt += 1
        
    return df_bet

def find_matching_gabarit_final(df1, df2, df_bet):

    cnt = 0
            
    for index in range(df1.shape[0]):
        start_time = (df1.loc[index, 'ts'] + pd.Timedelta(seconds = df1.loc[index, 'time_in_station'])
                      + pd.Timedelta(seconds=20)) # not possible to have a test that takes less than 20 seconds
        matching_rows = df2[(df2['Stamp'] >= start_time)]


        if not matching_rows.empty:
            first_occurrence = matching_rows.iloc[0]
            
            # df1: initial time stamp, time_in_station_1
            # df2: initial time stamp, time_in_station_2
            time_bet = ((first_occurrence['Stamp'] - df1.loc[index, 'ts']).total_seconds()) # in minutes

            df_bet.loc[cnt, 'ts'] = df1.loc[index, 'ts']  # Copy the timestamp for reference
            df_bet.loc[cnt, 'gabarit'] = df1.loc[index,'gabarit']
            #print(cnt)
            #print(type(first_occurrence['Model']))
            #print(first_occurrence['Model'])
            df_bet.loc[cnt, 'model'] = str(first_occurrence['Model'])
            df_bet.loc[cnt, 'state'] = str(first_occurrence['Status'])
            df_bet.loc[cnt, f'time_bet_3_4'] = time_bet
            cnt += 1
        
    return df_bet



display_df_ = 0

df_bet_12 = time_between_station(df_in_1, df_in_2, 1, 2, max_time = 5)
df_bet_23 = time_between_station(df_in_2, df_in_3, 2, 3, max_time = 20)
df_bet_34 = time_between_station(df_in_3, df4, 3, 4, max_time = 0)

if display_df_:
    display(df_bet_12)
    display(df_bet_23)
    display(df_bet_34)




In [39]:
def perc_of_gabarits_found(df_in, df4, station):
    gab_scanned = df_in.shape[0]
    gab_factory = df4.shape[0]
    perc = gab_scanned / gab_factory * 100
    perc = round(perc,2)
    
    if station == 4:
        #display(df4)
        #display(df_in) 
        ok_count = df_in['state'].value_counts().get('OK', 0)
        #print(f'ok: {ok_count}')
        ko_count =gab_scanned - ok_count
        perc_ok = round(ok_count / gab_scanned *100)
        perc_ko = round(100 - perc_ok)
        print(f"Percentage of gabarits scanned in every sensor: {perc}%, which of those occured OK:{ok_count} and KO:{ko_count}")
    else: 
        print(f"Percentage of gabarits scanned in sensor {station}: {perc}%")
    
    return perc


perc1 = perc_of_gabarits_found(df_in_1, df4, 1)
perc2 = perc_of_gabarits_found(df_in_2, df4, 2)
perc3 = perc_of_gabarits_found(df_in_3, df4, 3)


Percentage of gabarits scanned in sensor 1: 74.24%
Percentage of gabarits scanned in sensor 2: 50.51%
Percentage of gabarits scanned in sensor 3: 60.0%


In [100]:

def all_time_between_station(df12, df23, df34):
    #df_bet_all = pd.DataFrame(columns = ['ts1','time_bet_12','ts2', 'ts2','time_bet_23','ts3','time_bet_34','gabarit','model'])
    df_bet_all = pd.DataFrame(columns = ['ts','gabarit', 'model','time_bet_1_2','time_bet_2_3','time_bet_3_4','state']) 
    #display(df12)
    #display(df23)
    #display(df34)

    cnt = 0
    for index in range(df12.shape[0]):

        start_time = (df12.loc[index, 'ts'] + pd.Timedelta(seconds = df12.loc[index, f'time_bet_1_2'])
                       - pd.Timedelta(seconds=2))
        end_time = start_time + pd.Timedelta(seconds=3)

        matching_rows = df23[(df23['ts'] >= start_time) & (df23['ts'] <= end_time) 
                                & (df23['gabarit'] == df12.loc[index,'gabarit'])]
        if not matching_rows.empty:
            
            first_occurrence = matching_rows.iloc[0]

            #df_bet_all.loc[cnt, 'ts'] = df12.loc[index, 'ts']  # Copy the timestamp for reference
            #df_bet_all.loc[cnt, 'gabarit'] = first_occurrence['gabarit']
            #df_bet_all.loc[cnt, 'time_bet_1_2'] = df12.loc[index, 'time_bet_1_2'] 
            #df_bet_all.loc[cnt, 'time_bet_2_3'] = first_occurrence['time_bet_2_3'] 
            #cnt += 1

            start_time2 = (first_occurrence['ts'] + pd.Timedelta(seconds = first_occurrence[f'time_bet_2_3']) 
                          - pd.Timedelta(seconds=1))
            end_time2 = start_time2 +   pd.Timedelta(seconds=3)
            matching_rows2 = df34[(df34['ts'] >= start_time2) & (df34['ts'] <= end_time2) 
                                & (df34['gabarit'] == first_occurrence['gabarit'])]
            
            if not matching_rows2.empty:

                first_occurrence2 = matching_rows2.iloc[0]
                df_bet_all.loc[cnt, 'ts'] = df12.loc[index, 'ts']  # Copy the timestamp for reference
                df_bet_all.loc[cnt, 'gabarit'] = first_occurrence['gabarit']
                df_bet_all.loc[cnt, 'time_bet_1_2'] = df12.loc[index, 'time_bet_1_2'] 
                df_bet_all.loc[cnt, 'time_bet_2_3'] = first_occurrence['time_bet_2_3'] 

                df_bet_all.loc[cnt, 'time_bet_3_4'] = first_occurrence2['time_bet_3_4']
                df_bet_all.loc[cnt, 'model'] = str(first_occurrence2['model'])
                df_bet_all.loc[cnt, 'state'] = str(first_occurrence2['state'])
            
                cnt += 1

    return df_bet_all



def plot_time_in_station(df):
    #display(px.data.medals_wide())

    df['time_bet_1_2'] = df['time_bet_1_2'].astype(int)
    df['time_bet_2_3'] = df['time_bet_2_3'].astype(int)
    df['time_bet_3_4'] = df['time_bet_3_4'].astype(int)
    melted_df = pd.melt(df, id_vars=['ts','model','state'], value_vars=['time_bet_1_2','time_bet_2_3','time_bet_3_4'],var_name='time_bet', value_name='time')    
    melted_df['color'] = melted_df['model'] + '_' + melted_df['time_bet']
    color_discrete_map = {#"HAMLET-49_time_bet_1_2": 'darkblue',
                          #"HAMLET-49_time_bet_2_3": 'cornflowerblue', 
                          #"HAMLET-49_time_bet_3_4": 'lightblue',
                          "SPT130023W_time_bet_1_2": 'darkblue',
                          "SPT130023W_time_bet_2_3": 'dodgerblue', 
                          "SPT130023W_time_bet_3_4": 'lightskyblue',

                          "SPT120018W_time_bet_1_2": 'maroon',
                          "SPT120018W_time_bet_2_3":'red',
                          "SPT120018W_time_bet_3_4": 'lightcoral',

                          "ORIONT-40_time_bet_1_2": 'forestgreen',
                          "ORIONT-40_time_bet_2_3": 'limegreen', 
                          "ORIONT-40_time_bet_3_4": 'lightgreen',
                          
                          "OCTAN-40_time_bet_1_2": 'indigo',
                          "OCTAN-40_time_bet_2_3": 'mediumorchid', 
                          "OCTAN-40_time_bet_3_4": 'plum',}
    
    #display(melted_df)
    melted_df['time'] = round(melted_df['time']/60,2)
    fig = px.bar(melted_df, x="ts", y="time", 
                labels={"ts": "Timestamp", "time": "Time (minutes)"},
                color = 'color',
                color_discrete_map = color_discrete_map,
                title="Time Between Stations")
    fig.show()






df_bet_all = all_time_between_station(df_bet_12, df_bet_23,df_bet_34)
#display(df_bet_all)
perc_all = perc_of_gabarits_found(df_bet_all, df4, 4)

plot_time_in_station(df_bet_all)
    

Percentage of gabarits scanned in every sensor: 23.73%, which of those occured OK:66 and KO:4



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [111]:
def remove_outliers(df, av_time):
    # Ensure the column 'time_in_station' exists
    if 'time_in_station' not in df.columns:
        print("Error: 'time_in_station' column not found.")
        return df
    

    # Remove rows where 'time_in_station' is greater than 5 minutes
    df = df[df['time_in_station'] <= av_time / 60]

    return df

    
def remove_outlier_quart(df,value):
    # Calculate the first quartile (Q1) and third quartile (Q3)
    filtered_df = pd.DataFrame
    Q1 = df[value].quantile(0.25)
    Q3 = df[value].quantile(0.75)

    # Calculate the Interquartile Range (IQR)
    IQR = Q3 - Q1

    # Define the lower and upper bounds to filter outliers
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter out values outside the specified range
    filtered_df = df[(df[value] >= lower_bound) & (df[value] <= upper_bound)]

    return(filtered_df)

In [122]:
def median_time_fun(df):
    # Group by 'model' and calculate the median for each group in df_med
    df_med = pd.DataFrame(columns = ['date','model','time_bet_1_2', 'time_bet_2_3', 'time_bet_3_4','med_total_time'])

    df['ts'] = pd.to_datetime(df['ts'])
    date = df['ts'].dt.date

    df['med_total_time'] = df[['time_bet_1_2', 'time_bet_2_3', 'time_bet_3_4']].sum(axis=1)
    df_plot = df

    df = df.drop(['ts', 'gabarit','state'], axis=1) #doesn't matter
  
    median_total_time = df.groupby('model').median()
    df_med = median_total_time.reset_index()
    df_med['date'] = date
        
    #display(df_med)

    return df_plot,df_med #, median_total_time_s


def plot_ok_ko(df):
        #display(melted_df)
    color_discrete_map = {"OK": 'limegreen',
                          "KO": 'red'}
    #display(df)
    df.loc[:,'med_total_time'] = round(df['med_total_time']/60,2)
    fig = px.bar(df, x="ts", y="med_total_time", 
                labels={"ts": "Timestamp", "time": "Time (minutes)"},
                #
                color = 'state',
                #base = 'ts',
                color_discrete_map = color_discrete_map,
                base = 'state',
                #width=10, # customize width here
                #argap = 0.5,
                title="Median Total Time (morning)",)
    target_date = pd.to_datetime(f"2023-{month}-{day}")

    fig.update_xaxes(range=[target_date + pd.to_timedelta("09:00:00"), target_date + pd.to_timedelta("13:00:00")])
  # Set your desired date range
    fig.show()

    fig2 = px.bar(df, x="ts", y="med_total_time", 
                labels={"ts": "Timestamp", "time": "Time (minutes)"},
                color = 'state',
                color_discrete_map = color_discrete_map,
                #width=10, # customize width here
                #argap = 0.5,
                title="Median Total Time (afternoon)",)
    target_date = pd.to_datetime(f"2023-{month}-{day}")

    fig2.update_xaxes(range=[target_date + pd.to_timedelta("14:00:00"), target_date + pd.to_timedelta("18:00:00")])
  # Set your desired date range
    fig2.show()


#median_time,median_total_time = 
df_sum_time, df_med = median_time_fun(df_bet_all)
df_sum_time_filt = remove_outlier_quart(df_sum_time, 'med_total_time')
plot_ok_ko(df_sum_time_filt)




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



This section will now gather a range of days and calculate the time in and between stations and then present the average per day.


In [31]:
start_date = '25/09/2023'
end_date = '11/10/2023'

start_date = pd.to_datetime(start_date, format='%d/%m/%Y')
end_date = pd.to_datetime(end_date, format='%d/%m/%Y')

# Create a date range
date_range = pd.date_range(start=start_date, end=end_date)

df_all_med = pd.DataFrame()

def aggregate_dates(df_all_med,date_range):
    
    print_v = 0
    for date in date_range:

        
        day = str(date.day).zfill(2)
        month = str(date.month).zfill(2)
        date_ = day + "_" + month
        print(date_)

        df1 = process_file("1",date_)
        if df1.empty:
            print("No data in that date")
            continue            
        df2 = process_file("2",date_)
        df3 = process_file("3",date_)
        df4 = process_file("final",date_)
        if df4.empty:
            print("No testing data in that date")
            continue     
        if print_v:
            print(df1)
            print(df2)
            print(df3)
            print(df4)


        df_in_1 = time_in_station_fun(df1)
        df_in_2 = time_in_station_fun(df2)
        df_in_3 = time_in_station_fun(df3)
        if print_v:
            print(df_in_1)
            print(df_in_2)
            print(df_in_3)


        df_bet_12 = time_between_station(df_in_1, df_in_2, 1, 2, max_time = 5)
        df_bet_23 = time_between_station(df_in_2, df_in_3, 2, 3, max_time = 20)
        df_bet_34 = time_between_station(df_in_3, df4, 3, 4, max_time = 0)
        if print_v:
            print(df_bet_12)
            print(df_bet_23)
            print(df_bet_34)


        perc1 = perc_of_gabarits_found(df_in_1, df4, 1)
        perc2 = perc_of_gabarits_found(df_in_2, df4, 2)
        perc3 = perc_of_gabarits_found(df_in_3, df4, 3)

        df_bet_all = all_time_between_station(df_bet_12, df_bet_23,df_bet_34)
        perc4 = perc_of_gabarits_found(df_bet_all, df4, 4)
        if print_v:
            print(df_bet_all)
        
        _,df_med = median_time_fun(df_bet_all)
        display(df_med)
        print()

        df_all_med = pd.concat([df_all_med, df_med])


    return df_all_med

df_all_med = aggregate_dates(df_all_med,date_range)





25_09
Percentage of gabarits scanned in sensor 1: 30.92%
Percentage of gabarits scanned in sensor 2: 64.47%
Percentage of gabarits scanned in sensor 3: 45.39%
Percentage of gabarits scanned in every sensor: 11.18%, which of those occured OK:33 and KO:1


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,7.0,219.5,78.5,352.0,2023-09-25



26_09
No data in that date
27_09
No data in that date
28_09
Percentage of gabarits scanned in sensor 1: 22.78%
Percentage of gabarits scanned in sensor 2: 32.28%
Percentage of gabarits scanned in sensor 3: 44.62%
Percentage of gabarits scanned in every sensor: 7.59%, which of those occured OK:23 and KO:1


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,3.0,225.5,81.0,303.5,2023-09-28



29_09
No data in that date
30_09
No data in that date
01_10
No data in that date
02_10
Percentage of gabarits scanned in sensor 1: 50.33%
Percentage of gabarits scanned in sensor 2: 44.0%
Percentage of gabarits scanned in sensor 3: 39.33%
Percentage of gabarits scanned in every sensor: 17.67%, which of those occured OK:51 and KO:2


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,3.0,266.0,84.0,377.0,2023-10-02



03_10
Percentage of gabarits scanned in sensor 1: 73.8%
Percentage of gabarits scanned in sensor 2: 59.11%
Percentage of gabarits scanned in sensor 3: 47.28%
Percentage of gabarits scanned in every sensor: 29.39%, which of those occured OK:88 and KO:4


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,3.0,221.0,82.0,329.0,2023-10-03



04_10
No data in that date
05_10
No data in that date
06_10
Percentage of gabarits scanned in sensor 1: 72.25%
Percentage of gabarits scanned in sensor 2: 10.98%
Percentage of gabarits scanned in sensor 3: 48.27%
Percentage of gabarits scanned in every sensor: 4.05%, which of those occured OK:13 and KO:1


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,3.0,297.0,70.0,386.0,2023-10-06



07_10
No data in that date
08_10
No data in that date
09_10
Percentage of gabarits scanned in sensor 1: 74.24%
Percentage of gabarits scanned in sensor 2: 50.51%
Percentage of gabarits scanned in sensor 3: 60.0%
Percentage of gabarits scanned in every sensor: 24.41%, which of those occured OK:66 and KO:6


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT120018W,11.0,224.0,96.5,366.5,2023-10-09



10_10
Percentage of gabarits scanned in sensor 1: 84.71%
Percentage of gabarits scanned in sensor 2: 45.54%
Percentage of gabarits scanned in sensor 3: 62.42%
Percentage of gabarits scanned in every sensor: 21.34%, which of those occured OK:67 and KO:0


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT130023W,33.0,200.0,104.0,343.0,2023-10-10



11_10
Percentage of gabarits scanned in sensor 1: 78.15%
Percentage of gabarits scanned in sensor 2: 27.41%
Percentage of gabarits scanned in sensor 3: 63.33%
Percentage of gabarits scanned in every sensor: 9.63%, which of those occured OK:25 and KO:1


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date
0,SPT130023W,55.0,238.5,97.0,413.5,2023-10-11





In [44]:

def plot_time_in_station_days(df):
    display(df)
    
    color_discrete_map_ = {"SPT130023W": 'dodgerblue',
                          "SPT120018W": 'red',
                          "ORIONT-40": 'limegreen', 
                          "OCTAN-40": 'mediumorchid'
                          }


    # Create a bar plot for each model
    df['med_total_time_'] = df['med_total_time']/60
    #df['med_total_time'] = df['med_total_time'].astype(int)
    fig = px.bar(df, x='date', y='med_total_time_', 
                 color='model', 
                 color_discrete_map = color_discrete_map_,
                 barmode='group')

    # Update layout for better visualization

    fig.update_layout(
        title='Median Total Time by Model',
        xaxis_title='Date',
        yaxis_title='Median Total Time (minutes)',
        legend_title='Model',
        barmode='group'
    )

    # Show the plot
    fig.show()

    melted_df = pd.melt(df, id_vars=['date','model','med_total_time'], value_vars=['time_bet_1_2','time_bet_2_3','time_bet_3_4'],var_name='time_bet', value_name='time')    
    melted_df['color'] = melted_df['model'] + '_' + melted_df['time_bet']
    color_discrete_map = {"SPT130023W_time_bet_1_2": 'darkblue',
                          "SPT130023W_time_bet_2_3": 'dodgerblue', 
                          "SPT130023W_time_bet_3_4": 'lightskyblue',
                          #"HAMLET-49_time_bet_1_2": 'darkblue',

                          "SPT120018W_time_bet_1_2": 'maroon',
                          "SPT120018W_time_bet_2_3":'red',
                          "SPT120018W_time_bet_3_4": 'lightcoral',
                          #"ORIONT-40_time_bet_1_2": 'forestgreen',
                          #"ORIONT-40_time_bet_2_3": 'limegreen', 
                          #"ORIONT-40_time_bet_3_4": 'lightgreen',
                            
                          #"ORIONT-40_time_bet_1_2": 'forestgreen',
                          #"ORIONT-40_time_bet_2_3": 'limegreen', 
                          #"ORIONT-40_time_bet_3_4": 'lightgreen',
                          
                          "OCTAN-40_time_bet_1_2": 'indigo',
                          "OCTAN-40_time_bet_2_3": 'mediumorchid', 
                          "OCTAN-40_time_bet_3_4": 'plum',}



    #model	time_bet_1_2, time_bet_2_3, time_bet_3_4, med_total_time, date
    
    for model in melted_df['model'].unique():
        df_model = melted_df[melted_df['model'] == model]

        df_model.loc[:,'time_'] = df_model['time']/60
        print("yo")
        #df_model['time'] = df_model['time'].astype(int)
        # Create a bar plot for the current model
        fig4 = px.bar(df_model, x='date', y='time_', 
                    color='color',
                    labels={'time': 'Time'},
                    color_discrete_map = color_discrete_map,
                    title=f'Time per process for Model: {model}')


        #
        fig4.update_xaxes(range=[start_date - pd.Timedelta(days = 1), end_date + pd.Timedelta(days = 1)])  # Set your desired date range


        # Update layout for better visualization
        fig4.update_layout(
            xaxis_title='Date',
            yaxis_title='Time per station (minutes)',
            legend_title='Time Bet',
        )

        # Show the plot
        fig4.show()


plot_time_in_station_days(df_all_med)


Unnamed: 0,model,time_bet_1_2,time_bet_2_3,time_bet_3_4,med_total_time,date,med_total_time_
0,SPT120018W,7.0,219.5,78.5,352.0,2023-09-25,5.866667
0,SPT120018W,3.0,225.5,81.0,303.5,2023-09-28,5.058333
0,SPT120018W,3.0,266.0,84.0,377.0,2023-10-02,6.283333
0,SPT120018W,3.0,221.0,82.0,329.0,2023-10-03,5.483333
0,SPT120018W,3.0,297.0,70.0,386.0,2023-10-06,6.433333
0,SPT120018W,11.0,224.0,96.5,366.5,2023-10-09,6.108333
0,SPT130023W,33.0,200.0,104.0,343.0,2023-10-10,5.716667
0,SPT130023W,55.0,238.5,97.0,413.5,2023-10-11,6.891667


yo




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



yo




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

