In [136]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime
import math 

#import csv
pd.set_option('display.max_rows', None)  # Display all rows

In [137]:
# Exported figures are saved under ./images; create the folder if it is missing.
if not os.path.exists("images"):
    os.mkdir("images")

# Recording inspected by the single-day cells -- change day/month accordingly.
day, month = "31", "07"
dataset = "task"
date = f"{day}_{month}"  # date suffix used in the CSV file names
date_year = f"2023-{month}-{day}"

# Folder holding the CSV recordings (alternative location kept for reference).
#folder_path = 'data_lab/'
folder_path = 'data_fabrica/'
def process_file(date, folder=None, dataset_name=None):
    """Load, clean and label the recording of one day.

    Reads ``<dataset_name>_<date>.csv`` from ``folder``, discards rows whose
    ``sec`` is 0, writes the cleaned table back over the same file and adds
    the readable ``task_name`` / ``emp_name`` columns derived from the
    numeric ``task`` / ``empid`` codes.

    Parameters
    ----------
    date : str
        Date suffix of the file name, e.g. ``"31_07"``.
    folder : str, optional
        Folder containing the CSV files. Defaults to the module-level
        ``folder_path``.
    dataset_name : str, optional
        File-name prefix. Defaults to the module-level ``dataset``.

    Returns
    -------
    pandas.DataFrame
        The cleaned, labelled data with a fresh 0..n-1 index, or an empty
        DataFrame when the file does not exist.
    """
    folder = folder_path if folder is None else folder
    dataset_name = dataset if dataset_name is None else dataset_name
    file_path = os.path.join(folder, f'{dataset_name}_{date}.csv')

    if not os.path.exists(file_path):
        # Return an empty *instance* (not the DataFrame class) so callers can
        # rely on the regular DataFrame API, e.g. ``result.empty``.
        return pd.DataFrame()

    df = pd.read_csv(file_path, parse_dates=['ts'])

    # A file with exactly 5 columns carries an extra leading column
    # (presumably a saved index -- TODO confirm); drop it.
    if df.shape[1] == 5:
        df = df.drop(df.columns[0], axis=1)

    # Remove entries with 'sec' equal to 0
    df = df[df['sec'] != 0]

    # NOTE: this overwrites the source CSV with the cleaned rows.
    df.to_csv(file_path, index=False)
    df = df.reset_index(drop=True)

    # Readable task label for each numeric task code
    task_label_mapping = {0: 'Break', 1: 'TEKOX Red Black Cables', 2: 'TEKOX Blue Brown Cables', 3: 'MFALG Assembly',
                          4: 'Screws in Boards', 5: 'Extra activity 1', 6: 'Extra Activity 2', 7: 'Unkown Activity'}
    df['task_name'] = df['task'].map(task_label_mapping)

    # Readable employee name for each numeric employee id
    emp_label_mapping = {0: 'Fernando', 1: 'Antonio', 2: 'Daniela', 3: 'Diogo', 4: 'Elisabete', 5: 'Isabel', 6: 'Joao',
                         7: 'Joaquim', 8: 'Lidia', 9: 'Maria', 10: 'Mario', 11: 'Patricia', 12: 'Raquel', 13: 'Ana', 14: 'Paulo', 15: 'Luis'}
    df['emp_name'] = df['empid'].map(emp_label_mapping)

    return df


# Load and clean the recording whose file-name suffix is `date`.
df = process_file(date)
#display(df)

In [138]:
def remove_outliers(df, alpha):
    """Drop rows whose duration is too far from the reference time of their task.

    For tasks 1-7 a row is an outlier when ``sec`` lies outside
    ``[(1 - alpha) * ref, (1 + alpha) * ref]``. Tasks 1-4 use fixed reference
    times; tasks 5-7 use the mean ``sec`` of that task in ``df``. Task 0 is
    never filtered. The reference times, the per-task averages after
    filtering and the number of removed rows are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``task`` and ``sec``.
    alpha : float
        Relative tolerance around the reference time (0.5 means +/- 50 %).

    Returns
    -------
    pandas.DataFrame
        ``df`` without the outlier rows; the original index labels are kept.
    """
    if df.empty:
        # Nothing to filter; also avoids a KeyError on a frame without columns.
        print("Outliers removed: 0")
        return df

    # Reference time per task id; -1 marks slots filled from the data below.
    ref_av_time = np.array([-1, 12.0, 12, 144, 40, -1, -1, -1])
    for task_id in (0, 5, 6, 7):
        ref_av_time[task_id] = df[df['task'] == task_id]['sec'].mean()
    print("Reference average")
    print(ref_av_time)

    # Reference of each row's task. Task 0 and unknown ids map to NaN, and every
    # comparison with NaN is False, so those rows are never flagged.
    reference = df['task'].map({task_id: ref_av_time[task_id] for task_id in range(1, 8)}).astype(float)
    too_short = df['sec'] < (1 - alpha) * reference
    too_long = df['sec'] > (1 + alpha) * reference
    is_outlier = (too_short | too_long).to_numpy()

    # Positional mask instead of drop-by-label: linear time, and rows sharing an
    # index label with an outlier are not removed by accident.
    outliers = int(is_outlier.sum())
    df = df[~is_outlier]

    print("Average")
    av_time = np.zeros(8)
    for task_id in range(0, 8):
        av_time[task_id] = df[df['task'] == task_id]['sec'].mean()
        if not math.isnan(av_time[task_id]):
            print(task_id, av_time[task_id])

    print()
    print(f"Outliers removed: {outliers}")
    return df

# Keep only durations within +/- 50 % of each task's reference time.
df1 = remove_outliers(df, alpha = 0.5)
#display(df1)

Reference average
[580.57142857  12.          12.         144.          40.
          nan          nan          nan]
Average
0 580.5714285714286
4 27.462222222222223

Outliers removed: 20


In [139]:
# Set to 1 to keep the 'Break' task (task 0) in the per-task pie chart.
include_break_pie = 0

def _figure_label(df):
    """Return the file-name suffix for df: 'dd_mm', or 'dd_mm-dd_mm' when it spans several days."""
    stamps = pd.to_datetime(df['ts'], errors='coerce').dropna()
    if stamps.empty:
        # No usable timestamps: fall back to the notebook-wide date suffix.
        return date
    first = stamps.min().strftime('%d_%m')
    last = stamps.max().strftime('%d_%m')
    return first if first == last else f'{first}-{last}'

def plot_task(df, label=None):
    """Show and export pie charts of the time spent per task and per employee.

    Prints the summed seconds (and the same value as a Timedelta) for every
    task and every employee, shows both pies and writes them to
    ``images/taskpie_<label>.svg`` and ``images/emppie_<label>.svg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ts, empid, emp_name, task, task_name and sec.
    label : str, optional
        Suffix of the exported file names. When omitted it is derived from
        the timestamps in ``df``, so charts for different days (or for a
        whole period) no longer overwrite each other; the module-level
        ``date`` is used only when ``df`` holds no valid timestamp.
    """
    color_discrete_task = { 'Break': 'navy', 'TEKOX Red Black Cables': 'tomato', 'TEKOX Blue Brown Cables': 'cornflowerblue', 'MFALG Assembly': 'gold',
                      'Screws in Boards': 'darkgray', 'Extra activity 1': 'darkorange', 'Extra Activity 2': 'violet', 'Unkown Activity': 'limegreen'}

    color_discrete_emp = {'Fernando': 'cornflowerblue', 'Luis': 'yellowgreen'}

    column_order = ['ts', 'empid','emp_name','task','task_name','sec']
    df = df[column_order]

    if label is None:
        label = _figure_label(df)

    # Exclude task '0' (break) from the task pie unless explicitly requested
    if not include_break_pie:
        df1 = df[df['task'] != 0]
    else:
        df1 = df

    # TASK: total seconds per task
    sum_by_task = df1.groupby('task_name')['sec'].sum().reset_index()

    for index, row in sum_by_task.iterrows():
        print(row['task_name'], row['sec'])
        print(pd.Timedelta(seconds = row['sec']))

    fig_task = px.pie(sum_by_task, names='task_name',
                      values='sec',
                      color = 'task_name',
                      color_discrete_map = color_discrete_task,
                      width = 600)
    fig_task.show()
    fig_task.write_image('images/taskpie_' + label + '.svg')

    # EMPLOYEE: breaks never count as worked time
    df2 = df1[df1['task'] != 0]
    sum_by_emp = df2.groupby('emp_name')['sec'].sum().reset_index()

    for index, row in sum_by_emp.iterrows():
        print(row['emp_name'], row['sec'])
        print(pd.Timedelta(seconds = row['sec']))

    fig_emp = px.pie(sum_by_emp, names='emp_name',
                     values='sec',
                     color = 'emp_name',
                     color_discrete_map = color_discrete_emp,
                     width = 550)
    fig_emp.show()

    fig_emp.write_image('images/emppie_' + label + '.svg')
# Pie charts for the outlier-filtered single-day data.
plot_task(df1)

#for i in unique_values:
#    total_time[i] = 
    

Screws in Boards 12358
0 days 03:25:58


Fernando 12358
0 days 03:25:58


In [140]:
# Set to 1 to keep the 'Break' task (task 0) in the timeline bar chart.
include_break_bar = 0
def plot_tasks_in_time(df):
    """Show a bar chart of the duration of every entry over time, coloured by task.

    Rows of task 0 are left out unless ``include_break_bar`` is truthy.
    ``df`` needs the columns ts, sec, task and task_name.
    """
    if not include_break_bar:
        df = df[df['task'] != 0]

    # Fixed colour for every task label
    task_colors = { 'Break': 'navy', 'TEKOX Red Black Cables': 'tomato', 'TEKOX Blue Brown Cables': 'cornflowerblue', 'MFALG Assembly': 'gold',
                      'Screws in Boards': 'darkgray', 'Extra activity 1': 'darkorange', 'Extra Activity 2': 'violet', 'Unkown Activity': 'limegreen'}
    axis_titles = {"ts": "Timestamp", "sec": "Time (seconds)",'task_name': 'Task names'}

    fig = px.bar(
        df,
        x="ts",
        y="sec",
        color="task_name",
        labels=axis_titles,
        color_discrete_map=task_colors,
    )

    # y axis runs from 0 to the longest duration + 1; no gap between
    # bars of adjacent location coordinates.
    tallest = df['sec'].max()
    fig.update_layout(yaxis_range=[0, tallest + 1], bargap=0)

    fig.show()



# Timeline bar chart for the outlier-filtered single-day data.
plot_tasks_in_time(df1)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



This section gathers the data for a range of days and presents the corresponding graphs, first for each day and then for the whole period.


In [141]:
# Inclusive period to analyse (day/month/year).
# Other periods tried before:
#   01/08/2023 - 02/08/2023 : interesting (break, screws, MFALG)
#   06/09/2023 - 07/09/2023 : not very interesting (extra activity 1, Fernando)
start_date = pd.to_datetime('31/07/2023', format='%d/%m/%Y')
end_date = pd.to_datetime('27/09/2023', format='%d/%m/%Y')

# One timestamp per calendar day of the period
date_range = pd.date_range(start=start_date, end=end_date)

# Per-day frames and their concatenation, filled in further down
dfs = []
df_all = pd.DataFrame()

def all_plots(df):
    """Draw every chart for ``df``: the pie charts first, then the timeline bars."""
    for draw in (plot_task, plot_tasks_in_time):
        draw(df)

def plots_all_dates(date_range):
    """Load, filter and plot every day of ``date_range`` that has data.

    For each day the ``dd_mm`` suffix is printed, the matching file is loaded
    with ``process_file``, outliers are removed (alpha = 0.5) and all charts
    are drawn. Days without data are reported and skipped.

    Parameters
    ----------
    date_range : iterable of datetime-like
        Days to process; each item must support ``strftime``.

    Returns
    -------
    list of pandas.DataFrame
        The filtered frame of every day that had data, in date order. A new
        list is built on every call, so repeated calls do not accumulate
        results in shared module state.
    """
    frames = []
    for day_stamp in date_range:
        date_ = day_stamp.strftime('%d_%m')
        print(date_)

        df = process_file(date_)
        # Treat anything that is not a populated DataFrame as "no data",
        # instead of relying on the truthiness of whatever `.empty` yields.
        if not isinstance(df, pd.DataFrame) or df.empty:
            print("No data in that date")
            continue

        df1 = remove_outliers(df, alpha = 0.5)

        all_plots(df1)
        frames.append(df1)

    return frames

# Plot each day of the period, then the whole period at once.
dfs = plots_all_dates(date_range)
if dfs:
    df_all = pd.concat(dfs, ignore_index=True)
    print("1-2")
    all_plots(df_all)
else:
    # pd.concat raises ValueError on an empty list, so report the situation instead.
    print("No data in the selected date range")




31_07
Reference average
[580.57142857  12.          12.         144.          40.
          nan          nan          nan]
Average
0 580.5714285714286
4 27.462222222222223

Outliers removed: 20
Screws in Boards 12358
0 days 03:25:58


Fernando 12358
0 days 03:25:58



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



01_08
Reference average
[467.33333333  12.          12.         144.          40.
          nan          nan          nan]
Average
0 467.3333333333333
3 117.55555555555556
4 27.022222222222222

Outliers removed: 2
MFALG Assembly 5290
0 days 01:28:10
Screws in Boards 2432
0 days 00:40:32


Fernando 7722
0 days 02:08:42



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



02_08
Reference average
[2080.66666667   12.           12.          144.           40.
           nan           nan           nan]
Average
0 2080.6666666666665
3 122.0873786407767

Outliers removed: 11
MFALG Assembly 12575
0 days 03:29:35


Fernando 12575
0 days 03:29:35


03_08
No data in that date
04_08
No data in that date
05_08
No data in that date
06_08
No data in that date
07_08
No data in that date
08_08
No data in that date
09_08
No data in that date
10_08
No data in that date
11_08
No data in that date
12_08
No data in that date
13_08
No data in that date
14_08
No data in that date
15_08
No data in that date
16_08
No data in that date
17_08
No data in that date
18_08
No data in that date
19_08
No data in that date
20_08
No data in that date
21_08
No data in that date
22_08
No data in that date
23_08
No data in that date
24_08
No data in that date
25_08
No data in that date
26_08
No data in that date
27_08
No data in that date
28_08
No data in that date
29_08
No data in that date
30_08
No data in that date
31_08
No data in that date
01_09
No data in that date
02_09
No data in that date
03_09
No data in that date
04_09
No data in that date
05_09
No data in that date
06_09
Reference average
[1205.33333333   12.           12.          144.          

Fernando 4241
0 days 01:10:41


07_09
Reference average
[1136.66666667   12.           12.          144.           40.
   30.10580913           nan           nan]
Average
0 1136.6666666666667
5 24.433962264150942

Outliers removed: 58
Extra activity 1 10360
0 days 02:52:40


Fernando 10360
0 days 02:52:40


08_09
No data in that date
09_09
No data in that date
10_09
No data in that date
11_09
No data in that date
12_09
No data in that date
13_09
No data in that date
14_09
No data in that date
15_09
No data in that date
16_09
No data in that date
17_09
No data in that date
18_09
No data in that date
19_09
No data in that date
20_09
No data in that date
21_09
No data in that date
22_09
No data in that date
23_09
No data in that date
24_09
No data in that date
25_09
No data in that date
26_09
No data in that date
27_09
Reference average
[1001.33333333   12.           12.          144.           40.
           nan           nan           nan]
Average
0 1001.3333333333334
1 12.432432432432432
2 12.037037037037036
4 40.76470588235294

Outliers removed: 1
Screws in Boards 693
0 days 00:11:33
TEKOX Blue Brown Cables 325
0 days 00:05:25
TEKOX Red Black Cables 460
0 days 00:07:40


Luis 1478
0 days 00:24:38



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



1-2
Extra activity 1 14601
0 days 04:03:21
MFALG Assembly 17865
0 days 04:57:45
Screws in Boards 15483
0 days 04:18:03
TEKOX Blue Brown Cables 325
0 days 00:05:25
TEKOX Red Black Cables 460
0 days 00:07:40


Fernando 47256
0 days 13:07:36
Luis 1478
0 days 00:24:38



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

