In [1]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import datetime
#import csv
pd.set_option('display.max_rows', None)  # Display all rows

In [8]:
if not os.path.exists("images"):
    os.mkdir("images")

day = "06"
month = "09"
dataset = "task"
date = day + "_" + month #change date accordingly
date_year = "2023-" + month + "-" + day

folder_path = 'data_fabrica/'
#folder_path = 'data_lab/'
def process_file(date):
    file_path = os.path.join(folder_path,f'{dataset}_{date}.csv')

    df = pd.read_csv(file_path, parse_dates=['ts'])

    # Remove first empty column if it exists
    if df.shape[1] == 5:
        df = df.drop(df.columns[0], axis=1)
    # Remove entries with 'sec' equal to 0
    df= df[df['sec'] != 0]

    # Write the updated DataFrame back to the CSV file
    df.to_csv(file_path, index = False)
    # Reset the index of the DataFrame
    df = df.reset_index(drop = True)

    # Create a new column 'task_label' based on the mapping
    task_label_mapping = {0: 'Break', 1: 'TEKOX Red Black Cables', 2: 'TEKOX Blue Brown Cables', 3: 'MFALG Assembly',
                        4: 'Screws in Boards', 5: 'Extra activity 1', 6: 'Extra Activity 2', 7: 'Unkown Activity'}
    df['task_name'] = df['task'].map(task_label_mapping)


    emp_label_mapping = {0: 'Fernando', 1: 'Antonio', 2: 'Daniela', 3: 'Diogo', 4: 'Elisabete',5: 'Isabel', 6: 'Joao', 
                        7:'Joaquim',8:'Lidia',9:'Maria',10:'Mario',11:'Patricia',12:'Raquel',13:'Ana',14:'Paulo',15:'Luis'}
    df['emp_name'] = df['empid'].map(emp_label_mapping)
    #display(df)
    return df


df = process_file(date)
display(df)

Unnamed: 0,ts,task,empid,sec,task_name,emp_name
0,2023-09-06 10:14:01,0,0,107,Break,Fernando
1,2023-09-06 10:14:28,5,0,25,Extra activity 1,Fernando
2,2023-09-06 10:14:59,5,0,28,Extra activity 1,Fernando
3,2023-09-06 10:15:21,5,0,24,Extra activity 1,Fernando
4,2023-09-06 10:15:48,5,0,26,Extra activity 1,Fernando
5,2023-09-06 10:16:12,5,0,24,Extra activity 1,Fernando
6,2023-09-06 10:16:39,5,0,26,Extra activity 1,Fernando
7,2023-09-06 10:17:05,5,0,25,Extra activity 1,Fernando
8,2023-09-06 10:17:28,5,0,23,Extra activity 1,Fernando
9,2023-09-06 10:17:52,5,0,23,Extra activity 1,Fernando


In [15]:
def remove_outliers(df):
    av_time = np.array([-1,12.0, 12, 144, 40, -1, -1, -1])
    av_time[0] = df[df['task'] == 0]['sec'].mean()
    av_time[5] = df[df['task'] == 5]['sec'].mean()
    av_time[6] = df[df['task'] == 6]['sec'].mean()
    av_time[7] = df[df['task'] == 7]['sec'].mean()
    print(av_time)

    for index, row in df.iterrows():
        for task_id in range(1,8):
            if ((row['task'] == task_id) and ((row['sec'] < 0.5 * av_time[task_id]) or (row['sec'] > 1.5 * av_time[task_id]))):
                print(row['task'],row['sec'])
                df = df.drop(index=index)
                
        return df

df1 = remove_outliers(df)

[382.25        12.          12.         144.          40.
  26.96059113          nan          nan]


In [3]:
include_break = 0

def plot_task(df):
    

    color_discrete_task = { 'Break': 'black', 'TEKOX Red Black Cables': 'tomato', 'TEKOX Blue Brown Cables': 'cornflowerblue', 'MFALG Assembly': 'yellow',
                      'Screws in Boards': 'darkgray', 'Extra activity 1': 'darkorange', 'Extra Activity 2': 'violet', 'Unkown Activity': 'limegreen'}
    
    color_discrete_emp = {'Fernando': 'orange', 'Luis': 'yellowgreen'}

    column_order = ['ts', 'empid','emp_name','task','task_name','sec']
    df = df[column_order]
    # Exclude task '0' from the DataFrame
    if not include_break:
        df1 = df[df['task'] != 0]
    else:
        df1 = df

    #TASK
    # Sum the 'sec' values based on the 'task' column
    sum_by_task = df1.groupby('task_name')['sec'].sum().reset_index()
    print(sum_by_task)

    # Create a pie chart
    fig_task = px.pie(sum_by_task, names='task_name', 
                      values='sec',
                      color = 'task_name',
                      color_discrete_map = color_discrete_task, 
                      title='Time Spent in Each Task')
    fig_task.show()

    #EMPLOYEE // do not work in break
    df2 = df1[df1['task'] != 0]
    # Sum the 'sec' values based on the 'emp_name' column
    sum_by_emp = df2.groupby('emp_name')['sec'].sum().reset_index()
    print(sum_by_task)

    # Create a pie chart
    fig_emp = px.pie(sum_by_emp, names='emp_name', 
                     values='sec', 
                     color = 'emp_name',
                      color_discrete_map = color_discrete_emp,
                      title='Time Worked by Each Employee')
    fig_emp.show()

plot_task(df1)

#for i in unique_values:
#    total_time[i] = 
    

          task_name   sec
0  Extra activity 1  5473


          task_name   sec
0  Extra activity 1  5473


In [6]:
def plot_tasks_in_time(df):
    #display(px.data.medals_wide())
    # Create a dictionary to map models to colors
    #task_label_mapping = {0: 'Break', 1: 'TEKOX Red Black Cables', 2: 'TEKOX Blue Brown Cables', 3: 'MFALG Assembly',
    #                  4: 'Screws in Boards', 5: 'Extra activity 1', 6: 'Extra Activity 2', 7: 'Unkown Activity'}
    if not include_break:
        df = df[df['task'] != 0]
    else:
        df = df

    color_discrete = { 'Break': 'black', 'TEKOX Red Black Cables': 'tomato', 'TEKOX Blue Brown Cables': 'cornflowerblue', 'MFALG Assembly': 'yellow',
                      'Screws in Boards': 'darkgray', 'Extra activity 1': 'darkorange', 'Extra Activity 2': 'violet', 'Unkown Activity': 'limegreen'}
    

    # Create subplots
    

    # Create bars for the first subplot
    fig = px.bar(df, x="ts", y="sec",
                color="task_name",
                labels={"ts": "Timestamp", "sec": "Time (seconds)",'task_name': 'Task names'},
                color_discrete_map = color_discrete,
                title='Tasks Performed Over Time')
    subplots = 0
    if subplots:
        fig_sub = make_subplots(rows=2, cols=2)
        for trace in fig.data:
            fig_sub.add_trace(trace, row=1, col=1)
        # Set x-axis limits for the first subplot
        fig_sub.update_xaxes(range=[date_year + ' 10:14:00', date_year + ' 10:31:00'], row=1, col=1)

        # Create bars for the second subplot
        fig2 = fig
        for trace in fig2.data:
            fig_sub.add_trace(trace, row=1, col=2)
        # Set x-axis limits for the second subplot
        fig_sub.update_xaxes(range=[date_year + ' 11:35:00', date_year + ' 12:00:00'], row=1, col=2)


        fig3 = fig
        for trace in fig3.data:
            fig_sub.add_trace(trace, row=2, col=1)
        # Update layout
        fig_sub.update_xaxes(range=[date_year + ' 13:04:00', date_year + ' 13:55:00'], row=2, col=1)



    # Get the minimum and maximum values of the y-axis
    y_min = df['sec'].min()
    y_max = df['sec'].max()

    # Set the y-axis range from the minimum to the maximum + 1
    fig.update_layout(yaxis_range=[y_min, y_max + 1])
    fig.update_layout(bargap=0) # gap between bars of adjacent location coordinates.
    #bargroupgap=0.1) # gap between bars of the same location coordinate.)
    #display(df)
    
    fig.show()

plot_tasks_in_time(df1)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

