In [75]:
import pandas as pd
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

ModuleNotFoundError: No module named 'tensorflow.compat'

In [44]:
# Load the dataset from the CSV file and display its (rows, columns) shape
data = pd.read_csv(filepath_or_buffer='datos_limpios0.csv')
data.shape

(1161, 15)

In [45]:
# Collect the column names as a plain Python list. Only the last expression of
# a cell is rendered, so the list is kept as the cell's single output.
headers = data.columns.tolist()
headers

['date',
 'quarter',
 'department',
 'day',
 'team',
 'targeted_productivity',
 'smv',
 'wip',
 'over_time',
 'incentive',
 'idle_time',
 'idle_men',
 'no_of_style_change',
 'no_of_workers',
 'actual_productivity']

**Factores:**
- **Date**: Date in MM-DD-YYYY
- **Quarter**: A portion of the month. A month was divided into four quarters; note that the data also contains a `Quarter5` label (e.g. for day 29 of January).
- **Department**: Associated department with the instance
- **Day**: Day of the Week
- **Team**: Associated team number with the instance
- **Targeted productivity**: Targeted productivity set by the Authority for each team for each day.
- **SMV**: Standard Minute Value, it is the allocated time for a task
- **WIP**: Work in progress. Includes the number of unfinished items for products
- **Over time**: Represents the amount of overtime by each team in minutes
- **Incentive**: Represents the amount of financial incentive (in BDT) that enables or motivates a particular course of action.
- **Idle time**: The amount of time when the production was interrupted due to several reasons
- **Idle men**: The number of workers who were idle due to production interruption
- **No of style change**: Number of changes in the style of a particular product
- **No of workers**: Number of workers in each team

**Variable de respuesta:**
- **Actual productivity**: The actual % of productivity that was delivered by the workers. It ranges from 0-1.



In [46]:
# Split the 'date' column into numeric Day / Month / Year features and remove
# the original column.
# - The guard keeps the cell safe to re-run: after the first execution 'date'
#   is no longer a column of `data`, and an unconditional drop would raise.
# - `assign(...).drop(...)` builds a new frame instead of mutating in place.
# - Unparseable dates are coerced to NaT (errors='coerce'), which shows up as
#   missing values in the three derived columns.
# - 'Day' (day of the month) is distinct from the existing lowercase 'day'
#   column (day of the week).
if 'date' in data.columns:
    parsed_dates = pd.to_datetime(data['date'], errors='coerce')
    data = data.assign(
        Day=parsed_dates.dt.day,
        Month=parsed_dates.dt.month,
        Year=parsed_dates.dt.year,
    ).drop(columns=['date'])

# Show the first rows of the DataFrame
data.head()

Unnamed: 0,quarter,department,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity,Day,Month,Year
0,Quarter1,sweing,Thursday,8,0.8,26.16,1108.0,7080,98,0.0,0,0,59.0,0.940725,1,1,2015
1,Quarter1,finishing,Thursday,1,0.75,3.94,0.0,960,0,0.0,0,0,8.0,0.8865,1,1,2015
2,Quarter1,sweing,Thursday,11,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057,1,1,2015
3,Quarter1,sweing,Thursday,12,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057,1,1,2015
4,Quarter1,sweing,Thursday,6,0.8,25.9,1170.0,1920,50,0.0,0,0,56.0,0.800382,1,1,2015


In [47]:
data.shape

(1161, 17)

In [53]:
# Integer-valued columns kept as their own feature group
# (presumably treated as integer-coded categoricals downstream -- confirm).
cat_int_feats = [
    'team',
    'no_of_style_change',
    'Day',
    'Month',
    'Year',
]

In [54]:
# String-valued columns kept as their own feature group
# (presumably treated as string categoricals downstream -- confirm).
cat_str_feats = [
    'quarter',
    'department',
    'day',
]

In [55]:
# Numeric columns kept as their own feature group
num_feats = [
    'targeted_productivity',
    'smv',
    'wip',
    'over_time',
    'incentive',
    'idle_time',
    'idle_men',
    'no_of_workers',
]

In [56]:
# Full feature list: integer group first, then string group, then numeric group
feats_ordered = [*cat_int_feats, *cat_str_feats, *num_feats]

In [57]:
len(feats_ordered)

16

In [58]:
# Keep the ordered feature columns followed by the target column
df = data[[*feats_ordered, 'actual_productivity']]

In [59]:
df.head()

Unnamed: 0,team,no_of_style_change,Day,Month,Year,quarter,department,day,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_workers,actual_productivity
0,8,0,1,1,2015,Quarter1,sweing,Thursday,0.8,26.16,1108.0,7080,98,0.0,0,59.0,0.940725
1,1,0,1,1,2015,Quarter1,finishing,Thursday,0.75,3.94,0.0,960,0,0.0,0,8.0,0.8865
2,11,0,1,1,2015,Quarter1,sweing,Thursday,0.8,11.41,968.0,3660,50,0.0,0,30.5,0.80057
3,12,0,1,1,2015,Quarter1,sweing,Thursday,0.8,11.41,968.0,3660,50,0.0,0,30.5,0.80057
4,6,0,1,1,2015,Quarter1,sweing,Thursday,0.8,25.9,1170.0,1920,50,0.0,0,56.0,0.800382


In [60]:
# Randomly select 80% of the rows with a fixed seed so the selection is
# reproducible, then preview the first rows of the selection.
train = df.sample(random_state=100, frac=0.8)
train.head()

Unnamed: 0,team,no_of_style_change,Day,Month,Year,quarter,department,day,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_workers,actual_productivity
892,12,0,25,2,2015,Quarter4,sweing,Wednesday,0.8,15.26,1053.0,4080,50,0.0,0,34.0,0.800402
492,4,0,29,1,2015,Quarter5,finishing,Thursday,0.8,4.3,0.0,1200,0,0.0,0,10.0,0.989
137,3,0,8,1,2015,Quarter2,sweing,Thursday,0.8,19.87,857.0,9900,50,0.0,0,55.0,0.800274
638,1,0,8,2,2015,Quarter2,finishing,Sunday,0.8,3.94,0.0,1200,0,0.0,0,10.0,0.893067
918,2,1,26,2,2015,Quarter4,sweing,Thursday,0.7,30.33,398.0,6960,0,0.0,0,58.0,0.662379


In [61]:
train.shape

(929, 17)

In [62]:
# Every row whose index label was not sampled into `train` goes to the test set
test = df.drop(index=train.index)
test.head()

Unnamed: 0,team,no_of_style_change,Day,Month,Year,quarter,department,day,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_workers,actual_productivity
2,11,0,1,1,2015,Quarter1,sweing,Thursday,0.8,11.41,968.0,3660,50,0.0,0,30.5,0.80057
11,10,0,1,1,2015,Quarter1,sweing,Thursday,0.75,19.31,578.0,6480,45,0.0,0,54.0,0.712205
13,10,0,1,1,2015,Quarter1,finishing,Thursday,0.65,3.94,0.0,960,0,0.0,0,8.0,0.705917
17,4,0,1,1,2015,Quarter1,sweing,Thursday,0.65,23.69,861.0,7200,0,0.0,0,60.0,0.52118
34,2,0,3,1,2015,Quarter1,sweing,Saturday,0.75,19.87,944.0,6600,45,0.0,0,55.0,0.750243


In [63]:
# Take a reproducible 20% sample of the current `train` rows as validation data
val = train.sample(random_state=100, frac=0.2)
val.shape

(186, 17)

In [64]:
# Remove the validation rows from the training set. A boolean mask is used
# instead of `drop(val.index)` so the cell can be re-run safely: once the rows
# are gone the mask simply selects everything, whereas `drop` would raise a
# KeyError for labels that are no longer present.
train = train.loc[~train.index.isin(val.index)]

In [65]:
# Report the (rows, columns) shape of each split: train, validation, test
for split_frame in (train, val, test):
    print(split_frame.shape)

(743, 17)
(186, 17)
(232, 17)


Función para convertir de dataframe (pandas) a dataset (tensorflow), separando características y etiquetas

In [71]:
def dataframe_to_dataset(dataframe, label_col="actual_productivity", shuffle=True, seed=None):
    """Convert a pandas DataFrame into a ``tf.data.Dataset`` of (features, label) pairs.

    Args:
        dataframe: DataFrame holding the feature columns plus the label column.
            It is copied before the label is split off, so the caller's frame
            is left unchanged.
        label_col: Name of the column to use as the label. Defaults to
            ``"actual_productivity"``.
        shuffle: When True (the default) the dataset is shuffled with a buffer
            as large as the number of rows. Pass False for data whose order
            should be kept, e.g. validation or test splits.
        seed: Optional seed forwarded to ``Dataset.shuffle``; ``None`` keeps
            the default, unseeded behaviour.

    Returns:
        The dataset built by ``tf.data.Dataset.from_tensor_slices`` from the
        tuple ``(features, labels)``, where ``features`` is a dict mapping each
        remaining column name to its column.

    Raises:
        KeyError: If ``label_col`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    labels = features.pop(label_col)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    n_rows = len(features)
    # A shuffle buffer must hold at least one element, so an empty frame is
    # returned unshuffled instead of requesting buffer_size=0.
    if shuffle and n_rows > 0:
        ds = ds.shuffle(buffer_size=n_rows, seed=seed)
    return ds

In [70]:
# Build one TensorFlow dataset per split, in the order train, validation, test
train_ds, val_ds, test_ds = map(dataframe_to_dataset, (train, val, test))

AttributeError: module 'tensorflow' has no attribute 'data'

In [None]:
type(train_ds)