In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from neural_networks import Sequential
from layers import Dense
from errors import MeanSquaredError
from utils import BagOfWords, preprocess_data, get_batches

In [2]:
# Load the task dataset from the working directory into a DataFrame.
df = pd.read_csv("dataset.csv")
# NOTE(review): the recorded preview shows "Unnamed: 0.1" and "Unnamed: 0"
# columns that mirror the row index -- they look like leftovers from the CSV
# having been saved with its index more than once; confirm and drop them at
# the source. Only "Task" and "Time to complete (minutes)" are used below.
# Bare last expression so the notebook renders the first rows as a table.
df.head()

Unnamed: 0.1,Unnamed: 0,Task,Importance,Time to complete (minutes)
0,0,Fix broken appliance,7,60
1,1,Sort and organize important documents,4,60
2,2,Plan product launch event,9,120
3,3,Volunteer at the local shelter,9,120
4,4,Walk the Dog,3,20


In [3]:
# Turn the raw task descriptions into model inputs (X) and targets (y).

# Tokenize every task description; each example is iterated as a sequence of
# words below.
tokenized_tasks = preprocess_data(list(df["Task"]))

# Every distinct word seen across the whole corpus.
vocabulary = {word for example in tokenized_tasks for word in example}

# Hand the encoder a *sorted* vocabulary. Iterating a set of strings yields a
# different order in each Python process (hash randomization), so passing
# list(vocabulary) makes the vocabulary order -- and presumably the feature
# column order -- change on every kernel restart. A model trained in one
# session would then be fed mismatched columns in another.
# Assumes the tokens are mutually comparable (e.g. all strings).
bag_of_words = BagOfWords(vocabulary=sorted(vocabulary))
X = bag_of_words.encode(tokenized_tasks)

# Targets as a column vector: one row per task, one column (minutes).
y = np.asarray(df["Time to complete (minutes)"]).reshape((-1, 1))

print(X.shape)
print(y.shape)

(10003, 1564)
(10003, 1)


In [4]:
# Split the encoded features and targets into mini-batches of 16 examples.
# NOTE(review): the 10003 examples reported above do not divide evenly by 16;
# confirm how get_batches treats the final partial batch.
X_batches, y_batches = get_batches(X, y, batch_size=16)
# Sanity check: shape of the first feature batch.
print(X_batches[0].shape)

(16, 1564)


In [5]:
# Two-layer regressor: a 100-unit ReLU hidden layer sized to the feature
# width of X, followed by a single linear output unit.
hidden_layer = Dense(
    input_units=X.shape[1],
    output_units=100,
    activation='relu',
)
output_layer = Dense(
    input_units=100,
    output_units=1,
    activation='linear',
)
model = Sequential([hidden_layer, output_layer])

# Configure training with a mean-squared-error objective and a 1e-3
# learning rate.
model.compile(
    error=MeanSquaredError(),
    learning_rate=1e-3,
)

In [None]:
# Train on the mini-batches for up to 100 epochs and keep what fit() returns
# (presumably one cost value per epoch) for the learning-curve plot.
# NOTE(review): the recorded log flattens out near 527.31 from about epoch 10
# and its last entry is epoch 22 of 100, so the run looks interrupted;
# consider fewer epochs or a stopping criterion.
cost_history = model.fit(X_batches, y_batches, epochs=100)

Epoch #0: Cost = 846.0853610364876
Epoch #1: Cost = 828.9901334909646
Epoch #2: Cost = 619.315275136668
Epoch #3: Cost = 553.4531918780696
Epoch #4: Cost = 534.6999931951887
Epoch #5: Cost = 529.3803282131024
Epoch #6: Cost = 527.8811301825299
Epoch #7: Cost = 527.4639090576729
Epoch #8: Cost = 527.3506629330333
Epoch #9: Cost = 527.3214971048156
Epoch #10: Cost = 527.3148705890528
Epoch #11: Cost = 527.3138899152007
Epoch #12: Cost = 527.3140976698913
Epoch #13: Cost = 527.3144178935016
Epoch #14: Cost = 527.3146488419194
Epoch #15: Cost = 527.3147893769483
Epoch #16: Cost = 527.3148693817274
Epoch #17: Cost = 527.314913543624
Epoch #18: Cost = 527.3149375492806
Epoch #19: Cost = 527.314950495598
Epoch #20: Cost = 527.31495744871
Epoch #21: Cost = 527.3149611748561
Epoch #22: Cost = 527.3149631693589


In [None]:
# Learning curve: training cost against epoch number.
# Assumes cost_history is a sequence with one entry per epoch -- TODO confirm
# against Sequential.fit.
# The first entry (epoch 0) is left out of the plot, so the x values start at
# 1; without explicit x values the sliced series would be drawn at 0..n-2 and
# the axis would be off by one relative to the epoch numbers in the training
# log.
epochs = range(1, len(cost_history))

fig, ax = plt.subplots()
ax.plot(epochs, cost_history[1:])
ax.set_title("Learning curve")
ax.set_xlabel("Epochs")
ax.set_ylabel("Cost")  # the figure should be readable on its own
plt.show()

In [None]:
import pickle as pkl

# Persist the trained model next to the notebook as a pickle file.
# NOTE(review): only `model` is written here; the bag-of-words encoder is not
# saved, so whatever loads this file needs the same vocabulary -- confirm.
with open('task_duration_model.pkl', mode='wb') as model_file:
    model_file.write(pkl.dumps(model))