In [1]:
import datetime 
import pandas as pd
import numpy as np

## Read file

In [2]:
df = pd.read_csv('hri_proj_db.csv')
df.head()

Unnamed: 0,timestamp,is_boss
0,11:45:11,False
1,16:07:28,False
2,13:44:26,True
3,13:09:56,False
4,15:31:22,True


## Train

In [3]:
train_num = int(0.9 * len(df['is_boss']))
print("Row to train:",train_num)
train = df.iloc[:train_num,:]
test = df.iloc[train_num:,:]
print(train.shape, test.shape)

Row to train: 482
(482, 2) (54, 2)


In [4]:
# Chop the day time into 32 blocks of 15-minute time block
def convertBlock(timestamp):
    time = timestamp.split(':')
    # #hour * 60min + min = 480 min
    hour = int(time[0]) - 9 # hour start at 09:00
    minute = int(time[1])
    total = hour * 60 + minute
    return int(total / 15)

# Add a column for the block
train['block'] = [convertBlock(train.loc[i, 'timestamp']) for i in range(len(train['timestamp']))]
train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['block'] = [convertBlock(train.loc[i, 'timestamp']) for i in range(len(train['timestamp']))]


Unnamed: 0,timestamp,is_boss,block
0,11:45:11,False,11
1,16:07:28,False,28
2,13:44:26,True,18
3,13:09:56,False,16
4,15:31:22,True,26


In [5]:
train['is_boss'].value_counts()

True     262
False    220
Name: is_boss, dtype: int64

In [6]:
# Calculate probabilities from the dataset
# P(Time = T)
p_time = 1/32

# P(Boss = True)
p_boss = train['is_boss'].value_counts()[1] / len(train['is_boss'])

# P(Time = T | Boss = True) = P(Time = T ∩ Boss = True) / P(Boss = True)
p_time_and_boss = np.zeros(32)
for i in range(32):
    p_time_and_boss[i] = len(train[(train['is_boss'] == True) & (train['block'] == i)]) / len(train['is_boss'])
    
p_time_given_boss = p_time_and_boss / p_boss
p_time_given_boss

array([0.0610687 , 0.03435115, 0.03816794, 0.02671756, 0.02290076,
       0.03435115, 0.03435115, 0.01526718, 0.03435115, 0.03816794,
       0.02290076, 0.03053435, 0.01526718, 0.01908397, 0.03435115,
       0.03053435, 0.03053435, 0.02671756, 0.01908397, 0.03816794,
       0.0610687 , 0.04961832, 0.01908397, 0.01908397, 0.04198473,
       0.03053435, 0.03816794, 0.02290076, 0.02671756, 0.02290076,
       0.03435115, 0.02671756])

# Test

In [7]:
# Target: P(Boss = True | Time = T)
def predictBoss(timestamp):
    block = convertBlock(timestamp)
    # P(Boss = True | Time = T) = (P(Time = T | Boss = True) * P(Boss = True)) / P(Time = T)
    p_boss_given_time = (p_time_given_boss[block] * p_boss) / p_time
    # Set the confidence threshold to 0.7
    if p_boss_given_time > 0.7:
        return True
    return False

In [8]:
# Test
results = np.zeros(len(test['is_boss']))
for i in range(len(test['is_boss'])):
    if predictBoss(df.loc[i,'timestamp']) == df.loc[i,'is_boss']:
        results[i] = 1
# Show accuracy
print('Accuracy =', np.sum(results)/len(results))

Accuracy = 0.5740740740740741
