# Data Preprocessing

In [1]:
# Step 1: load the raw light-bulb dataset and preview its first rows
import pandas as pd

DATASET_CSV = "ai-light-bulb-dataset.csv"  # path is relative to the notebook's working directory
df = pd.read_csv(DATASET_CSV)
df.head()

Unnamed: 0,Light Sensor,Time,Light State
0,0.0,00:00,Off
1,0.0,01:00,Off
2,0.0,02:00,Off
3,0.0,03:00,Off
4,0.0,04:00,Off


In [6]:
# Step 2: Data preprocessing
# Work on a copy so the raw frame `df` stays untouched and this cell can be re-run safely.
df_tmp = df.copy()

# Light sensor: group the sensor reading into 4 levels (0-3).
# The bins are contiguous and right-closed:
#   [0.0, 0.25] -> 0, (0.25, 0.50] -> 1, (0.50, 0.75] -> 2, (0.75, 1.0] -> 3
# so a reading such as 0.255 gets a real level instead of falling between two
# ranges and being truncated to 0 by the integer cast.
# Readings outside [0, 1] (or missing) become NaN, which makes the int64 cast
# raise instead of silently producing a wrong level.
sensor_bin_edges = [0.0, 0.25, 0.50, 0.75, 1.0]
df_tmp['Light Sensor'] = pd.cut(
    df_tmp['Light Sensor'],
    bins=sensor_bin_edges,
    labels=False,          # return the bin number (0-3) rather than an interval
    include_lowest=True,   # make the first bin include 0.0 itself
).astype('int64')

# Time: keep just the hour of the 'HH:MM' time string
df_tmp['Time'] = pd.to_datetime(df_tmp['Time'], format='%H:%M').dt.hour

# Light state: encode 'Off' to 0 and 'On' to 1.
# Any other label maps to NaN, so the int64 cast fails loudly on unexpected values.
df_tmp['Light State'] = df_tmp['Light State'].map({'Off': 0, 'On': 1}).astype('int64')

df_tmp.head(n=10)
    

Unnamed: 0,Light Sensor,Time,Light State
0,0,0,0
1,0,1,0
2,0,2,0
3,0,3,0
4,0,4,0
5,0,4,0
6,0,5,1
7,0,6,1
8,1,7,1
9,1,8,0


In [7]:
# Step 3: convert the pandas DataFrame to a NumPy array
df_np = df_tmp.to_numpy()
print(df_np)
# split by column position: the first two columns are the features,
# the third column is the label
X, y = df_np[:, :2], df_np[:, 2]

[[ 0  0  0]
 [ 0  1  0]
 [ 0  2  0]
 [ 0  3  0]
 [ 0  4  0]
 [ 0  4  0]
 [ 0  5  1]
 [ 0  6  1]
 [ 1  7  1]
 [ 1  8  0]
 [ 0  8  1]
 [ 1  9  0]
 [ 1  9  1]
 [ 2 10  0]
 [ 2 11  0]
 [ 3 12  0]
 [ 3 12  0]
 [ 1 12  1]
 [ 3 13  0]
 [ 3 13  0]
 [ 3 14  0]
 [ 1 14  1]
 [ 2 15  0]
 [ 0 15  1]
 [ 2 16  0]
 [ 2 17  0]
 [ 0 17  1]
 [ 1 18  0]
 [ 1 18  1]
 [ 1 19  0]
 [ 0 19  1]
 [ 0 20  1]
 [ 0 20  1]
 [ 0 21  1]
 [ 0 21  1]
 [ 0 22  1]
 [ 0 23  1]]


# Create Frequency Table

In [10]:
# Step 4: build the frequency table for the light-sensor feature
import numpy as np

# 4 rows indexed by the first feature column, 2 columns indexed by the label;
# each cell counts how many samples have that (feature value, label) pair
freq_table_lsensor = np.zeros((4, 2))
for sensor_level, light_state in zip(X[:, 0], y):
    freq_table_lsensor[sensor_level, light_state] += 1
print(freq_table_lsensor)

[[ 6. 12.]
 [ 4.  5.]
 [ 5.  0.]
 [ 5.  0.]]


In [12]:
# Step 5: build the frequency table for the time feature
# 24 rows indexed by the second feature column, 2 columns indexed by the label;
# each cell counts how many samples have that (feature value, label) pair
freq_table_time = np.zeros((24, 2))
for hour, light_state in zip(X[:, 1], y):
    freq_table_time[hour, light_state] += 1
print(freq_table_time)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [2. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 1.]
 [1. 1.]
 [1. 0.]
 [1. 0.]
 [2. 1.]
 [2. 0.]
 [1. 1.]
 [1. 1.]
 [1. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 2.]
 [0. 2.]
 [0. 1.]
 [0. 1.]]


# Calculate Prior Probability

In [15]:
# Step 6: function that computes the prior probability of a label
def label_prior_prob(light_state):
    """Return the fraction of samples whose label equals `light_state`.

    Reads the module-level label sequence `y`.

    Parameters
    ----------
    light_state : label value to look for in `y`.

    Returns
    -------
    float
        Number of matching labels divided by `len(y)`.

    Raises
    ------
    ZeroDivisionError
        If `y` is empty.
    """
    n_matching = sum(1 for label in y if label == light_state)
    return n_matching / len(y)

# sanity check: prior probability of label 0 ('Off')
label_prior_prob(light_state=0)

0.5405405405405406

# Calculate Likelihood

In [18]:
# Step 7: create function for calculating likelihood
def likelihood(light_sensor, time, light_state, alpha=1):
    """Return the smoothed likelihood of the two feature values given a label.

    For each feature the conditional probability is estimated from its
    frequency table with additive (Laplace) smoothing:

        (count[value, light_state] + alpha)
        / (sum(count[:, light_state]) + alpha * number_of_rows_in_table)

    The two conditional probabilities are multiplied together, i.e. the
    features are treated as independent given the label.

    Reads the module-level tables `freq_table_lsensor` and `freq_table_time`.

    Parameters
    ----------
    light_sensor : row index into `freq_table_lsensor`.
    time : row index into `freq_table_time`.
    light_state : column index (label) into both tables.
    alpha : smoothing constant added to every count; defaults to 1.

    Returns
    -------
    float
        Product of the two smoothed conditional probabilities.
    """
    def smoothed_prob(freq_table, value):
        # The number of distinct feature values is the table's row count, so
        # the denominator stays correct if a table is built with another size.
        n_values = freq_table.shape[0]
        label_total = np.sum(freq_table[:, light_state])
        return (freq_table[value, light_state] + alpha) / (label_total + n_values * alpha)

    # probability of light sensor = light_sensor given light state = light_state
    prob_lsensor_lstate_lap = smoothed_prob(freq_table_lsensor, light_sensor)
    # probability of time = time given light state = light_state
    prob_time_lstate_lap = smoothed_prob(freq_table_time, time)
    # probability of both feature values given light state = light_state
    return prob_lsensor_lstate_lap * prob_time_lstate_lap

# sanity check: likelihood of light sensor level 2 at 12:00 given light state 0 ('Off')
likelihood(light_sensor=2, time=12, light_state=0)

0.017045454545454544

# Calculate Posterior Probability

In [19]:
# Step 8: function that scores a label for a given pair of feature values
def posterior_prob(light_sensor, time, light_state):
    """Return likelihood times prior for `light_state`.

    The product is not divided by the evidence term, so the result is an
    unnormalised score: values for different labels can be compared with
    each other but do not sum to 1.

    Parameters
    ----------
    light_sensor, time : feature values forwarded to `likelihood`.
    light_state : label forwarded to `likelihood` and `label_prior_prob`.

    Returns
    -------
    The product `likelihood(...) * label_prior_prob(light_state)`.
    """
    feature_likelihood = likelihood(light_sensor, time, light_state)
    label_prior = label_prior_prob(light_state)
    return feature_likelihood * label_prior

In [20]:
# Step 9: make prediction
# Print the score of each light state, 0 ('Off') first and then 1 ('On'),
# given light sensor level 2 and time = 12:00; the larger score is the prediction.
for light_state in (0, 1):
    print(posterior_prob(2, 12, light_state))

0.009213759213759214
0.0010672693599522868


# Generate C array of the Dataset

In [21]:
# Step 10: print features as a C array
n_rows, n_cols = np.shape(X)
# one "{a, b}" initializer per sample row; values inside a row are separated by ", "
row_initializers = ["{%s}" % ", ".join("%d" % value for value in row) for row in X]
# rows are separated by "," and the declaration is closed with "};"
# (a ':' terminator would not be valid C)
print("uint8_t X[%d][%d] = {%s};" % (n_rows, n_cols, ",".join(row_initializers)))

uint8_t X[37][2] = {{0, 0},{0, 1},{0, 2},{0, 3},{0, 4},{0, 4},{0, 5},{0, 6},{1, 7},{1, 8},{0, 8},{1, 9},{1, 9},{2, 10},{2, 11},{3, 12},{3, 12},{1, 12},{3, 13},{3, 13},{3, 14},{1, 14},{2, 15},{0, 15},{2, 16},{2, 17},{0, 17},{1, 18},{1, 18},{1, 19},{0, 19},{0, 20},{0, 20},{0, 21},{0, 21},{0, 22},{0, 23}}:


In [23]:
# Step 11: print labels as a C array
# labels are written as integers separated by "," inside a single brace pair
label_initializers = ",".join("%d" % label for label in y)
print("uint8_t y[%d] = {%s};" % (np.shape(y)[0], label_initializers))

uint8_t y[37] = {0,0,0,0,0,0,1,1,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,1,0,1,1,1,1,1,1,1};
