In [4]:
# !/usr/bin/env python3
import os
import argparse
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


def load_raw_dataset(input_file_s):
    df_s = pd.read_csv(input_file_s, sep="\t", header=None,
                       names=["date", "time", "sensor", "value", "activity", "log"])
    return df_s


def clean_and_prepare(df_s):
    df_s.log = df_s.log.fillna(method='ffill')
    df_s['activity'] = df_s['activity'].fillna(df_s['log'])
    df_s['activity'] = df_s['activity'].replace("end", "Other")
    df_s['activity'] = df_s['activity'].fillna("Other")
    df_s['activity'] = df_s['activity'].replace("begin", None)
    df_s['activity'] = df_s['activity'].fillna(method='ffill')
    return df_s


def save_activity_dict(df_s):
    filename = "milan_activity_list_step1.pickle"
    activities = df_s.activity.unique()
    # activities.sort()
    dictActivities = {}
    for i_s, activity in enumerate(activities):
        dictActivities[activity] = i_s
    pickle_out = open(filename, "wb")
    pickle.dump(dictActivities, pickle_out)
    pickle_out.close()


def generate_sentence(df2):
    sentence = ""
    sensors = df2.sensor.values
    values = df2.value.values
    for i_s in range(len(sensors)):
        val = values[i_s]
        if i_s == len(sensors) - 1:
            sentence += "{}{}".format(sensors[i_s], val)
        else:
            sentence += "{}{} ".format(sensors[i_s], val)
    return sentence


def segment_activities(df_s):
    activitiesSeq = []
    ponentialIndex = df_s.activity.ne(df_s.activity.shift())
    ii = np.where(ponentialIndex == True)[0]
    for i_s, end in enumerate(ii):
        if i_s > 0:
            df_stmp = df_s[ii[i_s - 1]:end]
            activitiesSeq.append(df_stmp)
    return activitiesSeq


def sliding_window(sequence, win_size_s, step_s=1):
    try:
        iter(sequence)
    except TypeError:
        raise Exception("**ERROR** sequence must be iterable.")
    # if not (isinstance(type(win_size_s), type(0)) and (isinstance(type(step_s), type(0)))):
    #     raise Exception("**ERROR** type(win_size_s) and type(step_s) must be int.")
    # if step_s > win_size_s:
    #     raise Exception("**ERROR** step_s must not be larger than win_size_s.")
    numOfChunks = int(((len(sequence) - win_size_s) / step_s) + 1)

    if win_size_s > len(sequence):
        yield sequence[0:len(sequence)]
    else:
        for i_s in range(0, numOfChunks * step_s, step_s):
            yield sequence[i_s:i_s + win_size_s]


def sequences_to_sentences(activity_sequences_s):
    sentences_s = []
    label_sentences_s = []
    for i_s in range(len(activity_sequences_s)):
        sentence = generate_sentence(activity_sequences_s[i_s])
        sentences_s.append(sentence)
        label_sentences_s.append(activity_sequences_s[i_s].activity.values[0])
    return sentences_s, label_sentences_s

In [16]:

input_file = r"/"
win_size = 100
step = 1

print("STEP 1: Load dataset")
df = pd.read_csv("data_milan",
                 sep='\t', header=None, names=["date","time", "sensor",
                                               "value", "activity", "log"])

print("STEP 2: prepare dataset")
df = clean_and_prepare(df)
save_activity_dict(df)
print(df[:30])

#  Segment dataset in sequence of activity ##
print("STEP 3: segment dataset in sequence of activity")
activity_sequences = segment_activities(df)
print(activity_sequences[:10])
df_txt = df.iloc[:, :-2]
#df_txt.to_csv('dataframe.txt', sep='\t', index=False)


#  Transform sequences of activity in sentences ##
print("STEP 4: transform sequences of activity in sentences")
sentences, label_sentences = sequences_to_sentences(activity_sequences)
print(sentences[0:5])
##################################################################################################################
#print(label_sentences[0:8])
#['Sleeping', 'Other', 'Bed_to_Toilet', 'Other', 'Sleeping']
np.save("origin_label.npy",label_sentences)

#  Indexization ##
print("STEP 5: sentences indexization")
tokenizer = Tokenizer(filters='!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~\t\n')
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
indexed_sentences = tokenizer.texts_to_sequences(sentences)
# for word, index in word_index.items():
#     print(f"{word}: {index}")
##print(word_index)
##################################################################################################################
print(indexed_sentences[:5])
#[[26, 25, 26, 25, 26, 25, 26, 25, 26, 50, 49, 25, 26, 50, 49, 25, 26, 50, 10, 49, 9, 25, 26, 10, 25], [26, 28, 25], 
#[42, 27, 9, 41, 42, 41, 10, 42, 9, 10, 28, 41], [27, 9, 10, 9],
#[26, 25, 50, 26, 49, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 50, 49, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 10, 25, 9, 26, 10, 25, 26, 25]]

#  Split in sliding windows ##
print("STEP 6: split indexed sentences in sliding windows")
X_windowed = []
Y_windowed = []
X_windowed_sen = []
Y_windowed_sen = []
for i, s in enumerate(indexed_sentences):
    chunks = sliding_window(s, win_size, step)
    for chunk in chunks:
        X_windowed.append(chunk)
        Y_windowed.append(label_sentences[i])
print(X_windowed[0:5])
print(Y_windowed[0:5])

        
#  Pad windows ##
print("STEP 7: pad sliding windows")
padded_windows = pad_sequences(X_windowed, padding ='post')
Y_windowed = np.array(Y_windowed)
print(padded_windows[0:10])
print(Y_windowed[0:10])
print(np.unique(Y_windowed))


STEP 1: Load dataset
STEP 2: prepare dataset


  df_s.log = df_s.log.fillna(method='ffill')
  df_s['activity'] = df_s['activity'].fillna(method='ffill')


TypeError: save_activity_dict() missing 1 required positional argument: 'input_file_s'

In [15]:
print(dictActivities)

NameError: name 'dictActivities' is not defined

In [None]:
# #  Save files ##
print("STEP 8: save sliding windows and labels")
np.save("X.npy", padded_windows)
np.save("Y_prepare.npy", Y_windowed)
pickle_file_path = 'milan_activity_list_step1.pickle'
with open(pickle_file_path, 'wb') as pickle_file:
    pickle.dump(indexed_sentences, pickle_file)

STEP 8: save sliding windows and labels


FileNotFoundError: [Errno 2] No such file or directory: 'milan_activity_list_step1.pickle.pkl'

In [14]:
with open('/Users/zehaokou/Desktop/Technion/AI/plot/new_processing/aruba_activity_list_step1.pickle', 'rb') as f:
    dict = pickle.load(f)
print(dict)

{'Sleeping': 0, 'Other': 1, 'Bed_to_Toilet': 2, 'Meal_Preparation': 3, 'Relax': 4, 'Housekeeping': 5, 'Eating': 6, 'Wash_Dishes': 7, 'Leave_Home': 8, 'Enter_Home': 9, 'Work': 10, 'Respirate': 11}


# Replace Y

In [11]:
import numpy as np 
y = np.load("Y_prepare.npy")
y0 = np.load("X_prepare.npy")
print(y[0:10])
print(y0[0:5])
with open(pickle_file_path, 'rb') as file:
    loaded_data = pickle.load(file)
print(loaded_data[:10])

# ['Bed_to_Toilet' 'Chores' 'Desk_Activity' 'Dining_Rm_Activity' 'Eve_Meds' 'Guest_Bathroom' 'Kitchen_Activity' 'Leave_Home' 'Master_Bathroom' 'Master_Bedroom_Activity' 'Meditate' 'Morning_Meds' 'Other' 'Read' 'Sleep' 'Watch_TV']
dict = {'Sleeping': 0, 'Other': 1, 'Bed_to_Toilet': 2, 'Meal_Preparation': 3, 'Relax': 4, 'Housekeeping': 5, 'Eating': 6, 'Wash_Dishes': 7, 'Leave_Home': 8, 'Enter_Home': 9, 'Work': 10, 'Respirate': 11}
y1 = np.array(list(map(dict.get, y)))
print(y1[0:10])
np.save("/Users/zehaokou/Desktop/Paper1/PCA/Aruba/no_D123M31/Y_noD123M31.npy", y1)


['Sleeping' 'Other' 'Bed_to_Toilet' 'Other' 'Sleeping' 'Other'
 'Meal_Preparation' 'Meal_Preparation' 'Meal_Preparation'
 'Meal_Preparation']
[[26 25 26 25 26 25 26 25 26 50 49 25 26 50 49 25 26 50 10 49  9 25 26 10
  25  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [26 28 25  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [42 27  9 41 42 41 10 42  9 10 28 41  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0 

# Replace X

In [13]:
import numpy as np
import matplotlib.pyplot as plt

# a = np.load('/Users/zehaokou/Desktop/Technion/'
#             'AI/plot/pre_processed_datasets/ARUBA/'
#             'Aruba_250_padded_x_DCNN.npy')
#a = np.load('/Users/zehaokou/Desktop/Technion/AI/plot/new_processing/new_processing_100_padded_x_step1.npy')
# a = np.load('/Users/zehaokou/Desktop/Technion/AI/plot/new_processing_100_padded_x_step1_state.npy')
a = np.load('/Users/zehaokou/Desktop/Paper1/PCA/Aruba/no_D123M31/Aruba_x_noD123M31.npy')
a = a.astype(float)

# Dinning
a[np.where(a == 0)] = np.nan
a[np.where(a == 11)] = 1039
a[np.where(a == 12)] = 1038.9

# Kitchen
a[np.where(a == 5)] = 1053.9
a[np.where(a == 6)] = 1054
a[np.where(a == 7)] = 1049.9
a[np.where(a == 8)] = 1050
a[np.where(a == 15)] = 1053
a[np.where(a == 16)] = 1052.9
a[np.where(a == 19)] = 1051.9
a[np.where(a == 20)] = 1052
a[np.where(a == 35)] = 1050.9
a[np.where(a == 36)] = 1051

# Living
a[np.where(a == 1)] = 1035
a[np.where(a == 2)] = 1034.9
a[np.where(a == 3)] = 1040
a[np.where(a == 4)] = 1039.9
a[np.where(a == 13)] = 1038
a[np.where(a == 14)] = 1037.9
a[np.where(a == 21)] = 1035.9
a[np.where(a == 22)] = 1036
a[np.where(a == 33)] = 1037
a[np.where(a == 34)] = 1036.9

# Office
a[np.where(a == 39)] = 1080.9
a[np.where(a == 40)] = 1081
a[np.where(a == 43)] = 1081.9
a[np.where(a == 44)] = 1082
a[np.where(a == 53)] = 1082.9
a[np.where(a == 54)] = 1083
a[np.where(a == 59)] = 1079.9
a[np.where(a == 60)] = 1080

# Bed 1
a[np.where(a == 9)] = 1025.9
a[np.where(a == 10)] = 1026
a[np.where(a == 25)] = 1021.9
a[np.where(a == 26)] = 1022
a[np.where(a == 27)] = 1023.9
a[np.where(a == 28)] = 1024
a[np.where(a == 37)] = 1024.9
a[np.where(a == 38)] = 1025
a[np.where(a == 41)] = 1022.9
a[np.where(a == 42)] = 1023
a[np.where(a == 47)] = 1019.9
a[np.where(a == 48)] = 1020
a[np.where(a == 49)] = 1020.9
a[np.where(a == 50)] = 1021

# Bed 2
a[np.where(a == 17)] = 1071.9
a[np.where(a == 18)] = 1072
a[np.where(a == 45)] = 1070.9
a[np.where(a == 46)] = 1071

# Bathroom
a[np.where(a == 51)] = 1099.9
a[np.where(a == 52)] = 1091
a[np.where(a == 55)] = 1090
a[np.where(a == 56)] = 1089.9
# a[np.where(a == 94)] = 1100
# a[np.where(a == 95)] = 1100

# Door
a[np.where(a == 57)] = 1139.9
a[np.where(a == 58)] = 1140
a[np.where(a == 67)] = 1119.9
a[np.where(a == 68)] = 1120
# a[np.where(a == 169)] = 1110
# a[np.where(a == 170)] = 1110
# a[np.where(a == 177)] = 1120
# a[np.where(a == 178)] = 1120
a[np.where(a == 65)] = 1109.9
a[np.where(a == 66)] = 1110

# Corridor
a[np.where(a == 23)] = 1059.9
a[np.where(a == 24)] = 1060
a[np.where(a == 29)] = 1026.9
a[np.where(a == 30)] = 1027
a[np.where(a == 31)] = 1060.9
a[np.where(a == 32)] = 1061
a[np.where(a == 61)] = 1033.9
a[np.where(a == 62)] = 1034

# Temperature

a[np.where(a == 63)] = np.nan
a[np.where(a == 64)] = np.nan
a[np.where(a == 65)] = np.nan
a[np.where(a == 66)] = np.nan
for i in np.linspace(69, 93, 93-69+1):
    a[np.where(a == i)] = np.nan
a[np.where(a == 96)] = np.nan
a[np.where(a == 171)] = np.nan
a[np.where(a == 172)] = np.nan
a[np.where(a == 173)] = np.nan
a[np.where(a == 174)] = np.nan
a[np.where(a == 175)] = np.nan
a[np.where(a == 176)] = np.nan
for i in np.linspace(96, 169, 169-96+1):
    a[np.where(a == i)] = np.nan
for i in np.linspace(179, 300, 300-179+1):
    a[np.where(a == i)] = np.nan


after_change_index = a-1000
np.save("//Users/zehaokou/Desktop/Paper1/PCA/Aruba/no_D123M31/state_index.npy", after_change_index)
print('ok')

ok
