In [1]:
# !/usr/bin/env python3
import os
import argparse
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


def load_raw_dataset(input_file_s):
    df_s = pd.read_csv(input_file_s, sep="\t", header=None,
                       names=["date", "time", "sensor", "value", "activity", "log"])
    return df_s


def clean_and_prepare(df_s):
    df_s.log = df_s.log.fillna(method='ffill')
    df_s['activity'] = df_s['activity'].fillna(df_s['log'])
    df_s['activity'] = df_s['activity'].replace("end", "Other")
    df_s['activity'] = df_s['activity'].fillna("Other")
    df_s['activity'] = df_s['activity'].replace("begin", None)
    df_s['activity'] = df_s['activity'].fillna(method='ffill')
    return df_s


def save_activity_dict(df_s, input_file_s):
    filename = "milan_activity_list_step1.pickle"
    activities = df_s.activity.unique()
    # activities.sort()
    dictActivities = {}
    for i_s, activity in enumerate(activities):
        dictActivities[activity] = i_s
    pickle_out = open(filename, "wb")
    pickle.dump(dictActivities, pickle_out)
    pickle_out.close()


def generate_sentence(df2):
    sentence = ""
    sensors = df2.sensor.values
    values = df2.value.values
    for i_s in range(len(sensors)):
        val = values[i_s]
        if i_s == len(sensors) - 1:
            sentence += "{}{}".format(sensors[i_s], val)
        else:
            sentence += "{}{} ".format(sensors[i_s], val)
    return sentence


def segment_activities(df_s):
    activitiesSeq = []
    ponentialIndex = df_s.activity.ne(df_s.activity.shift())
    ii = np.where(ponentialIndex == True)[0]
    for i_s, end in enumerate(ii):
        if i_s > 0:
            df_stmp = df_s[ii[i_s - 1]:end]
            activitiesSeq.append(df_stmp)
    return activitiesSeq


def sliding_window(sequence, win_size_s, step_s=1):
    try:
        iter(sequence)
    except TypeError:
        raise Exception("**ERROR** sequence must be iterable.")
    # if not (isinstance(type(win_size_s), type(0)) and (isinstance(type(step_s), type(0)))):
    #     raise Exception("**ERROR** type(win_size_s) and type(step_s) must be int.")
    # if step_s > win_size_s:
    #     raise Exception("**ERROR** step_s must not be larger than win_size_s.")
    numOfChunks = int(((len(sequence) - win_size_s) / step_s) + 1)

    if win_size_s > len(sequence):
        yield sequence[0:len(sequence)]
    else:
        for i_s in range(0, numOfChunks * step_s, step_s):
            yield sequence[i_s:i_s + win_size_s]


def sequences_to_sentences(activity_sequences_s):
    sentences_s = []
    label_sentences_s = []
    for i_s in range(len(activity_sequences_s)):
        sentence = generate_sentence(activity_sequences_s[i_s])
        sentences_s.append(sentence)
        label_sentences_s.append(activity_sequences_s[i_s].activity.values[0])
    return sentences_s, label_sentences_s

In [21]:
input_file = r"/"
win_size = 100
step = 1

print("STEP 1: Load dataset")
df = pd.read_csv("data_milan",
                 sep='\t', header=None, names=["date","time", "sensor",
                                               "value", "activity", "log"])

df = df[~df['sensor'].str.contains("T0", na=False)]
print(len(df))

print("STEP 2: prepare dataset")
df = clean_and_prepare(df)
save_activity_dict(df,input_file)
# print(df[60:90])

#  Segment dataset in sequence of activity ##
print("STEP 3: segment dataset in sequence of activity")
activity_sequences = segment_activities(df)
print(activity_sequences[:10])
df_txt = df.iloc[:, :-2]
#df_txt.to_csv('dataframe.txt', sep='\t', index=False)


#  Transform sequences of activity in sentences ##
print("STEP 4: transform sequences of activity in sentences")
sentences, label_sentences = sequences_to_sentences(activity_sequences)
print(sentences[0:5])
##################################################################################################################
#print(label_sentences[0:8])
#['Sleeping', 'Other', 'Bed_to_Toilet', 'Other', 'Sleeping']
np.save("origin_label.npy",label_sentences)

#  Indexization ##
print("STEP 5: sentences indexization")
tokenizer = Tokenizer(filters='!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~\t\n')
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
indexed_sentences = tokenizer.texts_to_sequences(sentences)
# for word, index in word_index.items():
#     print(f"{word}: {index}")
##print(word_index)
##################################################################################################################
print(indexed_sentences[:5])
#[[26, 25, 26, 25, 26, 25, 26, 25, 26, 50, 49, 25, 26, 50, 49, 25, 26, 50, 10, 49, 9, 25, 26, 10, 25], [26, 28, 25], 
#[42, 27, 9, 41, 42, 41, 10, 42, 9, 10, 28, 41], [27, 9, 10, 9],
#[26, 25, 50, 26, 49, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 50, 49, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 25, 26, 10, 25, 9, 26, 10, 25, 26, 25]]

#  Split in sliding windows ##
print("STEP 6: split indexed sentences in sliding windows")
X_windowed = []
Y_windowed = []
X_windowed_sen = []
Y_windowed_sen = []
for i, s in enumerate(indexed_sentences):
    chunks = sliding_window(s, win_size, step)
    for chunk in chunks:
        X_windowed.append(chunk)
        Y_windowed.append(label_sentences[i])
print(X_windowed[0:5])
print(Y_windowed[0:5])

        
#  Pad windows ##
print("STEP 7: pad sliding windows")
padded_windows = pad_sequences(X_windowed, padding ='post')
Y_windowed = np.array(Y_windowed)
print(padded_windows[0:10])
print(Y_windowed[0:10])
print(np.unique(Y_windowed))


STEP 1: Load dataset
421392
STEP 2: prepare dataset
STEP 3: segment dataset in sequence of activity
[          date             time sensor value activity  log
0   2009-10-16  00:01:04.000059   M017    ON    Other  NaN
1   2009-10-16  00:01:06.000046   M009    ON    Other  NaN
2   2009-10-16  00:01:07.000064   M017   OFF    Other  NaN
3   2009-10-16  00:01:08.000081   M019    ON    Other  NaN
4   2009-10-16  00:01:09.000028   M009   OFF    Other  NaN
..         ...              ...    ...   ...      ...  ...
77  2009-10-16  03:55:39.000064   M021   OFF    Other  NaN
78  2009-10-16  03:55:43.000085   M021    ON    Other  NaN
79  2009-10-16  03:55:46.000068   M021   OFF    Other  NaN
80  2009-10-16  03:55:48.000049   M021    ON    Other  NaN
81  2009-10-16  03:55:50.000029   M021   OFF    Other  NaN

[82 rows x 6 columns],           date             time sensor value       activity    log
82  2009-10-16  03:55:53.000080   M021    ON  Bed_to_Toilet  begin
83  2009-10-16  03:55:58.000006  

  df_s.log = df_s.log.fillna(method='ffill')
  df_s['activity'] = df_s['activity'].fillna(method='ffill')


STEP 4: transform sequences of activity in sentences
['M017ON M009ON M017OFF M019ON M009OFF M019OFF M020ON M020OFF M020ON M020OFF M020ON M020OFF M020ON M020OFF M020ON M020OFF M020ON M020OFF M019ON M009ON M019OFF M009OFF M016ON M015ON M015OFF M016OFF M022ON M012ON M022OFF M012OFF M003ON M003OFF M005ON M005OFF M005ON M006ON M005OFF M006OFF M008ON M008OFF M019ON M019OFF M019ON M019OFF M009ON M009OFF M016ON M015ON M016OFF M015OFF M022ON M022OFF M022ON M016ON M022OFF M016OFF M011ON M011OFF M011ON M011OFF M011ON M011OFF M011ON M009ON M011OFF M019ON M009OFF M019OFF M028ON M028OFF M021ON M021OFF M021ON M028ON M028OFF M021OFF M021ON M021OFF M021ON M021OFF M021ON M021OFF', 'M021ON M021OFF M028ON M028OFF M020ON M028ON M028OFF M028ON M020OFF M028OFF M025ON M025OFF M013ON M013OFF M013ON M013OFF M025ON', 'M025OFF M028ON', 'M020ON M021ON M020OFF M020ON M028OFF M021OFF M021ON M028ON M020OFF M028OFF M021OFF M021ON M028ON M028OFF M021OFF M021ON M021OFF M021ON M021OFF M028ON M028OFF M020ON M020OFF M020ON

In [22]:
# #  Save files ##
print("STEP 8: save sliding windows and labels")
np.save("X_prepare.npy", padded_windows)
np.save("Y_prepare.npy", Y_windowed)
pickle_file_path = 'milan_activity_list_step1.pickle'
with open(pickle_file_path, 'wb') as pickle_file:
    pickle.dump(indexed_sentences, pickle_file)

STEP 8: save sliding windows and labels


# Replace Y

In [23]:
import numpy as np 
y = np.load("Y_prepare.npy")
y0 = np.load("X_prepare.npy")
print(y[0:10])
print(y0[0:5])

# ['Bed_to_Toilet' 'Chores' 'Desk_Activity' 'Dining_Rm_Activity' 'Eve_Meds' 'Guest_Bathroom' 'Kitchen_Activity' 'Leave_Home' 'Master_Bathroom' 'Master_Bedroom_Activity' 'Meditate' 'Morning_Meds' 'Other' 'Read' 'Sleep' 'Watch_TV']
dict = {'Other': 0, 'Bed_to_Toilet': 1, 'Sleep': 2, 'Master_Bathroom': 3, 'Morning_Meds': 4, 'Leave_Home': 5, 'Read': 6, 'Kitchen_Activity': 7, 'Desk_Activity': 8, 'Guest_Bathroom': 9, 'Chores': 10, 'Meditate': 11,'Watch_TV': 12, 'Dining_Rm_Activity':13, 'Master_Bedroom_Activity':14, 'Eve_Meds':15}
y1 = np.array(list(map(dict.get, y)))
print(y1[0:10])
np.save("Y.npy", y1)


['Other' 'Bed_to_Toilet' 'Other' 'Sleep' 'Sleep' 'Sleep' 'Sleep' 'Sleep'
 'Sleep' 'Sleep']
[[44 42 43 38 41 37 28 27 28 27 28 27 28 27 28 27 28 27 38 42 37 41 50 18
  17 49  8 22  7 21 12 11 52 51 52 54 51 53 14 13 38 37 38 37 42 41 50 18
  49 17  8  7  8 50  7 49 40 39 40 39 40 39 40 42 39 38 41 37 16 15 30 29
  30 16 15 29 30 29 30 29 30 29  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [30 29 16 15 28 16 15 16 27 15 20 19 48 47 48 47 20  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0]
 [19 16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 

In [24]:
print("Word Index Dictionary:")
print(word_index)

Word Index Dictionary:
{'m023off': 1, 'm023on': 2, 'm004off': 3, 'm004on': 4, 'm014off': 5, 'm014on': 6, 'm022off': 7, 'm022on': 8, 'm027off': 9, 'm027on': 10, 'm003off': 11, 'm003on': 12, 'm008off': 13, 'm008on': 14, 'm028off': 15, 'm028on': 16, 'm015off': 17, 'm015on': 18, 'm025off': 19, 'm025on': 20, 'm012off': 21, 'm012on': 22, 'm026off': 23, 'm026on': 24, 'm007off': 25, 'm007on': 26, 'm020off': 27, 'm020on': 28, 'm021off': 29, 'm021on': 30, 'm002off': 31, 'm002on': 32, 'm001off': 33, 'm001on': 34, 'm018off': 35, 'm018on': 36, 'm019off': 37, 'm019on': 38, 'm011off': 39, 'm011on': 40, 'm009off': 41, 'm009on': 42, 'm017off': 43, 'm017on': 44, 'm010off': 45, 'm010on': 46, 'm013off': 47, 'm013on': 48, 'm016off': 49, 'm016on': 50, 'm005off': 51, 'm005on': 52, 'm006off': 53, 'm006on': 54, 'm024off': 55, 'm024on': 56, 'd003close': 57, 'd003open': 58, 'd001close': 59, 'd001open': 60, 'd002open': 61, 'd002close': 62, 'm004on0': 63, 'm019o': 64}


# Replace X

In [36]:
a = y0
a = a.astype(float)

# Entrance
a[np.where(a == 0)] = np.nan
a[np.where(a == 34)] = 1009
a[np.where(a == 33)] = 1008.9
a[np.where(a == 32)] = 1007
a[np.where(a == 31)] = 1006.9
a[np.where(a == 60)] = 1005
a[np.where(a == 59)] = 1004.9
a[np.where(a == 61)] = 1003
a[np.where(a == 62)] = 1002.9

# Room1
a[np.where(a == 22)] = 1021
a[np.where(a == 21)] = 1020.9
a[np.where(a == 6)] = 1022
a[np.where(a == 5)] = 1021.9
a[np.where(a == 18)] = 1023
a[np.where(a == 17)] = 1022.9
a[np.where(a == 50)] = 1024
a[np.where(a == 49)] = 1023.9
a[np.where(a == 8)] = 1025
a[np.where(a == 7)] = 1024.9
a[np.where(a == 2)] = 1026
a[np.where(a == 1)] = 1025.9
a[np.where(a == 56)] = 1027
a[np.where(a == 55)] = 1026.9
a[np.where(a == 58)] = 1028
a[np.where(a == 57)] = 1027.9

# living room
a[np.where(a == 10)] = 1041
a[np.where(a == 9)] = 1040.9
a[np.where(a == 4)] = 1042
a[np.where(a == 3)] = 1041.9
a[np.where(a == 52)] = 1043
a[np.where(a == 51)] = 1042.9
a[np.where(a == 54)] = 1044
a[np.where(a == 53)] = 1043.9
a[np.where(a == 12)] = 1045
a[np.where(a == 11)] = 1044.9

# corridor
a[np.where(a == 46)] = 1061
a[np.where(a == 45)] = 1060.9
a[np.where(a == 40)] = 1062
a[np.where(a == 39)] = 1061.9
a[np.where(a == 44)] = 1063
a[np.where(a == 43)] = 1062.9
a[np.where(a == 36)] = 1064
a[np.where(a == 35)] = 1063.9
a[np.where(a == 42)] = 1065
a[np.where(a == 41)] = 1064.9

# room2
a[np.where(a == 24)] = 1081
a[np.where(a == 23)] = 1080.9
a[np.where(a == 26)] = 1082
a[np.where(a == 25)] = 1081.9
a[np.where(a == 14)] = 1083
a[np.where(a == 13)] = 1082.9
a[np.where(a == 38)] = 1084
a[np.where(a == 37)] = 1083.9

# room3
a[np.where(a == 20)] = 1101
a[np.where(a == 19)] = 1100.9
a[np.where(a == 48)] = 1102
a[np.where(a == 47)] = 1101.9

# bed room
a[np.where(a == 28)] = 1121
a[np.where(a == 27)] = 1120.9
a[np.where(a == 30)] = 1122
a[np.where(a == 29)] = 1121.9
a[np.where(a == 16)] = 1123
a[np.where(a == 15)] = 1122.9

# 

after_change_index = a-1000
np.save("X.npy", after_change_index)
print('ok')

ok


In [27]:
print(after_change_index[:10])

[[ 63.   65.   62.9  84.   64.9  83.9 121.  120.9 121.  120.9 121.  120.9
  121.  120.9 121.  120.9 121.  120.9  84.   65.   83.9  64.9  24.   23.
   22.9  23.9  25.   21.   24.9  20.9  45.   44.9  43.   42.9  43.   44.
   42.9  43.9  83.   82.9  84.   83.9  84.   83.9  65.   64.9  24.   23.
   23.9  22.9  25.   24.9  25.   24.   24.9  23.9  62.   61.9  62.   61.9
   62.   61.9  62.   65.   61.9  84.   64.9  83.9 123.  122.9 122.  121.9
  122.  123.  122.9 121.9 122.  121.9 122.  121.9 122.  121.9   nan   nan
    nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan
    nan   nan   nan   nan]
 [122.  121.9 123.  122.9 121.  123.  122.9 123.  120.9 122.9 101.  100.9
  102.  101.9 102.  101.9 101.    nan   nan   nan   nan   nan   nan   nan
    nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan
    nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan
    nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan   nan
    nan   nan 

In [54]:
import numpy as np
from PIL import Image

y = np.load(r'Y.npy', allow_pickle=True)
x = np.load(r'X.npy', allow_pickle=True)
y = y.astype(str)

activity = []
for i in range(np.size(y)):
    if y[i] == '15':
        activity.append(i)

zero = np.zeros((224, 224))

for i in range(8000):
    for j in range(100):
        if np.isnan(x[activity[i]][j]):
            continue

        index = 223 - x[activity[i]][j]
        if index != 0:
            if index - index.astype(int) != 0:
                index = index.astype(int)
                zero[index][2 * j:(2 * j + 1)] = 0.5
            elif index - index.astype(int) == 0:
                index = index.astype(int)
                zero[index][2 * j:(2 * j + 1)] = 1
            else:
                zero[index][2 * j:(2 * j + 1)] = 0

    not_nan_count = np.sum(~np.isnan(x[activity[i]]))

    for row in range(224):
        cols_with_value = [col for col in range(224) if zero[row][col] > 0]

        if len(cols_with_value) > 0:
            if zero[row][cols_with_value[0]] == 0.5:
                zero[row][0:cols_with_value[0]] = 1

            # 对相邻的on和off之间进行填充
            for k in range(0, len(cols_with_value) - 1):
                if zero[row][cols_with_value[k]] == 1 and zero[row][cols_with_value[k + 1]] == 0.5:
                    zero[row][cols_with_value[k]:cols_with_value[k + 1]] = 1

            if zero[row][cols_with_value[-1]] == 1:
                zero[row][cols_with_value[-1]:2 * not_nan_count - 1] = 1

    for row in range(224):
        cols_with_value = [col for col in range(224) if zero[row][col] == 1]

        zero[row, :] = 0
        for col in cols_with_value:
            zero[row, col] = 1
        ###########

    data = zero * 255
    data = data.astype('uint8')
    img = Image.fromarray(data)
    img.save("png/15/%d.png" % i)

    zero = np.zeros((224, 224))

print('done')

IndexError: list index out of range

In [35]:
print(x[activity[1217:1220]])
print(y[:5])

[[ 123.   121.   120.9  122.9   84.    83.9   83.    81.    82.9   44.
    80.9   43.9   43.    42.9   43.    42.9   43.    42.9   42.    41.9
    41.    45.    21.    40.9   44.9   20.9   21.    25.    20.9   24.9
    26.    25.    22.    27.9   24.9   25.9   25.    24.9   25.    26.
    24.9   25.  -995.    21.    41.    24.9   45.    25.9   40.9   20.9
    41.    40.9   44.9   41.    45.    40.9   44.9   41.    45.    40.9
    41.    40.9   41.    44.9   42.    40.9   41.9   42.    41.9   45.
    44.9   42.    41.9   42.    41.9   42.    41.9   42.    41.9   42.
    41.9   22.    23.    42.  -995.    25.    22.9   24.9   41.9   25.
    24.9   25.    24.9   25.    21.    24.9   20.9   42.    41.9   45. ]
 [ 121.   120.9  122.9   84.    83.9   83.    81.    82.9   44.    80.9
    43.9   43.    42.9   43.    42.9   43.    42.9   42.    41.9   41.
    45.    21.    40.9   44.9   20.9   21.    25.    20.9   24.9   26.
    25.    22.    27.9   24.9   25.9   25.    24.9   25.    26.    24.