In [86]:
import os
import numpy as np
from collections import defaultdict
import datetime
import pytz
import pandas as pd
class project_data():
    """Label and feature extraction for a dataset directory laid out like StudentLife.

    Survey answers are read from ``<root_path>/Outputs`` and per-student sensor
    logs from ``<root_path>/Inputs/sensing/<sensor>``. Sensor features are
    computed for every id in ``pre_student``. Count and duration features are
    multiplied by ``self.duration / observed_span`` so that students whose logs
    cover different time spans are put on a common scale.

    Time-of-day buckets used throughout (local time in ``self.tz``):
    day = 09:00-17:59, evening = 18:00-23:59, night = 00:00-08:59.
    """

    def __init__(self, root_path):
        """Store the dataset root and the fixed student lists / constants.

        Args:
            root_path: directory that contains the ``Inputs`` and ``Outputs`` folders.
        """
        self.root_path = root_path
        self.pre_student = ['u00', 'u01', 'u02', 'u03', 'u04', 'u05', 'u07', 'u08', 'u09', 'u10', 'u12', 'u13', 'u14',
                            'u15', 'u16', 'u17', 'u18', 'u19', 'u20', 'u22', 'u23', 'u24', 'u27', 'u30', 'u31', 'u32',
                            'u33', 'u34', 'u35', 'u36', 'u39', 'u42', 'u43', 'u44', 'u45', 'u46', 'u47', 'u49', 'u50',
                            'u51', 'u52', 'u53', 'u56', 'u57', 'u58', 'u59']
        self.post_student = ['u00', 'u01', 'u02', 'u03', 'u04', 'u05', 'u07', 'u09', 'u10', 'u14', 'u15', 'u16', 'u17',
                             'u19', 'u20', 'u23', 'u24', 'u27', 'u30', 'u31', 'u32', 'u33', 'u34', 'u35', 'u36', 'u42',
                             'u43', 'u44', 'u45', 'u46', 'u47', 'u49', 'u51', 'u52', 'u53', 'u56', 'u59']
        self.tz = pytz.timezone('America/New_York')
        # 70 days expressed in seconds; numerator of the per-student scaling factor.
        self.duration = 6_048_000

    # ------------------------------------------------------------------ helpers

    def _sensor_rows(self, folder, prefix, student, **read_csv_kwargs):
        """Return ``Inputs/sensing/<folder>/<prefix>_<student>.csv`` as a NumPy array of rows."""
        csv_path = os.path.join(self.root_path, 'Inputs/sensing/' + folder,
                                prefix + '_' + student + '.csv')
        return np.array(pd.read_csv(csv_path, **read_csv_kwargs))

    def _period_index(self, timestamp):
        """Map a Unix timestamp to 0 (day), 1 (evening) or 2 (night) using ``self.tz``."""
        hour = datetime.datetime.fromtimestamp(timestamp, tz=self.tz).hour
        if 9 <= hour < 18:
            return 0
        if hour >= 18:
            return 1
        return 2

    def _mark_interval(self, column, first, stop, value):
        """Set ``column[first:stop] = value``, clipping the interval to the column bounds."""
        first = max(int(first), 0)
        stop = min(int(stop), len(column))
        if first < stop:
            column[first:stop] = value

    # ------------------------------------------------------------------- labels

    def FlourishingScale(self):
        """Sum the 'post' flourishing answers per student.

        Returns:
            defaultdict(int) mapping the value in column 0 (student id) to the
            sum of columns 2-9 of that student's 'post' row; NaN answers are
            skipped. Students without any non-NaN 'post' answer have no key.
        """
        flour_path = os.path.join(self.root_path, 'Outputs/FlourishingScale.csv')
        flour = defaultdict(int)
        for r in np.array(pd.read_csv(flour_path)):
            if r[1] != 'post':
                continue
            for c in range(2, 10):
                answer = float(r[c])
                if not np.isnan(answer):
                    flour[r[0]] += answer
        return flour

    def PANAS(self):
        """Sum the 'post' PANAS answers per student, split into two groups.

        Columns 2-19 of each 'post' row are summed; columns listed in
        ``positive_choices`` go to the first dict, all others to the second.
        NaN answers are skipped.

        Returns:
            (pos_panas, neg_panas): two defaultdict(int) keyed by student id.
        """
        panas_path = os.path.join(self.root_path, 'Outputs/panas.csv')
        pos_panas = defaultdict(int)
        neg_panas = defaultdict(int)
        positive_choices = [2, 5, 9, 10, 12, 13, 15, 16, 18]
        for r in np.array(pd.read_csv(panas_path)):
            if r[1] != 'post':
                continue
            for c in range(2, 20):
                answer = float(r[c])
                if np.isnan(answer):
                    continue
                if c in positive_choices:
                    pos_panas[r[0]] += answer
                else:
                    neg_panas[r[0]] += answer
        return pos_panas, neg_panas

    # ----------------------------------------------------------------- features

    def conversation_freq(self):
        """Scaled number of conversations per student and time-of-day bucket.

        Each row of the conversation log adds ``alpha`` to the bucket of its
        start time (column 0), where ``alpha = self.duration / (last end - first start)``.

        Returns:
            (day, evening, night): three defaultdict(float) keyed by student id.
        """
        buckets = (defaultdict(float), defaultdict(float), defaultdict(float))
        for s in self.pre_student:
            rows = self._sensor_rows('conversation', 'conversation', s)
            alpha = self.duration / (rows[-1][1] - rows[0][0])
            for r in rows:
                buckets[self._period_index(r[0])][s] += alpha
        return buckets

    def conversation_dura(self):
        """Scaled total conversation time (seconds) per student and time-of-day bucket.

        A conversation is attributed entirely to the bucket of its start time.
        The length is taken as ``end - start`` of the raw timestamps; the
        previous ``timedelta.seconds`` wrapped at 24 hours and therefore
        under-reported any conversation lasting a day or more.

        Returns:
            (day, evening, night): three defaultdict(float) keyed by student id.
        """
        buckets = (defaultdict(float), defaultdict(float), defaultdict(float))
        for s in self.pre_student:
            rows = self._sensor_rows('conversation', 'conversation', s)
            alpha = self.duration / (rows[-1][1] - rows[0][0])
            for r in rows:
                elapsed = float(r[1] - r[0])
                buckets[self._period_index(r[0])][s] += elapsed * alpha
        return buckets

    def co_location(self):
        """Scaled count of bluetooth rows whose device was seen more than 10 times.

        For each student the log is read once; rows are counted per value of
        column 1 (presumably the remote device id - TODO confirm), and every
        row belonging to a value seen more than 10 times adds ``alpha``.

        Returns:
            defaultdict(float) keyed by student id. Students with no
            qualifying row have no key.
        """
        co_location = defaultdict(float)
        for s in self.pre_student:
            rows = self._sensor_rows('bluetooth', 'bt', s)
            sightings = defaultdict(int)
            for r in rows:
                sightings[r[1]] += 1
            alpha = self.duration / (rows[-1][0] - rows[0][0])
            for r in rows:
                if sightings[r[1]] > 10:
                    co_location[s] += alpha
        return co_location

    def activity(self):
        """Scaled count of activity samples whose label (column 1) is non-zero.

        Returns:
            (day, evening, night): three defaultdict(float) keyed by student id.
        """
        buckets = (defaultdict(float), defaultdict(float), defaultdict(float))
        for s in self.pre_student:
            rows = self._sensor_rows('activity', 'activity', s)
            alpha = self.duration / (rows[-1][0] - rows[0][0])
            for r in rows:
                if r[1] == 0:
                    continue
                buckets[self._period_index(r[0])][s] += alpha
        return buckets

    def traveled_distance(self):
        """Sum of block distances between consecutive GPS rows, per time-of-day bucket.

        Columns 4 and 5 of each row are used as coordinates and each step is
        attributed to the bucket of the later row's timestamp. The values are
        not scaled by ``alpha``.

        Returns:
            (day, evening, night): three defaultdict(int) keyed by student id.
        """
        buckets = (defaultdict(int), defaultdict(int), defaultdict(int))
        for s in self.pre_student:
            rows = self._sensor_rows('gps', 'gps', s, index_col=False)
            # (0, 0) doubles as the "no previous fix" marker, so a row located
            # exactly at (0, 0) also restarts the chain on the following row.
            x, y = 0, 0
            for r in rows:
                if x == 0 and y == 0:
                    x, y = r[4], r[5]
                    continue
                step = self.block_distance(x, y, r[4], r[5])
                buckets[self._period_index(r[0])][s] += step
                x, y = r[4], r[5]
        return buckets

    def block_distance(self, x1, y1, x2, y2):
        """Return the L1 (Manhattan) distance between ``(x1, y1)`` and ``(x2, y2)``."""
        return abs(x1 - x2) + abs(y1 - y2)

    def indoor_mobility(self):
        """Count distinct-timestamp wifi rows that fall on a second marked as active.

        A per-second array is built from the activity log: a non-zero label
        marks that second and the next two as 1, a zero label resets its own
        second to 0 (rows are applied in file order). Wifi rows that repeat the
        previous row's timestamp are ignored.

        Returns:
            (day, evening, night): three defaultdict(int) keyed by student id.
        """
        buckets = (defaultdict(int), defaultdict(int), defaultdict(int))
        for s in self.pre_student:
            activity_file = self._sensor_rows('activity', 'activity', s, index_col=False)
            wifi_file = self._sensor_rows('wifi', 'wifi', s, index_col=False)
            start = min(activity_file[0][0], wifi_file[0][0])
            end = max(activity_file[-1][0], wifi_file[-1][0])
            # +3 leaves room for the two look-ahead seconds written below.
            temp = np.zeros((end - start + 3))
            for r in activity_file:
                offset = r[0] - start
                if r[1] == 0:
                    temp[offset] = 0
                else:
                    temp[offset] = 1
                    temp[offset + 1] = 1
                    temp[offset + 2] = 1
            last_timestamp = 0
            for r in wifi_file:
                if r[0] == last_timestamp:
                    continue
                last_timestamp = r[0]
                if temp[r[0] - start] == 1:
                    buckets[self._period_index(r[0])][s] += 1
        return buckets

    def sleep_duration(self):
        """Fraction of logged seconds whose combined sensor score exceeds 0.9.

        Four per-second columns are filled from the activity, audio, phonelock
        and phonecharge logs with the weights 0.5445, 0.3484, 0.0512 and
        0.0469. A second is counted when the row sum is above 0.9, which with
        these weights requires the activity and audio columns plus at least
        one of the phone columns.

        Lock/charge intervals are clipped to the allocated window instead of
        raising IndexError when an interval ends after the last logged second.

        Returns:
            defaultdict(float) mapping student id to ``counted_seconds / (end - start)``.
        """
        w_activity, w_audio, w_lock, w_charge = 0.5445, 0.3484, 0.0512, 0.0469
        sleep = defaultdict(float)
        for s in self.pre_student:
            activity_file = self._sensor_rows('activity', 'activity', s, index_col=False)
            audio_file = self._sensor_rows('audio', 'audio', s, index_col=False)
            phonecharge_file = self._sensor_rows('phonecharge', 'phonecharge', s, index_col=False)
            phonelock_file = self._sensor_rows('phonelock', 'phonelock', s, index_col=False)
            start = min(activity_file[0][0], audio_file[0][0], phonelock_file[0][0], phonecharge_file[0][0])
            end = max(activity_file[-1][0], audio_file[-1][0], phonelock_file[-1][0], phonecharge_file[-1][0])
            # Seconds with no activity/audio sample keep the full weight;
            # seconds outside any lock/charge interval stay at 0.
            temp = np.zeros((end - start + 3, 4), dtype=np.float32)
            temp[:, 0] = w_activity
            temp[:, 1] = w_audio
            # Rows are applied in file order: a non-zero label clears its
            # second and the next two, a zero label restores its own second.
            for column, rows, weight in ((0, activity_file, w_activity), (1, audio_file, w_audio)):
                for r in rows:
                    offset = r[0] - start
                    if r[1] != 0:
                        temp[offset, column] = 0
                        temp[offset + 1, column] = 0
                        temp[offset + 2, column] = 0
                    else:
                        temp[offset, column] = weight
            for r in phonelock_file:
                self._mark_interval(temp[:, 2], r[0] - start, r[1] - start, w_lock)
            for r in phonecharge_file:
                self._mark_interval(temp[:, 3], r[0] - start, r[1] - start, w_charge)
            counted = int(np.count_nonzero(temp.sum(axis=1) > 0.9))
            sleep[s] = counted / (end - start)
            print(s)  # progress indicator: one line per processed student
        return sleep





import os
import numpy as np


In [87]:
# Dataset location. Set the STUDENTLIFE_ROOT environment variable to point at
# the dataset directory; the original author's local path is kept as the
# fallback so existing setups keep working unchanged.
root_path = os.environ.get(
    'STUDENTLIFE_ROOT',
    r'C:\Users\kyrie\Desktop\comp9417\project\StudentLife_Dataset',
)
data = project_data(root_path=root_path)


In [88]:
# Post-term survey totals (defaultdicts keyed by student id).
flour = data.FlourishingScale()
panas_pos, panas_neg = data.PANAS()

# Keep only students that actually have all three post-term labels.
# The label dicts are defaultdicts, so looking up a student without a 'post'
# survey would silently yield a score of 0 and put them in the "below mean"
# class instead of excluding them.
stud_id = [
    s for s in data.pre_student
    if s in flour and s in panas_pos and s in panas_neg
]


In [89]:
# Mean flourishing total over the students that have a score.
counter = sum(flour.values())
mean_flour = counter / len(flour)


In [90]:
# Mean of the first PANAS total over the students that have a score.
counter = sum(panas_pos.values())
mean_panas_pos = counter / len(panas_pos)


In [91]:
# Mean of the second PANAS total over the students that have a score.
counter = sum(panas_neg.values())
mean_panas_neg = counter / len(panas_neg)


In [92]:
# Binary labels: 1 when the student's total is above the cohort mean, else 0.
# `.get(s, 0)` yields the same value a defaultdict lookup would, but does not
# insert a zero entry for students without a score. Inserting entries would
# change len()/sum() of the dicts and make the mean cells above return
# different values when re-run.
y_flour = np.array([int(flour.get(s, 0) > mean_flour) for s in stud_id])
y_panas_pos = np.array([int(panas_pos.get(s, 0) > mean_panas_pos) for s in stud_id])
y_panas_neg = np.array([int(panas_neg.get(s, 0) > mean_panas_neg) for s in stud_id])
    

In [93]:
# Sensor features; each call returns dicts keyed by student id.
# Conversation: total time and number of conversations per time-of-day bucket.
(conv_dura_day,
 conv_dura_eve,
 conv_dura_nig) = data.conversation_dura()
(conv_freq_day,
 conv_freq_eve,
 conv_freq_nig) = data.conversation_freq()

# Bluetooth co-location (single dict, no time-of-day split).
co_location = data.co_location()

# Activity, GPS distance and wifi-based indoor mobility per bucket.
(activity_day,
 activity_eve,
 activity_nig) = data.activity()
(distance_day,
 distance_eve,
 distance_nig) = data.traveled_distance()
(indoor_mobility_day,
 indoor_mobility_eve,
 indoor_mobility_nig) = data.indoor_mobility()


In [94]:
#sleep_duration = data.sleep_duration()


u00


In [0]:
# Feature matrix for the flourishing label, one row per student in stud_id.
# Notes carried over from the original cell (the two numbers per line are
# presumably a correlation coefficient and its p-value - TODO confirm):
#   conversation duration (pre)                 0.294  0.066
#   conversation duration during evening (pre)  0.362  0.022
#   number of co-locations (post)               0.324  0.050
x_flour = np.array([
    [
        conv_dura_day[s] + conv_dura_eve[s] + conv_dura_nig[s],  # total conversation time
        conv_dura_eve[s],
        co_location[s],
    ]
    for s in stud_id
])


In [0]:
# Feature matrix for both PANAS labels, one row per student in stud_id.
# Notes carried over from the original cell (the two numbers per line are
# presumably a correlation coefficient and its p-value - TODO confirm):
#   sleep duration (pre)                           -0.360  0.025
#   conversation frequency during day (pre)        -0.403  0.010
#   conversation frequency during evening (post)   -0.345  0.034
#   conversation duration during day (post)        -0.328  0.044
#   number of co-locations (post)                  -0.362  0.025
#   activity duration for day (post)               -0.326  0.049
#   activity duration for evening (post)           -0.464  0.004
#   traveled distance (post)                       -0.338  0.044
#   traveled distance for day (post)               -0.336  0.042
#   indoor mobility for day (post)                 -0.332  0.045
# sleep_duration[s] is left out because the sleep extraction call is disabled.
x_panas = np.array([
    [
        conv_freq_day[s],
        conv_freq_eve[s],
        conv_dura_day[s],
        conv_dura_eve[s],
        co_location[s],
        activity_day[s],
        activity_eve[s],
        distance_day[s] + distance_eve[s] + distance_nig[s],  # total traveled distance
        distance_day[s],
        indoor_mobility_day[s],
    ]
    for s in stud_id
])


In [0]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt


In [0]:
# Sanity check: feature matrices and label vectors must agree on the row count.
for array in (x_flour, x_panas, y_flour, y_panas_pos, y_panas_neg):
    print(array.shape)

# The same scaler object is fitted twice; each fit_transform call refits it,
# so after this cell `scaler` holds the parameters learned from x_panas.
scaler = MinMaxScaler()
x_flour_scaled = scaler.fit_transform(x_flour)
x_panas_scaled = scaler.fit_transform(x_panas)


In [0]:
# Decision tree with a fixed seed, tuned by 10-fold cross-validated accuracy.
clf = DecisionTreeClassifier(random_state=42)
paramaters = {
    name: [2, 4, 6, 8, 10]
    for name in ('max_depth', 'min_samples_leaf', 'min_samples_split')
}
grid = GridSearchCV(
    estimator=clf,
    param_grid=paramaters,
    scoring='accuracy',
    cv=10,
)


In [0]:
# Grid search for the flourishing label; prints best CV score, parameters and model.
grid.fit(x_flour_scaled, y_flour)
# (disabled) print the full grid-search metric record: grid.cv_results_
for caption, result in (
    ('网格搜索-最佳度量值:', grid.best_score_),
    ('网格搜索-最佳参数：', grid.best_params_),
    ('网格搜索-最佳模型：', grid.best_estimator_),
):
    print(caption, result)


In [0]:
# Grid search for the first PANAS label; prints best CV score, parameters and model.
grid.fit(x_panas_scaled, y_panas_pos)
# (disabled) print the full grid-search metric record: grid.cv_results_
for caption, result in (
    ('网格搜索-最佳度量值:', grid.best_score_),
    ('网格搜索-最佳参数：', grid.best_params_),
    ('网格搜索-最佳模型：', grid.best_estimator_),
):
    print(caption, result)


In [0]:
# Grid search for the second PANAS label; prints best CV score, parameters and model.
grid.fit(x_panas_scaled, y_panas_neg)
# (disabled) print the full grid-search metric record: grid.cv_results_
for caption, result in (
    ('网格搜索-最佳度量值:', grid.best_score_),
    ('网格搜索-最佳参数：', grid.best_params_),
    ('网格搜索-最佳模型：', grid.best_estimator_),
):
    print(caption, result)


