In [1]:
import seaborn as sns
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
sns.set()
import datetime
import warnings
import time
from scipy import stats
from sklearn import linear_model
import itertools
import calendar
import json
warnings.filterwarnings('ignore')

# Back End Script 

### We are writing a python script to generate a table of insights for a particular user. This script will exist in one function, we can define functions within the function, but we want to streamline as much as possible for maximum efficiency. 

#### Insights To Generate  
- steps by sleep 
- sleep by heartrate 
- 

 _id: UUID (primary key)

owner: UUID (reference to User)

source: STRING - Describes where this insight came from. For example, insights created manually from the admin dashboard will have their source set as admin.

category: STRING - indicates which category/vertical the insight belongs to

type: STRING - A short identifier unique for each type of insight. Example: An insight recommending the user to take x steps to improve sleep might be identified by a type such as "step-count:sleep:achieve-steps".

message: TEXT - A textual representation of the insight, to be used in places where we can't display rich media, such as notifications. Example: "Get 2500 steps in by noon for a more restful sleep".

data: JSON - a JSON blob with detailed info on the insight. We make this a JSON field so that it can be easily extended in the future with new types of insights, without requiring changes to the DB schema.

upvote: BOOLEAN - A boolean field that is set to true when the user marks this insight as "useful" or gives it a "thumbs up". Default to false.

downvote: BOOLEAN - A boolean field that is set to true when the user marks this insight as "not useful" or gives it a "thumbs down". Default to false.


For the initial version, the JSON structure of the data field will be relatively simple:

{
    "message": "Get 2500 steps in by noon for a more restful sleep",
    "metrics": ["step-count", "sleep"],
    "explanation": "Science shows that getting physical activity and sunlight in the morning can help to keep your circadian rhythm in optimal shape. Your circadian rhythm is..."
}

In [2]:
#This is what the table is going to look like. 
#There are nine categories. Above is a short description of each column. 
UserInsight = pd.DataFrame(columns=['_id','owner','source','category','type','message','data','upvote','downvote'])
UserInsight

Unnamed: 0,_id,owner,source,category,type,message,data,upvote,downvote


In [3]:
#Example of how to insert into the table. 

blob = {"message":"If you keep losing weight","metrics":'The correlated variables',"explanation":'Leave this'}
example = ['000','John','Apple Watch','HeartRate','health-weight:heart-rate:positive','If you keep losing weight, your heart rate will drop',blob,'N/A','N/A']

UserInsight.loc[len(UserInsight)] = example
UserInsight

Unnamed: 0,_id,owner,source,category,type,message,data,upvote,downvote
0,0,John,Apple Watch,HeartRate,health-weight:heart-rate:positive,"If you keep losing weight, your heart rate wil...","{u'metrics': u'The correlated variables', u'me...",,


### Function  

In [9]:
#Note that the data wont actually be pulled from a csv in real time. 

data_sample = pd.read_csv('20171016-210106-DataSample.csv',dtype={"value": float})
data_sample2 = pd.read_csv('20171016-210304-DataSample.csv',dtype={"value": float})
data_sample3 = pd.read_csv('20171016-210529-DataSample.csv',dtype={"value": float})
#data_sample4 = pd.read_csv('20171031-235959-DataSample.csv',dtype={"value": float})



data_sample = data_sample.append([data_sample2,data_sample3])
data_sample['startDate'] = pd.to_datetime(data_sample['startDate']) 
data_sample['owner'].replace('00000000-5854-8d6f-b8eb-cf14a0f795df','00000000-56ff-538b-2223-e1800b5e3ddb',inplace=True)
data_sample['startDate'] = pd.to_datetime(data_sample['startDate'])
data_sample['endDate'] = pd.to_datetime(data_sample['endDate'])
data_sample.index = data_sample['startDate']

to_tdelta = lambda row: row['endDate'] - row['startDate']
data_sample['duration'] = data_sample.apply(to_tdelta, axis=1)
data_sample['day_of_week'] = data_sample['startDate'].dt.dayofweek
data_sample.sort_index(inplace=True)
data_sample[data_sample['type']=='health-sleep'].head()

Unnamed: 0_level_0,_id,owner,source,sourceId,sourceName,type,startDate,endDate,value,content,originalData,createdAt,updatedAt,duration,day_of_week
startDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2012-01-19 07:49:00,dee37862-6869-488e-b301-c21bc69e6f1e,00000000-56ff-538b-2223-e1800b5e3ddb,fitbit,6011012:1,Fitbit,health-sleep,2012-01-19 07:49:00,2012-01-19 07:51:00,0.0,,,2017-10-11 11:07:13.318881-07,2017-10-11 17:32:01.100877-07,00:02:00,3
2012-01-19 07:49:00,36e3fbf8-53bd-4f17-ae4c-314f59aab97e,00000000-56ff-538b-2223-e1800b5e3ddb,fitbit,6011012:1,Fitbit,health-sleep,2012-01-19 07:49:00,2012-01-19 07:51:00,0.0,,,2017-10-11 18:05:32.162199-07,2017-10-12 15:09:13.597742-07,00:02:00,3
2012-01-19 07:49:00,dee37862-6869-488e-b301-c21bc69e6f1e,00000000-56ff-538b-2223-e1800b5e3ddb,fitbit,6011012:1,Fitbit,health-sleep,2012-01-19 07:49:00,2012-01-19 07:51:00,0.0,,,2017-10-11 11:07:13.318881-07,2017-10-11 17:32:01.100877-07,00:02:00,3
2012-01-19 07:49:00,dee37862-6869-488e-b301-c21bc69e6f1e,00000000-56ff-538b-2223-e1800b5e3ddb,fitbit,6011012:1,Fitbit,health-sleep,2012-01-19 07:49:00,2012-01-19 07:51:00,0.0,,,2017-10-11 11:07:13.318881-07,2017-10-11 17:32:01.100877-07,00:02:00,3
2012-01-19 07:49:00,36e3fbf8-53bd-4f17-ae4c-314f59aab97e,00000000-56ff-538b-2223-e1800b5e3ddb,fitbit,6011012:1,Fitbit,health-sleep,2012-01-19 07:49:00,2012-01-19 07:51:00,0.0,,,2017-10-11 18:05:32.162199-07,2017-10-12 15:09:13.597742-07,00:02:00,3


In [32]:
#Here we have our class of functions 

class UserInsightFunction():
    
    #initializing the class.
    def __init__(self, user_id, data_table):
        self.UserInsight = pd.DataFrame(columns=['_id','owner','source','category','type','message','data','upvote','downvote'])
        self.InsightTypes = []
        self.user_id = user_id
        self.data_table = data_table
        self.variables = {'health-sleep': 'sleep', 'health-bmi': 'Body Mass Index', 'health-weight': 'weight',
       'daily-summary:health-sleep': 'sleep', 'health-height': 'height',
       'health-fat-free-mass': 'fat-free mass', 'health-body-fat': 'body fat', 'health-fat-mass-weight': 'fat-mass-weight',
       'health-heart-rate': 'heart rate', 'health-step-distance': 'step distance', 'health-step-count': 'step count',
       'daily-summary:health-step-count': 'step-count', 'daily-summary:health-heart-rate': 'heart rate'}
    
    #***** main function. Pulls everything together *****
    def full_script(self):
        curr = self.create_insights(self.CorrelationTable(), self.user_id)
        curr2 = self.trends(curr)
        return curr2
    
    #Helper function for creating the correlation table
    def prepare_table(self, x, y,time='D',data_sample=data_sample):
        df1 = data_sample[data_sample['type'] == x]
        df2 = data_sample[data_sample['type'] == y]


        df1.index = df1['startDate']
        df2.index = df2['startDate']

        if x == "health-sleep":
            df1['value'] = pd.to_numeric(df1['duration'].dt.total_seconds())
            df1 = df1.loc[:,'value']
            df1 = df1.resample(time).mean()
        else:
            df1['value'] = pd.to_numeric(df1['value'])
            df1 = df1.loc[:,'value']
            if x == 'health-step-count':
                df1 = df1.resample(time).sum()
            else:
                df1 = df1.resample(time).mean()

        df2['value'] = pd.to_numeric(df2['value'])
        df2 = df2.loc[:,'value']
        if y == 'health-step-count':
            df2 = df2.resample(time).sum()
        else:
            df2 = df2.resample(time).mean()

        df1 = df1.to_frame('first')
        df1['second'] = df2

        return df1

    #function to generate correlations
    def corr_generator(self, steps):
        steps = steps.dropna()
        x = steps['first'].values
        y = steps['second'].values
        x = x.reshape(len(x), 1)
        y = y.reshape(len(x), 1)
        if not (x.shape == (0, 1) or y.shape == (0, 1)):
            regr = linear_model.LinearRegression()
            regr.fit(x, y)
            corr_info = stats.stats.pearsonr(x, y)
            r = corr_info[0][0]
            if corr_info[1] == 0.0:
                p = corr_info[1]
            else:
                p = corr_info[1][0]
            if (type(p) is np.ndarray):
                p = p[0]
            return r, p
        else:
            return 0, 1.1
    
    #function to create the correlation table
    def CorrelationTable(self): 
        #create table of all correlations in our data set for the user.
        CorrelationTable = pd.DataFrame(columns=['Variable1','Variable2','Overall','Last 30 Days','Last 60 Days','Last 90 Days', 'p-Overall','p-Last 30 Days','p-Last 60 Days','p-Last 90 Days'])
        data = self.data_table[self.data_table['owner'] == self.user_id]
        types = [types for types in data.type.unique()]
        data30 = data.last('30D')
        data7 = data.last('60D')
        data1 = data.last('90D')
        for type1, type2 in itertools.permutations(types, 2):
            # print(type1 + " " + type2)
            r, p = self.corr_generator(self.prepare_table(type1, type2, data_sample=data))
            #print(type1 + " " + type2 + " " + str(r) + " total")
            r30, p30 = self.corr_generator(self.prepare_table(type1, type2, data_sample=data30))
            #print(type1 + " " + type2 + " " + str(r30) + " 30")
            r7, p7 = self.corr_generator(self.prepare_table(type1, type2, data_sample=data7))
            #print(type1 + " " + type2 + " " + str(r7) + " 7")
            r1, p1 = self.corr_generator(self.prepare_table(type1, type2, data_sample=data1))
            #print(type1 + " " + type2 + " " + str(r1) + " 1")
            curRow = {'Variable1': type1, 'Variable2': type2, 'Overall': r, 'Last 30 Days': r30, 'Last 60 Days': r7, 'Last 90 Days': r1, 'p-Overall': p, 'p-Last 30 Days': p30, 'p-Last 60 Days': p7, 'p-Last 90 Days': p1}
            CorrelationTable = CorrelationTable.append(curRow, ignore_index=True)
        print(CorrelationTable)
        return CorrelationTable
        
    #creating text insights from data insights. 
    def create_insights(self,c_table, owner):

        def insight_from_corr(row): 
            variables = self.variables
            out = pd.DataFrame()
            source = 'ongo'
            category = row.Variable2
            upvote, downvote = False, False
            for time_range in ['Overall', 'Last 30 Days', 'Last 60 Days', 'Last 90 Days']:
                val = row[time_range]
                p_val = row['p-' + time_range]
                if abs(val) > 0.5 and p_val <= 0.05:
                    direction = 'positive' if val > 0 else 'negative'
                    type_ = str(row['Variable1']) +':'+ str(row['Variable2']) +':'+ direction +':'+ time_range
                    if time_range == 'Overall':
                        if direction == 'positive':
                            message1 = 'Your overall ' + variables[row['Variable1']] + ' and your overall ' + variables[row['Variable2']] + ' are correlated positively.'
                            message2 = ' An increase in ' + variables[row['Variable1']] + ' will continue to increase your ' + variables[row['Variable2']] + '.'
                        if direction == 'negative':
                            message1 = 'Your overall ' + variables[row['Variable1']] + ' and your overall ' + variables[row['Variable2']] + ' are correlated negatively.'
                            message2 = ' An increase in ' + variables[row['Variable1']] + ' will continue to decrease your ' + variables[row['Variable2']] + '.'
                    else:
                        if direction == 'positive':
                            message1 = 'Your ' + variables[row['Variable1']] + ' in the ' + time_range + ' and your ' + variables[row['Variable2']] + ' in the ' + time_range + ' are correlated positively.'
                            message2 = ' An increase in ' + variables[row['Variable1']] + ' will continue to increase your ' + variables[row['Variable2']] + '.'
                        if direction == 'negative':
                            message1 = 'Your ' + variables[row['Variable1']] + ' in the ' + time_range + ' and your ' + variables[row['Variable2']] + ' in the ' + time_range + ' are correlated negatively.'
                            message2 = ' An increase in ' + variables[row['Variable1']] + ' will continue to decrease your ' + variables[row['Variable2']] + '.'
                    message = message1 + message2
                    insight = {
                        'owner': owner,
                        'source': source,
                        'category': category,
                        'type': type_,
                        'message': message
                    }
                    data = insight.copy()
                    data.update({
                        'R': val,
                        'p': p_val
                    })
                    print(p_val)
                    insight.update({
                        'upvote': upvote,
                        'downvote': downvote,
                        'data': json.dumps(data)
                    })
                    out = out.append(pd.DataFrame(insight, index = [len(out)]))
                else:
                    insight_ = {
                        'owner': 'N/A',
                        'source': 'N/A',
                        'category': 'N/A',
                        'type': 'N/A',
                        'message': 'N/A',
                        'upvote': 'N/A',
                        'downvote': 'N/A',
                        'data': 'N/A'
                    }
                    out = out.append(pd.DataFrame(insight_, index = [len(out)]))
            return out

        df = pd.DataFrame()
        for index, row in c_table.iterrows():
            df = df.append(insight_from_corr(row), ignore_index = True)
        df._id = df.index
        return df
    
    #identifying trends in weekly activity, currrently only min/max day of week 
    def trends(self,table):
    #test user '00000000-56ff-538b-2223-e1800b5e3ddb'
        data_sample = self.data_table
        owner = data_sample[data_sample['owner'] == '00000000-56ff-538b-2223-e1800b5e3ddb'].iloc[1,:]
        x = data_sample
        trends_insights = pd.DataFrame(columns=['_id','owner','source','category','type','message','data','upvote','downvote'])
        x = x[x['type'].isin(['health-sleep','health-step-count'])]
        x['dayofweek'] = x['startDate'].dt.weekday_name
        x = x.loc[:,['type','dayofweek','value']].groupby(['type','dayofweek']).count()
        sleep = x.loc['health-sleep'].sort_values(by='value',ascending = False)
        steps = x.loc['health-step-count'].sort_values(by='value',ascending = False)


    # -- part of function that creates insight arrays -- 
        def foo(i,table):
            columns=['_id','owner','source','category','type','message','data','upvote','downvote']   
            if i is sleep:
                message = 'sleep:dayofweek'
            else:
                message = 'steps:dayofweek'
            trends = pd.DataFrame([],columns=columns)


            max_blob = {}
            max_blob['Max activity'] = i['value'][0]
            max_blob['Max day'] = i.index[0]
            min_blob = {}
            min_blob['Min activity'] = i['value'][6]
            min_blob['Min day'] = i.index[6]

            columns=['_id','owner','source','category','type','message','data','upvote','downvote']   


            sleep_insight_max = [owner['_id'],owner['owner'],'ongo','Day of Week Trends',message,'You are most active on ' + str(i['value'][0]),max_blob,'N/A','N/A']
            sleep_insight_min = [owner['_id'],owner['owner'],'ongo','Day of Week Trends',message,'You are least active on ' + str(i['value'][6]),min_blob,'N/A','N/A']


            table = table.append(pd.DataFrame([sleep_insight_max],columns=columns))
            table = table.append(pd.DataFrame([sleep_insight_min],columns=columns))
            return table
        last = foo(steps,table).append(foo(sleep,table))
        last = last[last['category'].isnull() == False]
        last = last[last['category'] != 'N/A'].reset_index()
        return last 

In [33]:
#This block will generate the table of insights. 
# Note: Current error is not generating data JSON blob.

userinsights = UserInsightFunction('00000000-56ff-538b-2223-e1800b5e3ddb', data_sample)
userinsights.full_script()

                 Variable1               Variable2   Overall  Last 30 Days  \
0             health-sleep           health-weight  0.314659      0.643842   
1             health-sleep              health-bmi  0.301354      0.615350   
2             health-sleep           health-height  0.000000      0.000000   
3             health-sleep         health-body-fat  0.052344      0.463823   
4             health-sleep       health-heart-rate -0.120387     -0.284451   
5             health-sleep  health-fat-mass-weight  0.455105      0.570986   
6             health-sleep    health-fat-free-mass -0.103878      0.127454   
7             health-sleep    health-step-distance  0.114859      0.356872   
8             health-sleep       health-step-count -0.114460      0.128694   
9            health-weight            health-sleep -0.503708     -0.809139   
10           health-weight              health-bmi  0.999192      0.975388   
11           health-weight           health-height       NaN    

2.87870092211e-13
4.30847443814e-20
2.77118450014e-33
1.4670547677e-25
4.76238242146e-101
0.00459795546285
0.000411976957587
2.17373509274e-06
0.0352483906101
0.00584252028863
9.9173159652e-06
0.0012150869236
9.42545717246e-05
1.27078385391e-05
0.0
2.87870092211e-13
4.30847443814e-20
2.77118450014e-33
1.12671457869e-25
2.45961122115e-101
0.00862244015278
0.000675411603642
3.43360405062e-06
0.0187566979305
0.00355340051641
5.41800098775e-06
0.0245733764983
0.0156858598522
0.000679640567304
1.4670547677e-25
1.12671457869e-25
1.19818440639e-264
6.58967986814e-10
1.47939180191e-12
7.79656549922e-17
0.021674404998
1.16865391835e-09
0.00361550676538
0.00138276832968
1.52021079425e-05
4.76238242146e-101
0.00459795546285
0.000411976957587
2.17373509274e-06
2.45961122115e-101
0.00862244015278
0.000675411603642
3.43360405062e-06
1.19818440639e-264
6.58967986814e-10
1.47939180191e-12
7.79656549922e-17
0.0352483906101
0.00584252028863
9.9173159652e-06
0.0187566979305
0.00355340051641
5.41800098775

Unnamed: 0,index,_id,category,data,downvote,message,owner,source,type,upvote
0,1,,health-weight,"{""category"": ""health-weight"", ""source"": ""ongo""...",False,Your sleep in the Last 30 Days and your weight...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-sleep:health-weight:positive:Last 30 Days,False
1,2,,health-weight,"{""category"": ""health-weight"", ""source"": ""ongo""...",False,Your sleep in the Last 60 Days and your weight...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-sleep:health-weight:positive:Last 60 Days,False
2,5,,health-bmi,"{""category"": ""health-bmi"", ""source"": ""ongo"", ""...",False,Your sleep in the Last 30 Days and your Body M...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-sleep:health-bmi:positive:Last 30 Days,False
3,21,,health-fat-mass-weight,"{""category"": ""health-fat-mass-weight"", ""source...",False,Your sleep in the Last 30 Days and your fat-ma...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-sleep:health-fat-mass-weight:positive:L...,False
4,36,,health-sleep,"{""category"": ""health-sleep"", ""source"": ""ongo"",...",False,Your overall weight and your overall sleep are...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-sleep:negative:Overall,False
5,37,,health-sleep,"{""category"": ""health-sleep"", ""source"": ""ongo"",...",False,Your weight in the Last 30 Days and your sleep...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-sleep:negative:Last 30 Days,False
6,38,,health-sleep,"{""category"": ""health-sleep"", ""source"": ""ongo"",...",False,Your weight in the Last 60 Days and your sleep...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-sleep:negative:Last 60 Days,False
7,39,,health-sleep,"{""category"": ""health-sleep"", ""source"": ""ongo"",...",False,Your weight in the Last 90 Days and your sleep...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-sleep:negative:Last 90 Days,False
8,40,,health-bmi,"{""category"": ""health-bmi"", ""source"": ""ongo"", ""...",False,Your overall weight and your overall Body Mass...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-bmi:positive:Overall,False
9,41,,health-bmi,"{""category"": ""health-bmi"", ""source"": ""ongo"", ""...",False,Your weight in the Last 30 Days and your Body ...,00000000-56ff-538b-2223-e1800b5e3ddb,ongo,health-weight:health-bmi:positive:Last 30 Days,False
