In [35]:
from datetime import datetime
from sklearn.impute import KNNImputer
from sklearn import preprocessing
import numpy as np
import pandas as pd
import json

In [2]:
data = pd.read_csv('myFitnessPal_parsed.csv')
data.head()

Unnamed: 0,user_id,date,sequence,food_ids,total_calories,total_carbs,total_fat,total_protein,total_sodium,total_sugar,goal_calories,goal_carbs,goal_fat,goal_protein,goal_sodium,goal_sugar
0,1,2014-09-15,1,"[1, 2, 3, 4, 4]",2430,96,37.0,50.0,855.0,63.0,1572.0,196.0,52.0,79.0,2300.0,59.0
1,1,2014-09-16,1,"[5, 1, 2, 3, 6, 7]",1862,158,54.0,114.0,2215.0,100.0,1832.0,229.0,61.0,92.0,2300.0,69.0
2,1,2014-09-17,1,"[1, 2, 3, 6, 8, 9, 10]",2251,187,60.0,98.0,1765.0,105.0,1685.0,210.0,56.0,85.0,2300.0,63.0
3,1,2014-09-18,1,"[1, 6, 2, 3, 11, 12]",2001,113,81.0,202.0,1101.0,71.0,1597.0,199.0,53.0,80.0,2300.0,60.0
4,1,2014-09-19,1,"[1, 7, 13, 12, 2, 3, 12, 12]",2158,180,89.0,115.0,1998.0,84.0,1589.0,198.0,53.0,80.0,2300.0,60.0


In [3]:
with open('foods.json') as json_file:
     foods = json.load(json_file)

FileNotFoundError: [Errno 2] No such file or directory: 'foods.json'

In [3]:
# # split date and create separated columns
# data["year"] = data["date"].apply(lambda x: int(x[:4]))
# data["month"] = data["date"].apply(lambda x: int(x[5:7]))
# data["day"] = data["date"].apply(lambda x: int(x[-2:]))
# data = data.drop(columns = 'date')

In [4]:
null_value_frequency = data.isna().sum()
null_value_frequency[null_value_frequency!=0] 

total_fat          877
total_protein      877
total_sodium      1305
total_sugar       1307
goal_calories     1922
goal_carbs        1925
goal_fat         27313
goal_protein     27318
goal_sodium      67720
goal_sugar       67990
dtype: int64

In [5]:
# As we saw there are 10 columns which have sometimes null values so :
#In this cell we drop rows which have null values in more than 3 columns

nullColumns= data.loc[data.isnull().sum(axis=1)>3].index
nullColumns=pd.DataFrame(nullColumns,columns={'index'})
print('Values which have null in more than 3 columns : ',(len(nullColumns)/len(data))*100,'%')

cond = data.index.isin(nullColumns['index']) 
data.drop(data[cond].index, inplace = True)

Values which have null in more than 3 columns :  4.6515073588266755 %


In [6]:
#just check how columns null distribution has changed
null_value_frequency = data.isna().sum()
null_value_frequency[null_value_frequency!=0] 

goal_protein        5
goal_sodium     40407
goal_sugar      40677
dtype: int64

In [7]:
# seems like there are people who have set goal_calories to zero and it should also be the mistake
# so lets drop those ones too

zeros= data[data['goal_calories']==0].index
zeros=pd.DataFrame(zeros,columns={'index'})
print('Examples which have zero in goal_calories : ',(len(zeros)/len(data))*100,'%')

cond = data.index.isin(zeros['index']) 
data.drop(data[cond].index, inplace = True)

Examples which have zero in goal_calories :  0.09484293759477595 %


In [8]:
# create new column based on foods quantity
data['foods_len'] = data["food_ids"].apply(lambda x: len(x[1:-1].split(',')))

In [9]:
# add new feature which shows how many days are logged by each applicants
user_logged_freq = data["user_id"].value_counts()
user_logged_df = pd.DataFrame(data["user_id"].unique(),columns = ['user_id'])
user_logged_df["logged_frequency"] = user_logged_df["user_id"].apply(lambda _id: user_logged_freq[_id])

In [10]:
# this function gets all nutrition values with goals and checks if calories difference is less than percentage of goals
# TODO:
def check_bounds(total_calories, total_carbs, total_fat, total_protein, total_sodium, total_sugar, 
               goal_calories, goal_carbs, goal_fat, goal_protein, goal_sodium, goal_sugar,percent):
    
    return (abs(goal_calories - total_calories) < goal_calories * percent / 100)

In [11]:
# this function checks last days for user and counts number of days when his nutrient was in goal range
def reach_goal(df,user_id,num_days):
    allowed_difference_percentage = 15
    tails = df[df["user_id"]==user_id].tail(num_days)
    tails["reach_goal"] = tails.apply(lambda row: check_bounds(*(row.values[4:16]),allowed_difference_percentage),axis=1)
    return tails["reach_goal"].sum()

In [12]:
# create new feature which shows if user reaches goals in last days 
# and return 1 if the number of days when user reached goal is greater than threshold else 0
# TODO:

number_of_last_days = 5
threshold = 2
user_logged_df["reach_goal"] = user_logged_df['user_id'].apply(lambda x: reach_goal(data,x,number_of_last_days))
user_logged_df["reach_goal"] = user_logged_df["reach_goal"].apply(lambda x: 1 if x>=threshold else 0)

In [13]:
# this function counts whole days between start and end date and calculates missed days for new feature
def days_missed(d1, d2,loggedDays):
    d1 = datetime.strptime(str(d1), "%Y-%m-%d")
    d2 = datetime.strptime(str(d2), "%Y-%m-%d")
    return abs(abs((d2 - d1).days)-loggedDays)

In [14]:
# this function calls days_missed regarding last and first records
def get_missed_days(df,userID,logged_frequency):
    tail = df[df["user_id"]==userID].tail(1)['date'].values[0]
    head = df[df["user_id"]==userID].head(1)['date'].values[0]
    return days_missed(tail,head,logged_frequency)   

In [15]:
# add new feature based on how many days are missed for each user
user_logged_df['days_missed'] = user_logged_df[['user_id','logged_frequency']].apply(lambda x: get_missed_days(data,x.user_id,x.logged_frequency),axis=1)

In [16]:
def getHealtyDistributedValues(value,lower,upper):
    if(value<=lower):
        return lower-value # the difference person lacked 
    if(value >= upper):
        return value-upper # the difference person exceed 
    return 0 # method returns 0 for the values in range

In [17]:
# There is known that for healthy eating, daily carbs should be between 45-65% out of callories, fats between 10-35% and proteins 20-35%
# So this method counts daily norm distribution of person
# 0 for the people in persmissible range
# max value may be 1.3 (when person only took fats)

def healthyDistributed(carbs,fat,protein):
    totalCalories = fat*9+ carbs*4 + protein*4 # convert to calories (1g fat = 9 calories and etc..) and sum
    sum =  getHealtyDistributedValues ( carbs*4 / (totalCalories+0.00000001),0.45,0.65)
    sum += getHealtyDistributedValues ( protein*4 / (totalCalories+0.00000001),0.2,0.35)
    sum += getHealtyDistributedValues ( fat*9 / (totalCalories+0.00000001),0.1,0.35)
    return sum

In [18]:
#new column in data based on healthydistributed method to determine user's behavior
data['healtyDistrib']=data[['total_carbs','total_fat','total_protein']].apply(lambda x: healthyDistributed(x.total_carbs,x.total_fat,x.total_protein),axis=1)

In [19]:
data=data.drop(columns=['date','food_ids','sequence'])

In [20]:
# we need to have the same number of rows for each user so
# this function checks if number of rows for each user are greater than num_rows
# if it's greater, then this function returns the lasts rows 
# if less, then this fills them with -1

def row_padding(x,num_row):
    # get last row because I need same format and same id, others columns replaced by -1
    last_row = x.iloc[-1] 
    last_row[1:] = [-1]*len(last_row[1:])
    
    if np.shape(x)[0] < num_row:
        new_x = pd.DataFrame(x)
        for i in range(np.shape(x)[0],num_row):
            new_x = pd.DataFrame(new_x.append(last_row))
        return new_x
    else:
        return x.tail(num_row)

In [21]:
# create new columns based on goal and total nutritient
data["calories_diff"] = data["goal_calories"]-data["total_calories"]
data["carbs_diff"] = data["goal_carbs"]-data["total_carbs"]
data["fat_diff"] = data["goal_fat"]-data["total_fat"]
data["protein_diff"] = data["goal_protein"]-data["total_protein"]
data["sodium_diff"] = data["goal_sodium"]-data["total_sodium"]
data["sugar_diff"] = data["goal_sugar"]-data["total_sugar"]

In [22]:
# this function flattens all rows for each user which we padded already 
# so creates one vector because we need one input for each user
def flatten_rows(x,cols):
    for i in range(1,x.shape[0]):
        for j in range(1,len(cols)):
            temp_row = x.iloc[i]
            x[cols[j]+"_"+str(i)] = temp_row[j]
    return x.head(1)

In [23]:
data = data.groupby('user_id').apply(row_padding,5).reset_index(drop=True)

In [24]:
cols = data.columns
data = data.groupby('user_id').apply(flatten_rows,cols).reset_index(drop=True)

In [25]:
data

Unnamed: 0,user_id,total_calories,total_carbs,total_fat,total_protein,total_sodium,total_sugar,goal_calories,goal_carbs,goal_fat,...,goal_sodium_4,goal_sugar_4,foods_len_4,healtyDistrib_4,calories_diff_4,carbs_diff_4,fat_diff_4,protein_diff_4,sodium_diff_4,sugar_diff_4
0,1.0,4122.0,464.0,196.0,168.0,5787.0,125.0,4578.0,572.0,153.0,...,2300.0,87.0,6.0,0.081410,1121.0,176.0,34.0,18.0,1162.0,26.0
1,2.0,1548.0,153.0,52.0,44.0,1752.0,100.0,1320.0,165.0,44.0,...,2300.0,50.0,2.0,0.000000,1264.0,154.0,43.0,59.0,2193.0,40.0
2,3.0,1468.0,158.0,41.0,55.0,1588.0,15.0,1486.0,185.0,49.0,...,2300.0,25.0,6.0,0.076138,2.0,-22.0,10.0,26.0,593.0,14.0
3,4.0,645.0,63.0,39.0,27.0,1454.0,192.0,1450.0,181.0,73.0,...,2300.0,3500.0,8.0,0.085167,4.0,-24.0,14.0,4.0,1182.0,1440.0
4,5.0,3754.0,196.0,33.0,124.0,2569.0,90.0,3399.0,212.0,46.0,...,2300.0,90.0,14.0,0.000000,155.0,45.0,12.0,56.0,-844.0,-32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9396,9893.0,1140.0,47.0,49.0,13.0,2000.0,8.0,1932.0,97.0,242.0,...,2300.0,73.0,1.0,0.650000,1122.0,28.0,244.0,62.0,2140.0,73.0
9397,9894.0,1873.0,237.0,75.0,64.0,50.0,29.0,1660.0,208.0,55.0,...,-1.0,-1.0,-1.0,-1.000000,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
9398,9895.0,1755.0,221.0,54.0,78.0,8.0,85.0,2024.0,202.0,56.0,...,44.0,44.0,19.0,0.118536,-132.0,-21.0,-46.0,80.0,27.0,-20.0
9399,9896.0,338.0,20.0,30.0,11.0,8.0,18.0,1290.0,43.0,161.0,...,25.0,48.0,5.0,0.452941,734.0,21.0,139.0,-8.0,14.0,36.0


In [26]:
# merge two new features with main data
data = pd.merge(data, user_logged_df, on=['user_id'])

In [27]:
null_value_frequency = data.isna().sum()
null_value_frequency[null_value_frequency!=0] 

goal_sodium       683
goal_sugar        688
sodium_diff       683
sugar_diff        688
goal_protein_1      1
goal_sodium_1     649
goal_sugar_1      652
protein_diff_1      1
sodium_diff_1     649
sugar_diff_1      652
goal_sodium_2     634
goal_sugar_2      638
sodium_diff_2     634
sugar_diff_2      638
goal_protein_3      1
goal_sodium_3     615
goal_sugar_3      617
protein_diff_3      1
sodium_diff_3     615
sugar_diff_3      617
goal_sodium_4     605
goal_sugar_4      610
sodium_diff_4     605
sugar_diff_4      610
dtype: int64

In [28]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9401 entries, 0 to 9400
Columns: 104 entries, user_id to days_missed
dtypes: float64(101), int64(3)
memory usage: 7.5 MB


In [29]:
# handle missing data with KNNImputer
imputer = KNNImputer()
data[data.columns] = np.round(imputer.fit_transform(data))
data[data.columns] = np.round(imputer.fit_transform(data))

In [36]:
# some of the features had wide range of values so lets scale them
scaler = preprocessing.MinMaxScaler()
columns=data.columns.drop('user_id')
scaled_df = scaler.fit_transform(data[columns])
scaler_df = pd.DataFrame(scaled_df, columns=columns)
data[columns] = scaled_df

In [37]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [38]:
data.describe()

Unnamed: 0,user_id,total_calories,total_carbs,total_fat,total_protein,total_sodium,total_sugar,goal_calories,goal_carbs,goal_fat,goal_protein,goal_sodium,goal_sugar,foods_len,healtyDistrib,calories_diff,carbs_diff,fat_diff,protein_diff,sodium_diff,sugar_diff,total_calories_1,total_carbs_1,total_fat_1,total_protein_1,total_sodium_1,total_sugar_1,goal_calories_1,goal_carbs_1,goal_fat_1,goal_protein_1,goal_sodium_1,goal_sugar_1,foods_len_1,healtyDistrib_1,calories_diff_1,carbs_diff_1,fat_diff_1,protein_diff_1,sodium_diff_1,sugar_diff_1,total_calories_2,total_carbs_2,total_fat_2,total_protein_2,total_sodium_2,total_sugar_2,goal_calories_2,goal_carbs_2,goal_fat_2,goal_protein_2,goal_sodium_2,goal_sugar_2,foods_len_2,healtyDistrib_2,calories_diff_2,carbs_diff_2,fat_diff_2,protein_diff_2,sodium_diff_2,sugar_diff_2,total_calories_3,total_carbs_3,total_fat_3,total_protein_3,total_sodium_3,total_sugar_3,goal_calories_3,goal_carbs_3,goal_fat_3,goal_protein_3,goal_sodium_3,goal_sugar_3,foods_len_3,healtyDistrib_3,calories_diff_3,carbs_diff_3,fat_diff_3,protein_diff_3,sodium_diff_3,sugar_diff_3,total_calories_4,total_carbs_4,total_fat_4,total_protein_4,total_sodium_4,total_sugar_4,goal_calories_4,goal_carbs_4,goal_fat_4,goal_protein_4,goal_sodium_4,goal_sugar_4,foods_len_4,healtyDistrib_4,calories_diff_4,carbs_diff_4,fat_diff_4,protein_diff_4,sodium_diff_4,sugar_diff_4,logged_frequency,reach_goal,days_missed
count,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0,9401.0
mean,4940.44708,0.002467,0.002915,0.005656,0.00907,0.001173,0.048571,0.181904,0.042373,0.018789,0.033951,0.139528,0.089999,0.175281,0.146261,0.986391,0.913622,0.659424,0.594557,0.767506,0.522491,0.118844,0.014055,0.00973,0.012705,0.036921,0.049928,0.242283,0.040566,0.018147,0.032078,0.134061,0.08379,0.243015,0.547708,0.659537,0.614693,0.457357,0.608906,0.764414,0.542383,0.034237,0.025139,0.01028,0.013546,0.065431,0.014156,0.237713,0.011846,0.022371,0.014811,0.129331,0.03004,0.152682,0.530741,0.849533,0.237444,0.499978,0.355921,0.627023,0.608207,0.091254,0.022422,0.011253,0.013905,0.034706,0.036105,0.229992,0.038177,0.023086,0.03054,0.126295,0.046404,0.18414,0.518509,0.741216,0.51563,0.548267,0.507534,0.754038,0.456686,0.015908,0.022853,0.009725,0.013184,0.008432,0.02859,0.222685,0.04728,0.022221,0.029646,0.123306,0.066749,0.181448,0.514467,0.909188,0.489896,0.533908,0.521641,0.902129,0.669013,0.314506,0.347197,0.316005
std,2861.680298,0.010371,0.010881,0.021989,0.026142,0.010394,0.105128,0.072242,0.045847,0.049553,0.07786,0.112642,0.171322,0.123603,0.353387,0.010285,0.01003,0.016241,0.029369,0.036313,0.054278,0.070207,0.020251,0.029166,0.034582,0.04912,0.111978,0.10948,0.045485,0.04849,0.073953,0.113769,0.166966,0.149648,0.207147,0.055748,0.015976,0.021553,0.031023,0.033492,0.062801,0.023879,0.036439,0.030882,0.036663,0.087252,0.034264,0.116688,0.01667,0.060781,0.035973,0.114618,0.061742,0.100006,0.223753,0.022013,0.011408,0.025066,0.021247,0.051255,0.017598,0.062034,0.03443,0.034554,0.035137,0.048066,0.085765,0.120193,0.043979,0.063483,0.07285,0.114945,0.100498,0.128741,0.237514,0.047973,0.021942,0.021973,0.042992,0.032636,0.038231,0.015909,0.033709,0.033795,0.037686,0.016109,0.069823,0.12164,0.055743,0.061844,0.072397,0.115566,0.150375,0.139324,0.252231,0.015708,0.024205,0.02242,0.037249,0.014147,0.048523,0.293142,0.476104,0.277147
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2461.0,0.001453,0.001213,0.001854,0.003333,2.6e-05,0.002261,0.144572,0.028222,0.009574,0.014255,0.0051,0.008491,0.083333,0.0,0.985788,0.91254,0.657368,0.588246,0.754701,0.508671,0.066293,0.005553,0.003143,0.004225,0.000801,0.002165,0.195171,0.025328,0.00936,0.013614,0.0044,0.008678,0.121951,0.5,0.632385,0.609183,0.453906,0.601918,0.752242,0.526106,0.017135,0.008793,0.003048,0.00418,0.001258,0.000541,0.189003,0.006704,0.011287,0.006173,0.0039,0.003236,0.079365,0.5,0.839752,0.234562,0.496271,0.351115,0.608213,0.604601,0.040188,0.007,0.002888,0.003821,0.000601,0.001233,0.186926,0.020218,0.011711,0.012976,0.0032,0.005029,0.08,0.5,0.716853,0.50801,0.544733,0.497001,0.740602,0.448147,0.005693,0.006309,0.001713,0.00272,0.000117,0.000713,0.188009,0.021994,0.011711,0.012976,0.0026,0.006225,0.069767,0.5,0.900155,0.478358,0.529435,0.512177,0.896428,0.656054,0.064516,0.0,0.028736
50%,4933.0,0.00237,0.002447,0.003796,0.006444,0.000444,0.005585,0.17802,0.039111,0.012128,0.019362,0.23,0.012264,0.166667,0.0,0.98645,0.913511,0.658824,0.590789,0.756203,0.511107,0.121988,0.012439,0.006948,0.0091,0.011869,0.005775,0.244854,0.038658,0.011912,0.018932,0.230077,0.011885,0.219512,0.5,0.655599,0.613533,0.455968,0.604982,0.75347,0.52895,0.035251,0.021472,0.007172,0.009631,0.016768,0.001583,0.243986,0.011284,0.01505,0.008761,0.230077,0.004362,0.142857,0.5,0.848169,0.236654,0.498343,0.353124,0.609783,0.605241,0.09509,0.01854,0.007838,0.010054,0.007046,0.004037,0.241089,0.037103,0.01571,0.018294,0.230077,0.00736,0.18,0.5,0.736369,0.513491,0.546684,0.501604,0.741614,0.449638,0.014477,0.017714,0.005997,0.008161,0.001183,0.00285,0.234972,0.046844,0.015139,0.017869,0.230077,0.010941,0.162791,0.5,0.908885,0.487782,0.532258,0.516661,0.896913,0.658234,0.22043,0.0,0.275862
75%,7421.0,0.003104,0.003723,0.005826,0.009778,0.001824,0.013298,0.217913,0.050222,0.015957,0.02766,0.23,0.018868,0.25,0.0,0.987419,0.914774,0.660354,0.593772,0.789199,0.514224,0.161984,0.018992,0.010918,0.0143,0.063008,0.01357,0.29918,0.049322,0.015954,0.027441,0.230077,0.019053,0.341463,0.5,0.693762,0.619816,0.458637,0.608343,0.784314,0.53259,0.047358,0.034356,0.011834,0.015446,0.112485,0.003748,0.300062,0.014669,0.019887,0.012843,0.230077,0.007106,0.206349,0.5,0.861821,0.240073,0.501381,0.355393,0.658244,0.606132,0.130234,0.031404,0.012995,0.016489,0.060135,0.009643,0.296187,0.048212,0.020851,0.026803,0.230077,0.01239,0.26,0.5,0.77088,0.523328,0.549675,0.506486,0.774341,0.451737,0.024055,0.033002,0.011137,0.015643,0.013948,0.007228,0.289762,0.06084,0.02028,0.026165,0.230077,0.017733,0.255814,0.5,0.917716,0.498483,0.535215,0.520659,0.910259,0.660851,0.489247,1.0,0.545977
max,9897.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## data for prediction if user reach goals

In [32]:
# choosing users which have more logs than number_of_logs
number_of_logs = 60
data_2 = data[data["logged_frequency"]>number_of_logs]

# update dataframe

In [1]:
data.to_csv(r'myFitnessPal_parsed.csv',index = False)

NameError: name 'data' is not defined