In [3]:
### Approach 1. Rescaling lat and lons ###
def approachAvg1(lm_data, users):
    """Rescale each user's latitudes and longitudes to the range [0, 1].

    Records in ``lm_data`` are matched to a user on their first field; users
    owning fewer than 14 records are skipped.  Field 5 of a record is read as
    a list of trajectories, each a sequence of locations whose first two
    items are latitude and longitude.

    Coordinates are converted to ``float`` and rounded to 3 decimals, then
    min-max scaled per user (latitude and longitude independently) and
    flattened to ``[lat, lon, lat, lon, ...]``.

    Args:
        lm_data: iterable of indexable records with at least six fields.
        users: iterable of user identifiers to process, in output order.

    Returns:
        A list with one ``[r[0], r[1], r[2], r[3], r[4], loc_flat]`` entry per
        record of every retained user.  ``loc_flat`` is empty for every
        record of a user that has no location points at all.
    """
    data = []
    for u in users:
        u_data = [d for d in lm_data if d[0] == u]

        if len(u_data) < 14:
            continue

        # Round once and reuse the same values for fitting and transforming,
        # so every scaled coordinate is guaranteed to land inside [0, 1].
        record_points = [
            [
                [round(float(l[0]), 3), round(float(l[1]), 3)]
                for traj in ud[5]
                for l in traj
            ]
            for ud in u_data
        ]
        all_points = [p for points in record_points for p in points]

        scaler = None
        if all_points:
            # A fresh scaler per user.  The input is 2-D (n_samples, 2):
            # scikit-learn scales each column on its own, so one scaler covers
            # both latitude and longitude.
            scaler = MinMaxScaler(feature_range=(0, 1))
            scaler.fit(all_points)

        for ud, points in zip(u_data, record_points):
            loc_flat = []
            if points:
                # One transform call per record instead of one per point.
                for lat, lon in scaler.transform(points):
                    loc_flat.append(lat)
                    loc_flat.append(lon)

            data.append([ud[0], ud[1], ud[2], ud[3], ud[4], loc_flat])
    return data

In [2]:
### Approach 2. Computing displacement changes ###
def approachAvg2(lm_data, users):
    """Describe each record by the relative change between successive steps.

    Records in ``lm_data`` are matched to a user on their first field; users
    owning fewer than 14 records are skipped.  Field 5 of a record is read as
    a list of trajectories, each a sequence of locations.

    For every trajectory the absolute ``Distance.haversine`` distance of each
    step is compared with the previous step's distance (0 before the first
    step).  The change is ``|prev - cur| / max(prev, cur)``, or 0 when both
    are equal.  Changes are summed per step position across the record's
    trajectories and divided by the number of trajectories.

    The accumulator is sized from the record's first trajectory, so a later,
    longer trajectory raises ``IndexError``.

    Returns:
        A list with one ``[r[0], r[1], r[2], r[3], r[4], averaged_changes]``
        entry per record of every retained user.
    """
    results = []
    for user in users:
        records = [row for row in lm_data if row[0] == user]

        if len(records) >= 14:
            for record in records:
                trajectories = record[5]
                # One slot per step of the first trajectory.
                totals = [0] * (len(trajectories[0]) - 1)

                for trajectory in trajectories:
                    last_point, last_step = trajectory[0], 0
                    for slot, point in enumerate(trajectory[1:]):
                        step = abs(Distance.haversine(last_point, point))

                        if last_step != step:
                            change = abs(last_step - step)/max(last_step, step)
                        else:
                            # Equal distances (including 0 and 0): no change,
                            # and no division by zero.
                            change = 0

                        totals[slot] += change
                        last_point, last_step = point, step

                averaged = [total/len(trajectories) for total in totals]
                results.append([record[0], record[1], record[2], record[3],
                                record[4], averaged])
    return results

In [4]:
### Approach 3. Computing displacement [normalised] ### 
def approachAvg3(lm_data, users):
    """Describe each record by its min-max normalised step distances.

    Records in ``lm_data`` are matched to a user on their first field; users
    owning fewer than 14 records are skipped.  Field 5 of a record is read as
    a list of trajectories, each a sequence of locations.

    The absolute ``Distance.haversine`` distance of every consecutive step is
    computed once, a ``MinMaxScaler`` is fitted on all of a user's distances,
    and the scaled distances are summed per step position across a record's
    trajectories and divided by the number of trajectories.

    The accumulator is sized from the record's first trajectory, so every
    record needs at least one trajectory, and a later, longer trajectory
    raises ``IndexError``.

    Args:
        lm_data: iterable of indexable records with at least six fields.
        users: iterable of user identifiers to process, in output order.

    Returns:
        A list with one ``[r[0], r[1], r[2], r[3], r[4], dist_list]`` entry
        per record of every retained user.  ``dist_list`` is empty when the
        record's first trajectory has fewer than two points.
    """
    data = []
    for u in users:
        u_data = [d for d in lm_data if d[0] == u]

        if len(u_data) < 14:
            continue

        # Step distances per record and trajectory, computed once and reused
        # for both fitting and transforming.
        record_steps = [
            [
                [abs(Distance.haversine(a, b)) for a, b in zip(traj, traj[1:])]
                for traj in ud[5]
            ]
            for ud in u_data
        ]

        # scikit-learn expects 2-D input: one row per sample, one column here.
        all_steps = [
            [dist] for trajs in record_steps for steps in trajs for dist in steps
        ]

        scaler = None
        if all_steps:
            # A fresh scaler per user; with no steps at all there is nothing
            # to fit and every record simply gets an empty vector.
            scaler = MinMaxScaler(feature_range=(0, 1))
            scaler.fit(all_steps)

        for ud, trajs in zip(u_data, record_steps):
            dist_list = [0] * (len(ud[5][0]) - 1)
            for steps in trajs:
                if not steps:
                    continue
                # One transform call per trajectory instead of one per step.
                scaled = scaler.transform([[dist] for dist in steps])
                for slot, row in enumerate(scaled):
                    dist_list[slot] += row[0]

            dist_list = [dl / len(ud[5]) for dl in dist_list]
            data.append([ud[0], ud[1], ud[2], ud[3], ud[4], dist_list])
    return data

In [19]:
import collections

### Approach 4. Computing time spent at top 10 places ### 
def _round_latlon(point):
    """Return ``(lat, lon)`` of ``point`` as floats rounded to 3 decimals."""
    return (round(float(point[0]), 3), round(float(point[1]), 3))


def approachAvg4(lm_data, users, topn=10, traj_days=14):
    """Describe each record by the share of time spent at a user's top places.

    Records in ``lm_data`` are matched to a user on their first field; users
    owning fewer than 14 records are skipped.  Field 5 of a record is read as
    a list of trajectories, each a sequence of locations whose first two
    items are latitude and longitude.  Coordinates are rounded to 3 decimals
    before places are compared.

    Args:
        lm_data: iterable of indexable records with at least six fields.
        users: iterable of user identifiers to process, in output order.
        topn: number of most frequently visited places that get a slot of
            their own.
        traj_days: length in days of the window a record covers; time spent
            is expressed as a fraction of ``traj_days * 24 * 60`` minutes.

    Returns:
        A list with one ``[r[0], r[1], r[2], r[3], r[4], time_spent]`` entry
        per record of every retained user.  ``time_spent`` has ``topn + 1``
        values: one per top place, most visited first, plus a final slot for
        all other places.  Equally frequent places are ordered as
        ``collections.Counter.most_common`` returns them.
    """
    minutes_per_sample = 30  # the time series has one sample per 30 minutes
    total_minutes = traj_days * 24 * 60.0

    data = []
    for u in users:
        u_data = [d for d in lm_data if d[0] == u]

        if len(u_data) < 14:
            continue

        # Visit counts over all of the user's records, in a single pass.
        visits = collections.Counter(
            _round_latlon(l) for ud in u_data for traj in ud[5] for l in traj
        )

        # Place -> slot index; anything not listed falls into the last slot.
        slot_of = {}
        for slot, (place, _count) in enumerate(visits.most_common(topn)):
            slot_of[place] = slot

        for ud in u_data:
            time_spent = [0] * (topn + 1)
            for traj in ud[5]:
                for l in traj:
                    slot = slot_of.get(_round_latlon(l), topn)
                    time_spent[slot] += minutes_per_sample

            # Ratio to the overall time covered by the record.
            time_spent = [t / total_minutes for t in time_spent]
            data.append([ud[0], ud[1], ud[2], ud[3], ud[4], time_spent])
    return data
    
