In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import time
from geopandas import GeoDataFrame
from utils.basefuncs import preProcessDataFrame
from utils.trajectoryClass import Trajectory
from utils.uuid import UUIDCollection
from utils.stopCollection import stopCollection,ExtractAndOrganizeData
from utils.gravityModel import POIgdf,activityMapper,gravityModel
from datetime import datetime,timedelta
import movingpandas as mpd
import warnings
import folium

In [2]:
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation and dtype warnings raised by any
# library are hidden as well — consider narrowing it to the specific categories/modules.
warnings.filterwarnings("ignore")

In [3]:
# Project root on the author's machine; Configs.xlsx is looked up directly inside it.
# NOTE(review): both locations are absolute, machine-specific paths — consider a configurable base dir.
path = r"D:\MSc\MSBD 5014\FindingPointsOfInterestFromGPS"
compiled_path = (Path(path) / "Configs.xlsx").resolve()

# Raw data CSV, kept in a separate data folder.
raw_path = (Path(r"D:\MSc\MSBD 5014\Data") / "rawdata_202212.csv").resolve()

In [4]:
# Read the raw CSV at raw_path into a DataFrame.
# NOTE(review): df_raw is not referenced by any later cell visible in this notebook; the working
# frame `df` is produced by preProcessDataFrame in the next cell — confirm this read is still needed.
df_raw = pd.read_csv(raw_path)

In [5]:
# Create the project's preprocessing helper, pointing it at Configs.xlsx and the
# 'preProcessDataFrame' sheet, then let it produce the working DataFrame `df`.
# NOTE(review): presumably the sheet tells the helper which file to load and how to clean it —
# confirm in utils.basefuncs, since the data source is not visible from this cell.
preprocess = preProcessDataFrame(compiled_path,sheetname='preProcessDataFrame')
df = preprocess.processDF()

In [6]:
def to_datetime(x):
    """Assemble a ``datetime`` from the ``ts_*`` component fields of one record.

    Parameters
    ----------
    x : mapping or pandas.Series
        Must provide the keys 'ts_Year', 'ts_Month', 'ts_Day', 'ts_Hour',
        'ts_Minute' and 'ts_Second', each holding an integral value
        (Python/numpy int, or a float such as ``12.0``).

    Returns
    -------
    datetime.datetime
        Naive timestamp built from the six components.

    Raises
    ------
    KeyError
        If a component key is missing.
    ValueError
        If a component is NaN or outside the valid calendar/clock range.
    """
    # Row-wise DataFrame.apply may hand the components over as floats (numeric
    # upcasting of the row), which ``datetime`` rejects, so coerce to int first.
    return datetime(
        year=int(x['ts_Year']),
        month=int(x['ts_Month']),
        day=int(x['ts_Day']),
        hour=int(x['ts_Hour']),
        minute=int(x['ts_Minute']),
        second=int(x['ts_Second']),
    )

In [7]:
# Apply to_datetime to every row and store the result in the 'ts_Index' column.
df['ts_Index'] = df.apply(to_datetime, axis='columns')

In [8]:
# Run configuration. The two top-level lists drive the missing-value fill; each nested
# dict is unpacked as keyword arguments into the corresponding pipeline object.
configs = {
    # Columns that may contain gaps ...
    "col_missing_vals": [
        "gpstime_Hour",
        "gpstime_Minute",
        "gpstime_Second",
        "gpstime_Day",
        "gpstime_Month",
        "gpstime_Year",
        "gpstime",
    ],
    # ... and, position by position, the column each gap is filled from.
    "fill_missing_vals": [
        "ts_Hour",
        "ts_Minute",
        "ts_Second",
        "ts_Day",
        "ts_Month",
        "ts_Year",
        "ts",
    ],
    "UUID_Collection": {
        "unique_identifier_col": "uuid",
        "index_col": "ts_Index",
        "sort_values_col": "ts",
        "lat_col": "latitude",
        "long_col": "longitude",
        "required_cols": ["gpsacc"],
        "min_duration": timedelta(minutes=30),
        # NOTE(review): units of the numeric thresholds below are not visible here — confirm in utils.
        "max_diameter": 25,
        "min_points": 200,
        "query_amount": 10,
        "plot_map": False,
    },
    "STOP_Collection": {
        "min_distance": 200,
        "stop_point_num": 5,
    },
    "POIgdf": {
        "poi_type_col_name": "POI Type",
        "activity_type_col_name": "Activity Types",
        "poi_point_col_name": "POI Point",
        "stop_point_lat": "Stop Point Latitude",
        "stop_point_long": "Stop Point Longitude",
    },
    "activityMapper": {
        "file_path": r"D:\MSc\MSBD 5014\FindingPointsOfInterestFromGPS\helper\POITypes.csv",
    },
}

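Fill in the missing values: each missing `gpstime*` field is taken from the matching `ts*` field, and missing `gpsacc` readings are replaced with the median of the observed `gpsacc` values.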

In [9]:
# Back-fill every column listed in 'col_missing_vals' from its positionally paired
# column in 'fill_missing_vals'; only rows where the target is NaN are touched.
for col, ref in zip(configs['col_missing_vals'], configs['fill_missing_vals']):
    _is_missing = df[col].isna()
    df.loc[_is_missing, col] = df.loc[_is_missing, ref]

# Replace missing 'gpsacc' entries with the median of the non-missing ones.
_gpsacc_missing = df['gpsacc'].isna()
df.loc[_gpsacc_missing, 'gpsacc'] = df.loc[~_gpsacc_missing, 'gpsacc'].median()

In [10]:
# Build the per-uuid collection from the cleaned frame using the 'UUID_Collection' settings.
uuid_collection_obj = UUIDCollection(df,**configs['UUID_Collection'])
# Derive the stop collection from it using the 'STOP_Collection' settings.
stop_collection_obj = stopCollection(uuid_collection_obj,**configs['STOP_Collection'])
# Query around the filtered stops.
# NOTE(review): radius = 200 is a magic number that lives outside `configs`; its units and its
# relation to STOP_Collection['min_distance'] (also 200) are not visible here — confirm.
queryObj = ExtractAndOrganizeData(stops_of_interest=stop_collection_obj.filtered_stops,radius = 200)
# Result is handed to POIgdf in the next cell.
# NOTE(review): presumably this step queries an external POI source — confirm in utils.stopCollection.
poigdf = queryObj.extractAndorganizeData()

Finished making Trajectory Objects


In [11]:
# Wrap the extracted POI frame, telling the wrapper which column names to use ('POIgdf' settings).
poigdf_obj = POIgdf(poigdf,**configs['POIgdf'])
# The mapper is configured with the POITypes.csv path from the 'activityMapper' settings.
activitymapper_obj = activityMapper(**configs['activityMapper'])
# Two mapper passes over the POI object; each returns the object that is used from then on.
poigdf_obj = activitymapper_obj.reset_POI_types(poigdf_obj)
poigdf_obj = activitymapper_obj.add_activity_types(poigdf_obj)
# Fit the gravity model on the mapped POIs and compute the per-stop, per-activity scores.
# NOTE(review): in the outputs displayed below, the visible values equal count / distance**2
# (e.g. 1 / 133.5628**2 ~ 5.6e-05) rather than normalised probabilities — confirm this is intended.
gravitymodel = gravityModel(activitymapper_obj,poigdf_obj)
results = gravitymodel.calculate_probability()

In [12]:
# Display the model's counts table: one row per stop point, one column per activity type.
# NOTE(review): presumably the number of nearby POIs of each activity type — confirm in utils.gravityModel.
gravitymodel.counts_df

Unnamed: 0,Stop Point,Vehicle Purchase,Vehicle Repair,Tourist,Entertainment,Shopping,Educational Supply,Vehicle Wash,Leisure,Public Services,...,Shopping/Daily Shopping,Death Services,Gambling,Health Supplies,Public Place,Refuelling,Daily Shopping,Air Transport,Advisory,Education
0,POINT (114.21940 22.27954),0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,2,0,0,1
1,POINT (114.26365 22.31392),0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,POINT (114.26732 22.31576),0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,2,0,0,0
3,POINT (114.26999 22.31773),0,1,0,0,0,0,0,0,0,...,0,0,0,1,3,0,0,0,0,0
4,POINT (114.20873 22.32080),0,0,0,0,2,0,0,0,0,...,1,0,0,0,1,0,2,0,0,0


In [13]:
# Display the model's distance table (same layout as counts_df).
# NOTE(review): in the displayed rows, 10000000 appears wherever the matching count is 0 —
# presumably a "no POI of this type" sentinel; confirm in utils.gravityModel.
gravitymodel.distance_df

Unnamed: 0,Stop Point,Vehicle Purchase,Vehicle Repair,Tourist,Entertainment,Shopping,Educational Supply,Vehicle Wash,Leisure,Public Services,...,Shopping/Daily Shopping,Death Services,Gambling,Health Supplies,Public Place,Refuelling,Daily Shopping,Air Transport,Advisory,Education
0,POINT (114.21940 22.27954),10000000,10000000.0,10000000.0,10000000,147.6067,10000000,10000000,10000000,10000000,...,10000000.0,10000000,10000000,10000000.0,10000000.0,10000000,147.6067,10000000,10000000,108.0767
1,POINT (114.26365 22.31392),10000000,10000000.0,10000000.0,10000000,10000000.0,10000000,10000000,10000000,10000000,...,10000000.0,10000000,10000000,10000000.0,10000000.0,10000000,10000000.0,10000000,10000000,133.5628
2,POINT (114.26732 22.31576),10000000,10000000.0,120.9703,10000000,141.859,10000000,10000000,10000000,10000000,...,10000000.0,10000000,10000000,10000000.0,10000000.0,10000000,140.2513,10000000,10000000,10000000.0
3,POINT (114.26999 22.31773),10000000,142.9531,10000000.0,10000000,10000000.0,10000000,10000000,10000000,10000000,...,10000000.0,10000000,10000000,10.87539,99.68661,10000000,10000000.0,10000000,10000000,10000000.0
4,POINT (114.20873 22.32080),10000000,10000000.0,10000000.0,10000000,134.1613,10000000,10000000,10000000,10000000,...,95.80821,10000000,10000000,10000000.0,130.9063,10000000,134.1613,10000000,10000000,10000000.0


In [14]:
# Manual sanity check of one entry of `results`: 133.5628 is the Education distance shown for
# stop point 1 in distance_df (its count is 1); the inverse square (~5.6e-05) matches the
# Education value displayed for that stop in `results`.
1/((133.5628)**2)

5.605688616623205e-05

In [15]:
# Display the output of gravitymodel.calculate_probability(): one row per stop point,
# one column per activity type.
results

Unnamed: 0,Stop Point,Vehicle Purchase,Vehicle Repair,Tourist,Entertainment,Shopping,Educational Supply,Vehicle Wash,Leisure,Public Services,...,Shopping/Daily Shopping,Death Services,Gambling,Health Supplies,Public Place,Refuelling,Daily Shopping,Air Transport,Advisory,Education
0,POINT (114.21940 22.27954),0.0,0.0,0.0,0.0,4.6e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,9.2e-05,0.0,0.0,8.6e-05
1,POINT (114.26365 22.31392),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.6e-05
2,POINT (114.26732 22.31576),0.0,0.0,6.8e-05,0.0,5e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000102,0.0,0.0,0.0
3,POINT (114.26999 22.31773),0.0,4.9e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.008455,0.000302,0.0,0.0,0.0,0.0,0.0
4,POINT (114.20873 22.32080),0.0,0.0,0.0,0.0,0.000111,0.0,0.0,0.0,0.0,...,0.000109,0.0,0.0,0.0,5.8e-05,0.0,0.000111,0.0,0.0,0.0


In [16]:
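# NOTE(review): cells In [16]-In [19] contain only commented-out prototype code; it appears to
# overlap with activityMapper.reset_POI_types / add_activity_types and gravityModel used in
# cell In [11] — confirm, and delete these cells if they are superseded.
# from ast import literal_eval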
# poi_type_list = pd.read_csv(r"D:\MSc\MSBD 5014\FindingPointsOfInterestFromGPS\helper\POITypes.csv")['POI Type'].tolist()
# activity_types = pd.read_csv(r"D:\MSc\MSBD 5014\FindingPointsOfInterestFromGPS\helper\POITypes.csv")['Activity'].tolist()
# poi_type_activity_dict = dict(zip(poi_type_list,activity_types))
# reset_types = lambda x: str([i for i in literal_eval(x) if i in poi_type_list])
# POIgdf['POI Type'] = POIgdf['POI Type'].apply(reset_types)
# POIgdf = POIgdf.loc[POIgdf['POI Type'] != '[]']
# # literal_eval(POIgdf['POI Type'][0])

In [17]:
# find_activity = lambda x : str(list(set(poi_type_activity_dict[i] for i in literal_eval(x))))

# POIgdf['Activity Types'] = np.nan
# POIgdf['Activity Types'] = POIgdf['POI Type'].apply(find_activity)

In [18]:
# POIgdf['Stop Point Latitude'] = POIgdf['Stop Point'].apply(lambda x: x.y)
# POIgdf['Stop Point Longitude'] = POIgdf['Stop Point'].apply(lambda x: x.x)

In [19]:
# from collections import Counter
# cols = ['Stop Point']
# cols.extend(list(set(activity_types)))
# activity_df_dict = {k:[] for k in cols}
# for k,grp in POIgdf.groupby(['Stop Point Latitude','Stop Point Longitude']):
#     activity_df_dict['Stop Point'].append(k)
#     activity_list = map(lambda x: literal_eval(x),grp['Activity Types'])
#     flat_activity_list = [activity for sublist in activity_list for activity in sublist]
#     count_dict = dict(Counter(flat_activity_list))
#     for activity in list(set(activity_types)):
#         if activity in count_dict.keys():
#             activity_df_dict[activity].append(count_dict[activity])
#         else:
#             activity_df_dict[activity].append(0)