# Creation of JSON

## Imports

In [3]:
import json
import math
import os
import pickle
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests
from scipy.spatial import QhullError
from tqdm import tqdm

from api_functions import *
from parquet_union_data_preprocess import *
from neiborhood_class import *
from area_classification import label_area

## Loading data

In [2]:
# Credentials must never live in the notebook: the API key is read from the
# environment instead. Any key that was previously committed here should be
# treated as leaked and rotated.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

# Reference tables and pre-processed tracking data.
# NOTE: pickles can execute arbitrary code on load; only read files produced by
# this project's own pipeline.
qol_countries = pd.read_pickle("../data/external/Numbeo_Countries.pkl")
place_types = pd.read_pickle("../data/processed/place_types_df.pkl")

all_data_day_5am = pd.read_pickle("../data/processed/all_data_day_5am.pkl")
tpls = pd.read_pickle("../data/processed/tpls.pkl")

In [8]:
# Number of non-null entries per column for each value of 'mode'.
tpls.groupby(by='mode').count()

Unnamed: 0_level_0,user_id,started_at,finished_at,geom
mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
fast_mobility,22,22,22,22
motorized_mobility,166,166,166,166
slow_mobility,499,499,499,499


## JSON request

In [5]:
# Thresholds used by `json_return`. Distances are expressed in whatever unit
# `haversine` returns (presumably metres -- TODO confirm against api_functions).
_RELOCATION_DISTANCE = 50000  # day-to-day home jump that closes the current period
_OFF_HOME_DISTANCE = 500      # daily home further than this from the period's first home is "off home"
_MAX_OFF_HOME_DAYS = 3        # consecutive "off home" days that close the current period
_MIN_PERIOD_DAYS = 3          # periods with fewer days are not reported
_MIN_TRIP_DISTANCE = 25       # fixes closer to home than this are ignored in the distance median
_MAX_TRIP_DISTANCE = 40000    # fixes further from home than this are ignored in the distance median


def _summarise_period(data, period_days, period_homes, closing_day_home,
                      trailing_days_off_home, api_key, blur_house_loc):
    """
    Build the result dictionary for one finished period.

    Args:
    data (DataFrame): All position fixes; must provide 'day_till_5am', 'latitude' and 'longitude'.
    period_days (list): The days belonging to the period, in iteration order.
    period_homes (list): The detected home location of each day in `period_days`.
    closing_day_home: Home location of the day that triggered the end of the period
        (forwarded unchanged to `iterating_staypoints_df`).
    trailing_days_off_home (int): Value of the "off home" day counter when the period was closed;
        it is subtracted from the day count handed to `changes_in_place_api_dict`.
    api_key (str): Key forwarded to `get_country_from_coordinates`.
    blur_house_loc (bool): When truthy, the reported home location is passed through `randomize_location`.

    Returns:
    dict: Period summary, extended with the keys returned by `changes_in_place_api_dict`.
    """
    start_date, end_date = period_days[0], period_days[-1]

    # The period's home is the per-coordinate median of the daily homes.
    home_lat, home_lon = pd.DataFrame(period_homes).median()
    home_location_median = [home_lat, home_lon]

    in_period = (data['day_till_5am'] >= start_date) & (data['day_till_5am'] <= end_date)
    period_data = data[in_period]
    days_at_home = period_data['day_till_5am'].nunique() - trailing_days_off_home

    # Green share around the home (the second returned value is not reported).
    green, _blue = green_persentage_in_area(home_location_median)

    # Place API metrics for the staypoints of the whole period.
    # NOTE(review): this receives the home of the day that *closed* the period, not the
    # period's median home -- confirm this is intended.
    staypoints, _ = track_intel(period_data)
    place_metrics = iterating_staypoints_df(staypoints, closing_day_home)

    daily_distances, daily_times, daily_walking_times = [], [], []
    daily_areas, daily_perimeters = [], []
    data_dump_list = []
    home_point = tuple(home_location_median)

    for date in period_days:
        day_data = period_data[period_data['day_till_5am'] == date]

        # Median distance from home, ignoring fixes at home and implausibly far away.
        day_distances = day_data.apply(
            lambda row: haversine(home_lat, home_lon, row['latitude'], row['longitude']),
            axis=1,
        )
        in_range = (day_distances > _MIN_TRIP_DISTANCE) & (day_distances < _MAX_TRIP_DISTANCE)
        distance_day = day_distances[in_range].median()
        daily_distances.append(distance_day)

        time_away_day = pd.Timedelta(calculate_time_away_from_home(day_data, home_point), unit='sec')
        daily_times.append(time_away_day)

        gps_points_day = list_of_tuples_gps(day_data)
        _, walking_time = track_intel(day_data)
        daily_walking_times.append(walking_time)

        # Fewer than three points cannot be given to the hull helper; such days
        # are reported as None and left out of the period averages.
        if len(gps_points_day) < 3:
            area_day, perimeter_day = None, None
        else:
            area_day, perimeter_day = calculate_convex_hull_area_and_perimeter(gps_points_day)
            daily_areas.append(area_day)
            daily_perimeters.append(perimeter_day)

        data_dump_list.append({
            date.isoformat(): {
                'time_away_from_home_per_day': time_away_day,
                'distance_from_home_per_day': distance_day,
                'area_per_day' : area_day,
                'perimeter_per_day' : perimeter_day,
                'walking_time' : walking_time
            }
        })

    # Quality-of-life index of the home country (only the first index is reported).
    country = get_country_from_coordinates(home_location_median, api_key)
    qol_index, *_ = get_country_indices(country)

    # Blur only after every location-dependent lookup has used the true coordinates.
    if blur_house_loc:
        home_location_median[0], home_location_median[1] = randomize_location(
            home_location_median[0], home_location_median[1]
        )

    # int(nan) raises, so report None when no day produced a usable distance.
    has_distance = any(not pd.isna(d) for d in daily_distances)
    mean_distance = int(np.nanmean(daily_distances)) if has_distance else None

    period_summary = {
        'start_date': start_date.isoformat(),
        'home_location': home_location_median,
        'end_date': end_date.isoformat(),
        'number_of_days': (end_date - start_date).days + 1,
        'distance_from_home': mean_distance,
        'time_away_from_home': np.average(daily_times),
        'area': np.average(daily_areas),
        'perimeter': np.average(daily_perimeters),
        'walking_time': sum(daily_walking_times, timedelta()) / len(daily_walking_times),
        # NOTE(review): label_area is called without arguments; its signature is not
        # visible here -- confirm it does not need the home location.
        'area_type': label_area(),
        'green_percentage_in_area' : green,
        'qol_country_index' : qol_index,
        'data_dump' : data_dump_list
    }
    period_summary.update(changes_in_place_api_dict(place_metrics, days_at_home))
    return period_summary


def json_return(folder_name, api_key='AIzaSyAZrllCfkVCImS3m2MwbdXOlH4ddU42H24', blur_house_loc=True):
    """
    Split a user's tracking data into stay periods and summarise each one.

    The data is loaded with `data_for_trackintel(folder_name)`. A home location is detected
    for every day, then consecutive days are grouped into periods. The current period is
    closed when the last day is reached, when the home moves more than 50000 between two
    consecutive days, or after 3 consecutive days whose home is more than 500 away from the
    home of the period's first day. Periods with fewer than 3 days are not reported.

    NOTE(review): the day that closes a period always opens the next one, and the period
    still open when the loop ends is never emitted, so the last day is absent from the
    results. Confirm this is intended.

    Args:
    folder_name (str): Identifier passed to `data_for_trackintel`; the loaded data must
        provide 'day_till_5am', 'latitude' and 'longitude' columns.
    api_key (str, optional): API key used for the country lookup.
        NOTE(review): the default is a hardcoded credential, kept only so existing callers
        keep working; pass the key explicitly and rotate the committed one.
    blur_house_loc (bool, optional): When truthy, the reported home locations are randomised.

    Returns:
    dict: {'results': [...]} with one summary dictionary per reported period.
    """
    data = data_for_trackintel(folder_name)

    # Detect a home location for every day, then fill the gaps.
    days = data['day_till_5am'].unique()
    homes = {}
    for day in days:
        day_rows = data[data['day_till_5am'].dt.date == pd.to_datetime(day).date()]
        homes[day] = find_home(day_rows)
    homes = define_None_home(homes)

    results = []
    last_day = len(days) - 1
    previous_day_home = None

    for n, day in enumerate(days):
        home_location = homes[day]

        if n == 0:
            # The first day opens the first period.
            period_days, period_homes = [day], [home_location]
            ref_lat, ref_lon = home_location
            days_off_home = 0

        elif (
            (n == last_day)
            or (haversine(home_location[0], home_location[1],
                          previous_day_home[0], previous_day_home[1]) > _RELOCATION_DISTANCE)
            or (days_off_home >= _MAX_OFF_HOME_DAYS)
        ):
            # Close the running period (it ends on the previous day). Short periods are
            # skipped before any expensive helper or API call is made for them.
            if len(period_days) >= _MIN_PERIOD_DAYS:
                results.append(
                    _summarise_period(
                        data, period_days, period_homes, home_location,
                        days_off_home, api_key, blur_house_loc,
                    )
                )

            # The current day opens the next period.
            period_days, period_homes = [day], [home_location]
            ref_lat, ref_lon = home_location
            days_off_home = 0

        else:
            # Keep accumulating; count consecutive days spent away from the reference home.
            period_days.append(day)
            period_homes.append(home_location)
            if haversine(ref_lat, ref_lon, home_location[0], home_location[1]) > _OFF_HOME_DISTANCE:
                days_off_home += 1
            else:
                days_off_home = 0

        previous_day_home = home_location

    return {'results': results}

In [27]:
# Folder name of the user whose data is processed.
response = json_return(
    folder_name='google-oauth2_109244383376429682325',
    api_key=api_key,
)

new result
new result
new result
new result




new result
new result


 71043 71546 71547 73500 73501 73502 76539 76540] lead to invalid tripleg geometries. The resulting triplegs were omitted and the tripleg id of the positionfixes was set to nan


new result


In [28]:
# One row per reported period.
answer = pd.DataFrame(data=response['results'])
answer

Unnamed: 0,start_date,home_location,end_date,number_of_days,distance_from_home,time_away_from_home,area,perimeter,walking_time,data_dump
0,2023-05-04T00:00:00,"[36.44789993434422, 28.221256178048577]",2023-05-07T00:00:00,3,372,0 days 09:45:42.001798,0.039641,0.0001,0 days 03:53:18.542405990,[{'2023-05-04T00:00:00': {'time_away_from_home...
1,2023-05-08T00:00:00,"[37.96080476180212, 23.70175212329792]",2023-05-12T00:00:00,4,5804,0 days 07:05:15.775334200,1.998238,0.092934,0 days 04:17:34.444220509,[{'2023-05-08T00:00:00': {'time_away_from_home...
2,2023-05-14T00:00:00,"[37.96077469633464, 23.704425702138366]",2023-06-04T00:00:00,21,3959,0 days 05:18:47.476354153,0.266086,0.012136,0 days 03:43:13.434663864,[{'2023-05-14T00:00:00': {'time_away_from_home...
3,2023-06-05T00:00:00,"[50.8439038752509, 4.346990190991418]",2023-06-15T00:00:00,10,4133,0 days 05:35:55.168481545,5.046917,0.652709,0 days 02:18:53.202888192,[{'2023-06-05T00:00:00': {'time_away_from_home...
4,2023-06-29T00:00:00,"[37.96218378314387, 23.70294023086282]",2023-07-07T00:00:00,8,5379,0 days 05:03:52.022181888,1.006214,0.085911,0 days 03:02:26.114156726,[{'2023-06-29T00:00:00': {'time_away_from_home...
5,2023-09-21T00:00:00,"[50.86689754181535, 4.243679209580824]",2023-11-10T00:00:00,50,6534,0 days 07:18:06.512944183,3.049504,0.079127,0 days 03:17:07.867763784,[{'2023-09-21T00:00:00': {'time_away_from_home...


In [6]:
#answer['data_dump'][1]