<a href="https://colab.research.google.com/github/Kim-TaeKyoung/one/blob/main/3_MinuteSummary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the directories used by later cells
# live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from glob import glob
import pandas as pd
from matplotlib import pyplot as plt
import os
import seaborn as sns
import numpy as np

In [None]:
#@title Smallest Enclosing Circle
import numpy as np
 
class ProjectorStack:
    """
    A stack of vectors that can be applied to another vector with ``*``.

    ``stack * v`` adds up every stored vector scaled by its dot product with
    ``v``.  When the stored vectors are orthonormal this is the orthogonal
    projection of ``v`` onto their span.
    """

    def __init__(self, vec):
        # Stored vectors, one per row (an empty array while the stack is empty).
        self.vs = np.array(vec)

    def push(self, v):
        """Append ``v`` as the new top of the stack and return the stack itself."""
        is_empty = len(self.vs) == 0
        self.vs = np.array([v]) if is_empty else np.append(self.vs, [v], axis=0)
        return self

    def pop(self):
        """Remove and return the top vector; return ``None`` if the stack is empty."""
        if len(self.vs) == 0:
            return None
        top = self.vs[-1]
        self.vs = self.vs[:-1]
        return top

    def __mul__(self, v):
        """Return the sum over stored vectors ``u`` of ``u * dot(u, v)``."""
        projection = np.zeros(len(v))
        for basis_vec in self.vs:
            weight = np.dot(basis_vec, v)
            projection = projection + basis_vec * weight
        return projection
 
class GaertnerBoundary:
    """
        GärtnerBoundary

    Mutable state of the boundary (support) set used by the Welzl search.
    See the passage regarding M_B in Section 4 of Gärtner's paper.

    Attributes:
        projector: ProjectorStack that receives one normalised residue per
            boundary point pushed after the first one.
        centers: stack of sphere centers, one row per pushed boundary point.
        square_radii: squared radii matching ``centers`` entry for entry.
        empty_center: all-NaN vector with one entry per coordinate of
            ``pts[0]``; used as the center when no boundary point is present.
    """
    def __init__(self, pts):
        # pts must be non-empty: pts[0] fixes the dimension of the space.
        self.projector = ProjectorStack([])
        self.centers, self.square_radii = np.array([]), np.array([])
        # np.nan (lower case) is the supported spelling; the np.NaN alias was
        # removed in NumPy 2.0.
        self.empty_center = np.full(len(pts[0]), np.nan)
 
 
def push_if_stable(bound, pt):
    """
    Try to add ``pt`` to the boundary set, updating center and squared radius.

    The first point is always accepted and gives a sphere of squared radius 0
    centered on the point.  A later point is accepted only if twice the squared
    norm of its residue (the part left after applying ``bound.projector``)
    exceeds ``eps * max(r2, 1.0)``; otherwise ``bound`` is left untouched.

    Returns a truthy value if the point was pushed, a falsy one otherwise.
    """
    if len(bound.centers) == 0:
        bound.square_radii = np.append(bound.square_radii, 0.0)
        bound.centers = np.array([pt])
        return True

    origin = bound.centers[0]
    current_center = bound.centers[-1]
    current_r2 = bound.square_radii[-1]

    # Work in coordinates relative to the first boundary point.
    rel_center = current_center - origin
    rel_pt = pt - origin
    projected = bound.projector * rel_pt
    residue = rel_pt - projected
    excess = sqdist(rel_pt, rel_center) - current_r2

    z = 2 * sqnorm(residue)
    tol = np.finfo(float).eps * max(current_r2, 1.0)
    isstable = np.abs(z) > tol
    if not isstable:
        return isstable

    shifted_center = current_center + (excess / z) * residue
    grown_r2 = current_r2 + (excess * excess) / (2 * z)
    bound.projector.push(residue / np.linalg.norm(residue))
    bound.centers = np.append(bound.centers, np.array([shifted_center]), axis=0)
    bound.square_radii = np.append(bound.square_radii, grown_r2)
    return isstable
 
def pop(bound):
    """
    Undo the most recent push on ``bound`` and return ``bound``.

    The last center and squared radius are dropped.  The projector is popped
    only when at least two centers were present, because the first boundary
    point never adds a projector entry.
    """
    had_projector_entry = len(bound.centers) >= 2
    if had_projector_entry:
        bound.projector.pop()
    bound.centers = bound.centers[:-1]
    bound.square_radii = bound.square_radii[:-1]
    return bound
 
 
class NSphere:
    """Sphere described by a center point and a squared radius."""

    def __init__(self, c, sqr):
        # np.array copies c, so later changes to the caller's data do not
        # affect the stored center.
        center_copy = np.array(c)
        self.center = center_copy
        self.sqradius = sqr
 
def isinside(pt, nsphere, atol=1e-6, rtol=0.0):
    """
    Tell whether ``pt`` lies inside ``nsphere`` or on its surface within tolerance.

    The test compares squared distances, so ``atol`` and ``rtol`` are squared
    before being handed to ``np.isclose``.
    """
    point_sq = sqdist(pt, nsphere.center)
    limit_sq = nsphere.sqradius
    strictly_inside = point_sq <= limit_sq
    if strictly_inside:
        return strictly_inside
    return np.isclose(point_sq, limit_sq, atol=atol**2, rtol=rtol**2)
 
def allinside(pts, nsphere, atol=1e-6, rtol=0.0):
    """Return True when every point in ``pts`` passes ``isinside``; stops at the first failure."""
    return all(isinside(candidate, nsphere, atol, rtol) for candidate in pts)
 
def move_to_front(pts, i):
    """
    Move ``pts[i]`` to position 0 in place, shifting ``pts[0:i]`` back by one.

    ``pts`` is modified in place and also returned.
    """
    carried = pts[i]
    for j in range(len(pts)):
        # Copy the row about to be overwritten before writing into it.
        displaced = np.array(pts[j])
        pts[j] = carried
        carried = displaced
        if j == i:
            break
    return pts
 
def dist(p1, p2):
    """Return the norm of ``p1 - p2`` as computed by ``np.linalg.norm``."""
    separation = p1 - p2
    return np.linalg.norm(separation)
 
def sqdist(p1, p2):
    """Return the squared norm of ``p1 - p2``."""
    separation = p1 - p2
    return sqnorm(separation)
 
def sqnorm(p):
    """Return the sum of the squares of the components of ``p``."""
    squared_components = np.array([component * component for component in p])
    return np.sum(squared_components)
 
def ismaxlength(bound):
    """
    Tell whether the boundary already holds the maximum number of points.

    The maximum is the dimension of the space (the length of
    ``bound.empty_center``) plus one.

    Returns:
        True if ``len(bound.centers)`` equals that maximum, False otherwise.
    """
    # The comparison used to be evaluated without being returned, so the
    # function always produced None (falsy) and callers never saw "full".
    return len(bound.centers) == len(bound.empty_center) + 1
 
def makeNSphere(bound):
    """
    Build the NSphere for the current state of ``bound``.

    Uses the most recent center and squared radius; when no boundary point is
    present, the sphere has ``bound.empty_center`` as center and squared radius 0.
    """
    if len(bound.centers) > 0:
        return NSphere(bound.centers[-1], bound.square_radii[-1])
    return NSphere(bound.empty_center, 0.0)
 
def _welzl(pts, pos, bdry):
    """
    Recursive step of the sphere search over the first ``pos`` points.

    Starting from the sphere defined by ``bdry``, scans ``pts[0:pos]``.  Each
    point that is not inside the current sphere is tentatively pushed onto
    ``bdry``; if the push succeeds, the function recurses over the points
    before it, pops the point again and moves it to the front of ``pts``.

    Both ``pts`` (row order) and ``bdry`` (temporarily) are mutated in place.

    Returns:
        ``(nsphere, support_count)``: the last sphere found and a counter
        that is set to the recursive call's count plus one whenever a point
        is accepted.
    """
    support_count, nsphere = 0, makeNSphere(bdry)
    # Stop early when ismaxlength reports that the boundary is full.
    if ismaxlength(bdry):
        return nsphere, 0
    for i in range(pos):
        if not isinside(pts[i], nsphere):
            # pts[i] violates the current sphere: try to make it a boundary point.
            isstable = push_if_stable(bdry, pts[i])
            if isstable:
                # Re-solve for the points before i with pts[i] on the boundary.
                nsphere, s = _welzl(pts, i, bdry)
                # Restore bdry to its state before the push.
                pop(bdry)
                move_to_front(pts, i)
                support_count = s + 1
    return nsphere, support_count
 
def find_max_excess(nsphere, pts, k1):
    """
    Find the point, from index ``k1 - 1`` onward, that sticks out of the sphere most.

    The excess of a point is its squared distance to ``nsphere.center`` minus
    ``nsphere.sqradius``.

    Args:
        nsphere: object with ``center`` and ``sqradius`` attributes.
        pts: indexable sequence of points.
        k1: scanning starts at index ``k1 - 1``.

    Returns:
        ``(err_max, index)``: the largest excess and the index of that point
        in ``pts``.  If no point qualifies (nothing to scan, or every excess is
        NaN), returns ``(-inf, k1 - 2)``.
    """
    start = k1 - 1
    # np.inf (lower case) is the supported spelling; the np.Inf alias was
    # removed in NumPy 2.0.
    err_max, k_max = -np.inf, start - 1
    for offset, pt in enumerate(pts[start:]):
        err = sqdist(pt, nsphere.center) - nsphere.sqradius
        if err > err_max:
            err_max, k_max = err, start + offset
    return err_max, k_max
 
def welzl(points, maxiterations=2000):
    """
    Compute an enclosing sphere for ``points`` iteratively.

    Args:
        points: non-empty array-like of points (one row per point); the first
            row determines the dimension.
        maxiterations: upper bound on the number of refinement rounds.

    Returns:
        An NSphere exposing ``center`` and ``sqradius`` (squared radius).

    Note:
        If the loop uses up ``maxiterations`` without the largest excess
        dropping to machine epsilon, the last sphere is returned without any
        warning.
    """
    # Work on a copy: the search reorders the rows of pts in place.
    pts, eps = np.array(points, copy=True), np.finfo(float).eps
    bdry, t = GaertnerBoundary(pts), 1
    # Initial sphere from the first point only.
    nsphere, s = _welzl(pts, t, bdry)
    for i in range(maxiterations):
        # Point from index t onward that lies farthest outside the current sphere.
        e, k = find_max_excess(nsphere, pts, t + 1)
        if e <= eps:
            # Nothing sticks out by more than machine epsilon: done.
            break
        pt = pts[k]
        # Force pt onto the boundary and re-solve over the first s points.
        push_if_stable(bdry, pt)
        nsphere_new, s_new = _welzl(pts, s, bdry)
        pop(bdry)
        move_to_front(pts, k)
        nsphere = nsphere_new
        t, s = s + 1, s_new + 1
    return nsphere

In [None]:
# Directory on the mounted Drive that the summary CSV is written to.
RESULT_DIR = '/content/drive/MyDrive/Colab/SummaryCSV/'

In [None]:
# Directory on the mounted Drive that holds the input CSV files.
CSV_DIR = '/content/drive/MyDrive/Colab/RefinedCSV/'
# Date prefix of the file to process (looks like YYYYMMDD).
# Presumably the available dates run from 20211012 to 20211023 — TODO confirm.
TARGET_DATE = '20211012'

# Load <TARGET_DATE>_PhoneGPS-AlphaPose-Device-Output.csv into a DataFrame.
TARGET_CSV = pd.read_csv(os.path.join(CSV_DIR, TARGET_DATE + '_PhoneGPS-AlphaPose-Device-Output.csv'))

In [None]:
# Keep only rows whose latitude and longitude are both present and non-zero.
# notna() states the missing-value check explicitly instead of relying on the
# "NaN is not equal to itself" comparison.
TARGET_CSV = TARGET_CSV[
    (TARGET_CSV['lat'] != 0)
    & (TARGET_CSV['lon'] != 0)
    & TARGET_CSV['lat'].notna()
    & TARGET_CSV['lon'].notna()
]

In [None]:
# Materialise the groupby as a list of (segment_name, sub-DataFrame) pairs.
segments = list(TARGET_CSV.groupby('segment_name'))

In [None]:
# Map each segment name to the list of (minutes, sub-DataFrame) pairs obtained
# by grouping that segment's rows on the 'minutes' column.
segments_dict = {
    segment[0]: list(segment[1].groupby('minutes'))
    for segment in segments
}

In [None]:
# Column order of the per-minute summary table.
SUMMARY_COLUMNS = [
    'segment_name', 'minute', 'second_data_count', 'ad_id_list', 'ad_id_count',
    'start_timestamp', 'end_timestamp', 'start_hour', 'start_minute',
    'end_hour', 'end_minute', 'geo_points_center_lat', 'geo_points_center_lon',
    'geo_points', 'illuminance', 'head_count', 'T_valid_count', 'T_front_count',
    'T_valid_second', 'T_front_second', 'T_attention1_count', 'T_attention2_count',
    'T_attention1_second', 'T_attention2_second', 'video_file',
]


def _first_rounded(column, ndigits=None):
    """Round every non-NaN value of ``column`` and return the first element.

    With ``ndigits=None`` values are rounded with ``round(x)``, otherwise with
    ``round(x, ndigits)``.  NaN values are passed through unchanged.
    """
    def _round_keep_nan(x):
        # NaN is not equal to itself, so this leaves missing values alone.
        if x == x:
            return round(x) if ndigits is None else round(x, ndigits)
        return x

    # NOTE(review): presumably only the first value is needed; the whole column
    # is still transformed first so the returned value is exactly what the
    # previous implementation produced.
    return column.apply(_round_keep_nan).iloc[0]


def summarize_minute(curr_min):
    """Build one summary record (a dict keyed by SUMMARY_COLUMNS) for one minute group.

    Args:
        curr_min: DataFrame holding the rows of a single (segment, minute) group.

    Returns:
        dict with one entry per summary column.  Start/end fields come from the
        first/last row of the group, the center from ``welzl`` applied to the
        (lat, lon) pairs, and the T_* / head_count fields from the first row.
    """
    start_row = curr_min.iloc[0]
    end_row = curr_min.iloc[-1]

    # Enclosing circle of the minute's (lat, lon) points; its center is reported.
    nsphere = welzl(curr_min[['lat', 'lon']].values)

    # Distinct ad ids seen in this minute and how many rows carry each of them.
    ad_id_list = curr_min['ad_id'].unique()
    ad_id_count = [(curr_min['ad_id'] == ad).sum() for ad in ad_id_list]

    return {
        'segment_name': start_row['segment_name'],
        'minute': start_row['minutes'],
        'second_data_count': len(curr_min),
        'ad_id_list': ad_id_list,
        'ad_id_count': ad_id_count,
        'start_timestamp': start_row['timestamp'],
        'end_timestamp': end_row['timestamp'],
        'start_hour': start_row['time_hour'],
        'start_minute': start_row['time_minute'],
        'end_hour': end_row['time_hour'],
        'end_minute': end_row['time_minute'],
        'geo_points_center_lat': nsphere.center[0],
        'geo_points_center_lon': nsphere.center[1],
        'geo_points': list(zip(curr_min['lat'], curr_min['lon'], curr_min['alt'], curr_min['speed'])),
        'illuminance': curr_min['illuminance'].mean(),
        'head_count': _first_rounded(curr_min['head_count']),
        'T_valid_count': curr_min['T_valid_count'].iloc[0],
        'T_front_count': curr_min['T_front_count'].iloc[0],
        'T_valid_second': _first_rounded(curr_min['T_valid_second'], 2),
        'T_front_second': _first_rounded(curr_min['T_front_second'], 2),
        'T_attention1_count': curr_min['T_attention1_count'].iloc[0],
        'T_attention2_count': curr_min['T_attention2_count'].iloc[0],
        'T_attention1_second': _first_rounded(curr_min['T_attention1_second'], 2),
        'T_attention2_second': _first_rounded(curr_min['T_attention2_second'], 2),
        'video_file': start_row['video_file'],
    }


# Collect all records first and build the frame once; growing a DataFrame with
# pd.concat inside the loop copies the accumulated rows on every iteration.
minute_records = [
    summarize_minute(curr_min)
    for minute_groups in segments_dict.values()
    for _, curr_min in minute_groups
]

# dtype=object keeps every cell exactly as computed (including list/array
# cells); columns= fixes the column order and keeps the header even when there
# are no records.
df_return = pd.DataFrame(minute_records, columns=SUMMARY_COLUMNS, dtype=object)

In [None]:
# #@title AB Test
# df_return = pd.DataFrame()

# for key in segments_dict.keys():
#   for _, curr_min in segments_dict[key]:
#     points = pd.concat([curr_min['lat'], curr_min['lon']], axis=1)
#     nsphere = welzl(points.values)

#     start_row = curr_min.iloc[0]
#     end_row = curr_min.iloc[-1]

#     segment_name = start_row['segment_name']
#     minute = start_row['minutes']
#     second_data_count = len(curr_min)

#     start_ad = start_row['ad_id']
#     start_ad_count = (curr_min['ad_id'] == start_row['ad_id']).sum()
#     end_ad = end_row['ad_id']
#     end_ad_count = (curr_min['ad_id'] == end_row['ad_id']).sum()

#     start_timestamp = start_row['timestamp']
#     end_timestamp = end_row['timestamp']

#     start_hour = start_row['time_hour']
#     start_minute = start_row['time_minute']
#     end_hour = end_row['time_hour']
#     end_minute = end_row['time_minute']

#     geo_points_center_lat = nsphere.center[0]
#     geo_points_center_lon = nsphere.center[1]

#     geo_points = list(zip(curr_min['lat'], curr_min['lon'], curr_min['alt'], curr_min['speed']))
#     dong_based_geo = curr_min['dong'].values

#     illuminance = curr_min['illuminance'].mean()

#     head_count = curr_min['head_count'].apply(lambda x: round(x) if x == x else x).iloc[0]
#     T_valid_second = curr_min['T_valid_second'].apply(lambda x: round(x, 2) if x == x else x).iloc[0]
#     T_front_second = curr_min['T_front_second'].apply(lambda x: round(x, 2) if x == x else x).iloc[0]
#     T_attention1_count = curr_min['T_attention1_count'].iloc[0]
#     T_attention2_count = curr_min['T_attention2_count'].iloc[0]
#     T_attention1_second = curr_min['T_attention1_second'].apply(lambda x: round(x, 2) if x == x else x).iloc[0]
#     T_attention2_second = curr_min['T_attention2_second'].apply(lambda x: round(x, 2) if x == x else x).iloc[0]
#     video_file = start_row['video_file']
    

#     new_minute_summary_row = [segment_name, minute, second_data_count, start_ad, end_ad, start_ad_count, end_ad_count, start_timestamp, end_timestamp, start_hour, start_minute, end_hour, end_minute,
#                               geo_points_center_lat, geo_points_center_lon, geo_points, dong_based_geo, illuminance, head_count,
#                               T_valid_second, T_front_second, T_attention1_count, T_attention2_count, T_attention1_second, T_attention2_second, video_file]
    
#     df_minute = pd.Series(new_minute_summary_row, index=['segment_name', 'minute', 'second_data_count', 'start_ad', 'end_ad', 'start_ad_count', 'end_ad_count', 'start_timestamp', 'end_timestamp', 'start_hour', 'start_minute',
#                                   'end_hour', 'end_minute', 'geo_points_center_lat', 'geo_points_center_lon', 'geo_points',
#                                   'dong_based_geo', 'illuminance', 'head_count', 'T_valid_second', 'T_front_second', 'T_attention1_count', 'T_attention2_count',
#                                   'T_attention1_second', 'T_attention2_second', 'video_file']).to_frame(0).T


#     df_return = pd.concat([df_return, df_minute], axis=0, ignore_index=True)

In [None]:
# df_return[df_return['start_ad'] != df_return['end_ad']]

In [None]:
# Write the summary to <RESULT_DIR>/<TARGET_DATE>-SummaryCSV.csv without the
# index column. The utf-8-sig encoding prepends a BOM, presumably so that
# spreadsheet tools detect the file as UTF-8.
df_return.to_csv(os.path.join(RESULT_DIR, TARGET_DATE + '-SummaryCSV.csv'), index=False, encoding="utf-8-sig")