In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter("ignore")
from datetime import datetime
import plotly.express as px
from skimage import io
import math
import time

In [None]:
# Load the raw Saturday movement log and derive per-visitor features.
def _to_epoch_seconds(stamp):
    """Parse a 'YYYY-MM-DD HH:MM:SS' string into integer epoch seconds (local time, via time.mktime)."""
    return int(time.mktime(time.strptime(stamp, "%Y-%m-%d %H:%M:%S")))


Saturday = pd.read_csv('park-movement-Sat.csv')
Saturday["id"] = Saturday["id"].astype(str)

# Encode the event type numerically: 'check-in' -> 1, 'movement' -> 0.
Saturday = Saturday.replace('check-in', 1).replace('movement', 0)
Saturday["Timestamp"] = Saturday["Timestamp"].map(_to_epoch_seconds)

# Number of records logged for each visitor, repeated on every one of their rows.
Saturday["visit_count"] = Saturday.groupby(["id"])["id"].transform("count")

# Visit duration: span between a visitor's smallest and largest timestamp, in whole minutes.
T = pd.DataFrame(Saturday.groupby('id')['Timestamp'].agg(np.ptp)).reset_index()
T["visit_duration"] = T['Timestamp'] // 60
Saturday_III = Saturday.merge(T[["id", "visit_duration"]])

# Truncate coordinates to integers and keep them paired as an (X, Y) tuple per record.
Saturday_III["X"] = Saturday_III["X"].map(int)
Saturday_III["Y"] = Saturday_III["Y"].map(int)
Saturday_III["Facility_coordinates"] = list(zip(Saturday_III["X"], Saturday_III["Y"]))

In [None]:
# Persist the preprocessed table. The default index is written as an unnamed first
# column, which the reload cell drops again as "Unnamed: 0".
Saturday_III.to_csv('park-movement-Sat_NewII.csv')

In [None]:
# Reload the preprocessed table and discard the index column that to_csv wrote out.
Saturday = pd.read_csv('park-movement-Sat_NewII.csv').drop(columns=["Unnamed: 0"])
Saturday["id"] = Saturday["id"].astype(str)

# Rebuild the (X, Y) tuples from the integer coordinates of each record.
Saturday["X"] = Saturday["X"].map(int)
Saturday["Y"] = Saturday["Y"].map(int)
Saturday["Facility_coordinates"] = list(zip(Saturday["X"], Saturday["Y"]))

In [None]:
# Count missing values in each column.
Saturday.isna().sum()

In [None]:
# Column dtypes, non-null counts and memory footprint.
Saturday.info()

In [None]:
# Total number of records in the table.
len(Saturday)

In [None]:
# 2-D density of all recorded X/Y positions.
px.density_heatmap(Saturday, x='X', y='Y')

In [None]:
# Records per 60-second bin over the whole day, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(Saturday["Timestamp"].min(), Saturday["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(Saturday.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

### Criterion One: Relative time difference and relative distance (Euclidean distance)

In [None]:
# Pairwise trajectory comparison that sorts visitors into Group / Crowd / Single.
#
# Two tracks are compared position-by-position (i-th record against i-th record).
# The share of positions lying within CLOSE_RADIUS of each other, measured against
# the longer track, decides whether the two visitors moved "together".
CLOSE_RADIUS = 2          # max Euclidean distance for two positions to count as matching
MIN_MATCH_FRACTION = 0.7  # share of matching positions needed to call two tracks similar
MAX_MEDIAN_LAG = 60       # seconds; similar equal-length tracks this close in time form a Group
NEIGHBOUR_WINDOW = 10     # each visitor is only compared with the next N ids (keeps runtime bounded)

Group = []   # Families, relatives, couples, or any close small group
Crowd = []   # Visitors who are not in one small group together but show similar behaviour
Single = []  # Visitors who fit neither of the two categories above

visitor_ids = Saturday.id.unique()

# Build every visitor's time-ordered track once instead of re-filtering and
# re-sorting the full table for each compared pair.
# Assumes Facility_coordinates holds (x, y) tuples, as built in the load cell.
tracks = {}
for visitor_id, rows in Saturday.groupby('id', sort=False):
    ordered = rows.sort_values(by=['Timestamp'])
    tracks[visitor_id] = (
        ordered['Timestamp'].to_numpy(),
        np.array(ordered['Facility_coordinates'].tolist(), dtype=float),
    )

for index, ID in enumerate(visitor_ids):
    times_1, coords_1 = tracks[ID]

    for ID2 in visitor_ids[index + 1: index + 1 + NEIGHBOUR_WINDOW]:
        times_2, coords_2 = tracks[ID2]

        same_length = len(times_1) == len(times_2)
        shared = min(len(times_1), len(times_2))
        longest = max(len(times_1), len(times_2))

        # Positions beyond the end of the shorter track can never match, so they
        # only enlarge the denominator.
        step_distance = np.linalg.norm(coords_1[:shared] - coords_2[:shared], axis=1)
        Ddiff = (step_distance <= CLOSE_RADIUS).sum() / longest

        if Ddiff >= MIN_MATCH_FRACTION:
            # Only equal-length tracks can be compared record-by-record in time.
            if same_length and np.median(np.abs(times_1 - times_2)) <= MAX_MEDIAN_LAG:
                Group.extend((ID, ID2))
            else:
                Crowd.extend((ID, ID2))
        elif not same_length:
            Single.extend((ID, ID2))
        # NOTE(review): equal-length but dissimilar pairs are recorded nowhere,
        # exactly as in the original logic. Confirm this is intended.


In [None]:
# Load the stored classification results. The visitor ids are the CSV column headers.
d1 = pd.read_csv('SaturdayGroupCount.csv')
d2 = pd.read_csv('SaturdayCrowdCount.csv')
d3 = pd.read_csv('SaturdaySingleCount.csv')


def _header_ids(frame):
    """Return the column labels of `frame` with everything from the first '.' onwards removed."""
    return [label.partition(".")[0] for label in frame.columns]


Group = _header_ids(d1)
Crowd = _header_ids(d2)
Single = _header_ids(d3)

In [None]:
# Make the three categories mutually exclusive, with priority Group > Crowd > Single.
group_ids = set(Group)
crowd_ids = set(Crowd)

Saturday_Group = list(group_ids)
Saturday_Crowd = list(crowd_ids - group_ids)
Saturday_Single = list(set(Single) - group_ids - crowd_ids)

In [None]:
# (all visitors, Group size, Crowd size, Single size)
len(Saturday.id.unique()),len(Saturday_Group), len(Saturday_Crowd), len(Saturday_Single)

### Criterion Two: Earliest arrival time grouping

In [None]:
# Split visitors by the time of their first record:
#   0 = before 11:00 (Early), 1 = 11:00 to 16:00 inclusive (Noon), 2 = after 16:00 (Late).
# Boundaries are local-time epoch seconds produced by time.mktime.
TimePointMorning = time.mktime(time.strptime("2014-6-07 11:00:00", "%Y-%m-%d %H:%M:%S"))
TimePointAfterNoon = time.mktime(time.strptime("2014-6-07 16:00:00", "%Y-%m-%d %H:%M:%S"))

# Keep each visitor's earliest record. Sorting by Timestamp first makes this
# independent of the row order of the table; the stable sort leaves ties in
# their existing order.
ArrivalTime = (
    Saturday
    .sort_values("Timestamp", kind="mergesort")
    .drop_duplicates(subset='id')
    .copy()
)

first_seen = ArrivalTime["Timestamp"]
ArrivalTime["ArrivalTime"] = np.select(
    [
        first_seen.between(TimePointMorning, TimePointAfterNoon),  # both ends inclusive
        first_seen >= TimePointAfterNoon,
    ],
    [1, 2],
    default=0,
)

Early = list(ArrivalTime[ArrivalTime["ArrivalTime"]==0].id.unique())
Noon =  list(ArrivalTime[ArrivalTime["ArrivalTime"]==1].id.unique())
Late = list(ArrivalTime[ArrivalTime["ArrivalTime"]==2].id.unique())
len(Saturday.id.unique()), len(Early), len(Noon), len(Late)

### Criterion Three: Duration of stay

In [None]:
# Split visitors at the mean of the distinct visit_duration values.
# NOTE(review): the mean is taken over unique duration values, not over visitors.
# Confirm that this is the intended threshold.
duration_threshold = np.mean(Saturday["visit_duration"].unique())
stays_long = Saturday["visit_duration"] >= duration_threshold

HighDuration = list(Saturday[stays_long].id.unique())
LowDuration = list(Saturday[Saturday["visit_duration"] < duration_threshold].id.unique())
len(Saturday.id.unique()), len(HighDuration), len(LowDuration)

#### Group 1

In [None]:
# Group 1: small-group visitors who arrived early and have a high visit duration.
g1_ids = set(Saturday_Group) & set(Early) & set(HighDuration)
G1 = list(g1_ids)
G1P = Saturday[Saturday['id'].isin(G1)]

In [None]:
# Number of distinct visitors that have records in Group 1.
len(G1P.id.unique())

In [None]:
# 2-D density of the X/Y positions recorded for Group 1.
px.density_heatmap(G1P, x='X', y='Y')

In [None]:
# Group 1 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G1P["Timestamp"].min(), G1P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G1P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

In [None]:
# Records of Crowd visitors who arrived early and have a low visit duration (displayed only, not stored).
Saturday.loc[Saturday['id'].isin(list(set(Saturday_Crowd)&set(Early)&set( LowDuration)))]        

#### Group 2

In [None]:
# Group 2: Crowd visitors who arrived early and have a high visit duration.
g2_ids = set(Saturday_Crowd) & set(Early) & set(HighDuration)
G2 = list(g2_ids)
G2P = Saturday[Saturday['id'].isin(G2)]
len(G2)

In [None]:
# 2-D density of the X/Y positions recorded for Group 2.
px.density_heatmap(G2P, x='X', y='Y')

In [None]:
# Group 2 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G2P["Timestamp"].min(), G2P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G2P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 3

In [None]:
# Group 3: Single visitors who arrived early and have a high visit duration.
g3_ids = set(Saturday_Single) & set(Early) & set(HighDuration)
G3 = list(g3_ids)
G3P = Saturday[Saturday['id'].isin(G3)]
len(G3)

In [None]:
# 2-D density of the X/Y positions recorded for Group 3.
px.density_heatmap(G3P, x='X', y='Y')

In [None]:
# Group 3 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G3P["Timestamp"].min(), G3P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G3P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 4

In [None]:
# Group 4: small-group visitors whose first record falls in the midday window.
g4_ids = set(Saturday_Group) & set(Noon)
G4 = list(g4_ids)
G4P = Saturday[Saturday['id'].isin(G4)]
len(G4)

In [None]:
# 2-D density of the X/Y positions recorded for Group 4.
px.density_heatmap(G4P, x='X', y='Y')

In [None]:
# Group 4 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G4P["Timestamp"].min(), G4P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G4P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 5

In [None]:
# Group 5: Crowd visitors whose first record falls in the midday window.
g5_ids = set(Saturday_Crowd) & set(Noon)
G5 = list(g5_ids)
G5P = Saturday[Saturday['id'].isin(G5)]
len(G5)

In [None]:
# 2-D density of the X/Y positions recorded for Group 5.
px.density_heatmap(G5P, x='X', y='Y')

In [None]:
# Group 5 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G5P["Timestamp"].min(), G5P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G5P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 6

In [None]:
# Group 6: Single visitors with a low visit duration.
g6_ids = set(Saturday_Single) & set(LowDuration)
G6 = list(g6_ids)
G6P = Saturday[Saturday['id'].isin(G6)]
len(G6)

In [None]:
# 2-D density of the X/Y positions recorded for Group 6.
px.density_heatmap(G6P, x='X', y='Y')

In [None]:
# Group 6 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G6P["Timestamp"].min(), G6P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G6P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 7 

In [None]:
# Group 7: small-group visitors with a low visit duration.
g7_ids = set(Saturday_Group) & set(LowDuration)
G7 = list(g7_ids)
G7P = Saturday[Saturday['id'].isin(G7)]
len(G7)

In [None]:
# 2-D density of the X/Y positions recorded for Group 7.
px.density_heatmap(G7P, x='X', y='Y')

In [None]:
# Group 7 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G7P["Timestamp"].min(), G7P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G7P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Group 8 

In [None]:
# Group 8: small-group visitors whose first record is after the afternoon cut-off.
g8_ids = set(Saturday_Group) & set(Late)
G8 = list(g8_ids)
G8P = Saturday[Saturday['id'].isin(G8)]
len(G8)

In [None]:
# 2-D density of the X/Y positions recorded for Group 8.
px.density_heatmap(G8P, x='X', y='Y')

In [None]:
# Group 8 records per 60-second bin, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(G8P["Timestamp"].min(), G8P["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(G8P.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Criterion Four: Time interval between each visitor's activities

In [None]:
# For every visitor, measure the time gaps between consecutive records and keep
# the largest and the median gap. The three lists are index-aligned.
visitors_td = []  # visitor id
max_td = []       # largest gap between two consecutive records, in seconds
med_td = []       # median gap between consecutive records, in seconds

for ID, visitors in Saturday.groupby('id', sort=False):
    stamps = visitors['Timestamp'].values
    # Gap between each record and the visitor's own previous record. The earlier
    # version subtracted the first rows of the whole table instead, which mixed
    # in other visitors' timestamps.
    diff = np.abs(np.diff(stamps))
    if diff.size == 0:
        # A single record has no gaps; skip it so max()/median() are not asked
        # to reduce an empty sequence.
        continue
    visitors_td.append(ID)
    max_td.append(diff.max())
    med_td.append(np.median(diff))

#### Visitors with huge gaps between actions (top 150 by largest gap)

In [None]:
# Rank visitors by their largest gap (ascending) and keep the last 150, i.e. the biggest gaps.
N_LARGEST_GAP = 150
inds1 = np.argsort(max_td)
GapH = np.asarray(visitors_td)[inds1[-N_LARGEST_GAP:]]
GapHV = Saturday[Saturday['id'].isin(GapH)]
px.density_heatmap(GapHV, x=GapHV['X'], y=GapHV['Y'])

In [None]:
# Records per 60-second bin for the largest-gap visitors, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(GapHV["Timestamp"].min(), GapHV["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(GapHV.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Fast-moving tourists (low gap between actions)

In [None]:
# Rank visitors by median gap (ascending) and keep the first 150, i.e. the smallest medians.
N_FASTEST = 150
inds2 = np.argsort(med_td)
Faster = np.asarray(visitors_td)[inds2[:N_FASTEST]]
FasterV = Saturday[Saturday['id'].isin(Faster)]
px.density_heatmap(FasterV, x=FasterV['X'], y=FasterV['Y'])

In [None]:
# Records per 60-second bin for the fast-moving visitors, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(FasterV["Timestamp"].min(), FasterV["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(FasterV.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

#### Slow-moving tourists (high gap between actions)

In [None]:
# Rank visitors by median gap (ascending) and keep the last 150, i.e. the largest medians.
N_SLOWEST = 150
inds3 = np.argsort(med_td)
Slower = np.asarray(visitors_td)[inds3[-N_SLOWEST:]]
SlowerV = Saturday[Saturday['id'].isin(Slower)]
px.density_heatmap(SlowerV, x=SlowerV['X'], y=SlowerV['Y'])

In [None]:
# Records per 60-second bin for the slow-moving visitors, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(SlowerV["Timestamp"].min(), SlowerV["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(SlowerV.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

### Group EX

In [None]:
# Records between 09:00 and 11:00 (inclusive) that lie inside the box
# 20 <= X <= 40, 30 <= Y <= 40.
ShowTimeS = time.mktime(time.strptime("2014-6-07 09:00:00", "%Y-%m-%d %H:%M:%S"))
ShowTimeE = time.mktime(time.strptime("2014-6-07 11:00:00", "%Y-%m-%d %H:%M:%S"))

CheckPointM = Saturday.copy()
in_morning_window = CheckPointM["Timestamp"].between(ShowTimeS, ShowTimeE)
in_morning_box = CheckPointM["X"].between(20, 40) & CheckPointM["Y"].between(30, 40)
SuspiciousIN = CheckPointM[in_morning_window & in_morning_box]

In [None]:
# Records between 14:00 and 16:00 (inclusive) that lie inside the box
# 20 <= X <= 40, 30 <= Y <= 40.
ShowTimeS2 = time.mktime(time.strptime("2014-6-07 14:00:00", "%Y-%m-%d %H:%M:%S"))
ShowTimeE2 = time.mktime(time.strptime("2014-6-07 16:00:00", "%Y-%m-%d %H:%M:%S"))

CheckPointA = Saturday.copy()
in_afternoon_window = CheckPointA["Timestamp"].between(ShowTimeS2, ShowTimeE2)
in_afternoon_box = CheckPointA["X"].between(20, 40) & CheckPointA["Y"].between(30, 40)
SuspiciousIN2 = CheckPointA[in_afternoon_window & in_afternoon_box]

In [None]:
# Visitors seen inside the box during both the morning and the afternoon window.
morning_ids = set(SuspiciousIN.id.unique())
afternoon_ids = set(SuspiciousIN2.id.unique())
SuspiciousGroup = list(morning_ids & afternoon_ids)
GSP = Saturday[Saturday['id'].isin(SuspiciousGroup)]
len(SuspiciousGroup)

In [None]:
# 2-D density of the X/Y positions recorded for the visitors in SuspiciousGroup.
px.density_heatmap(GSP, x='X', y='Y')

In [None]:
# Records per 60-second bin for the visitors in SuspiciousGroup, drawn as a line.
# The stop value is padded by one step so the final, partial minute is binned too;
# a bare range(min, max, 60) ends before the last records and silently drops them.
minute_edges = range(GSP["Timestamp"].min(), GSP["Timestamp"].max() + 60, 60)
counts, bins = np.histogram(GSP.Timestamp, bins=minute_edges)
bins = bins[1:]  # plot each count at the right-hand edge of its bin
# x and y are given as arrays, so no data_frame argument is passed.
px.line(x=bins, y=counts, labels={'x': 'timestamp', 'y': 'count'})

In [None]:
# Render the epoch value 1402178400 as a local-time "YYYY-MM-DD HH:MM:SS" string.
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime( 1402178400))

In [None]:
# For every visitor, compute the share of their records in each quadrant of the
# map (split at X = 50 and Y = 50), then plot one histogram of shares per quadrant.
# NOTE(review): EarlyPart is not defined anywhere in the visible notebook. Confirm
# which frame is meant (it must have 'id', 'X' and 'Y' columns) before running.
Area = []
# These accumulators were never initialised before, so the first append raised NameError.
Area_1 = []  # X < 50, Y < 50
Area_2 = []  # X > 50, Y < 50
Area_3 = []  # X < 50, Y > 50
Area_4 = []  # X > 50, Y > 50

for ID, visitors in EarlyPart.groupby('id', sort=False):
    total = len(visitors)
    x_low, x_high = visitors['X'] < 50, visitors['X'] > 50
    y_low, y_high = visitors['Y'] < 50, visitors['Y'] > 50
    # Records with X == 50 or Y == 50 fall in no quadrant (strict comparisons).
    Area_1.append((x_low & y_low).sum() / total)
    Area_2.append((x_high & y_low).sum() / total)
    Area_3.append((x_low & y_high).sum() / total)
    Area_4.append((x_high & y_high).sum() / total)

for i in [Area_1,Area_2,Area_3,Area_4]:
    area = px.histogram(i, nbins=20)
    area.show()