# Why this model counts as a 'baseline'

1. No awareness of trajectories (the sequence of steps a user takes)
2. No timeline awareness
3. Memoryless
4. Assumes a single stable, general trajectory shared by all users at all times

# Import Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import Counter

In [2]:
# Paths of the train/test CSV splits (the "1k" variant of the dataset).
TRAIN_CSV = 'df_train_1k.csv'
TEST_CSV = 'df_test_1k.csv'

# Read both splits into DataFrames; later cells use the columns
# `uid`, `t` and `combined_xy`.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))

# Baseline Model

For each time `t`, predict the mode (most frequent value) of `combined_xy` across all training rows recorded at that `t`, regardless of `uid`.

In [3]:
NUM_TIME_SLOTS = 48  # `t` takes the values 0 .. NUM_TIME_SLOTS - 1
all_t = list(range(NUM_TIME_SLOTS))  # store all the possible time

# Single pass over the training data: for every time slot that actually occurs,
# find the most frequently visited location. Rows keep their original order
# inside each group, so ties are broken exactly as before (the location seen
# first wins, per Counter.most_common).
mode_by_t = {
    t: Counter(list(locs)).most_common(1)[0][0]
    for t, locs in df_train.groupby('t')['combined_xy']
}

# most_freq_locs[t] is the baseline prediction for time slot t.
# A slot with no training rows gets None ("no prediction") instead of raising
# IndexError on an empty Counter; None never equals a real location, so such a
# slot is simply scored as incorrect downstream.
most_freq_locs = [mode_by_t.get(t) for t in all_t]

In [4]:
test_uids = df_test['uid'].unique()

# Last row of every test user, taken in one pass instead of re-filtering the
# whole frame twice per user. "Last" means last in file order.
# NOTE(review): presumably rows are sorted chronologically within each uid, so
# this is the user's final visit -- confirm against the data preparation step.
last_rows = df_test.drop_duplicates(subset='uid', keep='last')

total_correct = 0
for true_label_loc, true_label_time in zip(
    last_rows['combined_xy'].tolist(), last_rows['t'].tolist()
):
    # The prediction is the most frequent training location at the same time slot.
    total_correct += int(true_label_loc == most_freq_locs[true_label_time])

In [6]:
# Accuracy = fraction of test users whose last location matched the prediction.
num_samples = len(test_uids)
# With an empty test set the accuracy is undefined: report NaN rather than
# failing with ZeroDivisionError.
accuracy = total_correct / num_samples if num_samples else float('nan')
print(f"Accuracy: {accuracy}, Num Correct: {total_correct}, Num Sample: {num_samples}")

# The string literals below appear to be outputs recorded from earlier runs on
# the "10k" and "1k" datasets; they are notes only and are not computed here.
""" 10k
Accuracy: 0.0005055611729019212, Num Correct: 1, Num Sample: 1978
"""

""" 1k
Accuracy: 0.0, Num Correct: 0, Num Sample: 200
"""