In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from modules.paa import PAA

## Defining some constant variables

In [None]:
# Shared text style passed as `fontdict` to every title, axis label and tick label.
font = dict(family='serif', color='black', weight='normal', size=12)

## Reading the car velocity data
This dataset is used for most of the examples below.

In [None]:
# Load the velocity recording from the Excel sheet.
df = pd.read_excel('data/SPEED.xlsx')

# Rescale time: overwrite the 't (s)' column with consecutive integers
# 0, 1, 2, ... (one per row). Assigning a range directly produces the same
# values as counting rows one by one, without any mutable global state and
# without a Python-level call per row.
df['t (s)'] = range(len(df))

# Shorter column names used by all figure cells.
# NOTE(review): this assumes the sheet has exactly two columns; pandas raises
# a length-mismatch error otherwise.
df.columns = ['time', 'velocity']
df

## PAA Figure

In [None]:
# Plot the first 2000 samples together with their piecewise-constant PAA curve.
sub_df = df.iloc[:2000]
n = 8  # number of PAA segments

fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(sub_df['time'], sub_df['velocity'], label='Time Series')

# Padding is only needed for the plot: every PAA value is duplicated and the
# two copies are moved half a segment width to the left and to the right, so
# that each segment is drawn as a horizontal step.
# (presumably PAA returns one aggregated value per segment -- confirm in modules.paa)
shift = len(sub_df) / (2 * n)
repeated_time = np.repeat(PAA(sub_df['time'], n), 2)
paa_time = [
    value - shift if position % 2 == 0 else value + shift
    for position, value in enumerate(repeated_time)
]
repeated_velocity = np.repeat(PAA(sub_df['velocity'], n), 2)
paa_vel = list(repeated_velocity)

ax.plot(paa_time, paa_vel, color='black', label='PAA representation')
ax.set_title('PAA with a fixed size of 8', fontdict=font)
ax.set_xlabel('time (s)', fontdict=font)
ax.set_ylabel('velocity (m/s)', fontdict=font)
ax.set_xlim([0, 2000])
ax.set_ylim([0, 23])
ax.legend()
fig.savefig('figures/PAA_8.pdf', bbox_inches='tight')

## SAX Figure

In [None]:
from scipy.stats import norm

In [None]:
# Window of the recording shown in the SAX figure (row positions, which also
# serve as x coordinates of the threshold lines and as x-axis limits below).
s = 10000
e = 12000

# Take an explicit copy: a new column is added below, and assigning into a
# slice of `df` triggers pandas' SettingWithCopyWarning.
sub_df = df.iloc[s:e].copy()

# z-normalise the velocity inside the window (zero mean, unit standard deviation).
velocity = sub_df['velocity']
sub_df['normalized_velocity'] = (velocity - velocity.mean()) / velocity.std()

fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(sub_df['time'], sub_df['normalized_velocity'], label='Time Series')

n = 8
# padding is just needed for the plots: each PAA value is duplicated and the
# copies are moved half a segment width left/right to draw horizontal steps
shift = len(sub_df) / (2 * n)
paa_time = [v + shift if i % 2 else v - shift
            for i, v in enumerate(np.repeat(PAA(sub_df['time'], n), 2))]
paa_vel = list(np.repeat(PAA(sub_df['normalized_velocity'], n), 2))

alphabet = 'abcde'  # string with all chars to use

a_size = len(alphabet)

# This creates |alphabet| - 1 breakpoints: the standard-normal quantiles at
# 1/a_size, 2/a_size, ..., which are the discrete bounds used by SAX and split
# the value range into |alphabet| regions.
thresholds = norm.ppf(np.linspace(1 / a_size, 1 - 1 / a_size, a_size - 1))

# Only the first threshold line carries a label so the legend lists it once.
ax.plot([s, e], [thresholds[0], thresholds[0]], '--', color='gray', linewidth=1,
        label='SAX Thresholds')
for t in thresholds[1:]:
    ax.plot([s, e], [t, t], '--', color='gray', linewidth=1)

print(thresholds)

ax.plot(paa_time, paa_vel, color='black', label='PAA representation')
ax.set_ylabel('normalized velocity', fontdict=font)
ax.set_xlabel('time (s)', fontdict=font)
ax.set_ylim([-1.65, 2.25])

# Secondary y-axis on the right that shows the SAX symbol of every region.
y2 = ax.twinx()
# Add an outer bound on each side (1.8 x the outermost breakpoint) and put one
# tick at the midpoint between consecutive bounds, i.e. one tick per region.
ticks = np.array([thresholds[0] * 1.8] + list(thresholds) + [thresholds[-1] * 1.8])
ticks = (ticks[1:] + ticks[:-1]) / 2
y2.set_ylim([-1.65, 2.25])  # same limits as the left axis so both scales line up
y2.set_yticks(ticks)
# NOTE(review): the reversed alphabet labels the lowest region 'e' and the
# highest 'a' -- confirm this ordering is the intended symbol assignment.
y2.set_yticklabels(list(alphabet)[::-1], fontdict=font)

ax.set_title('SAX with an alphabet size of 5 and 8 sub-vectors', fontdict=font)
ax.set_xlim([s, e])
# The labelled lines live on `ax`; a legend requested on the twin axes (the
# current axes after twinx()) would find no labelled artists and stay empty.
ax.legend()
fig.savefig('figures/SAX_2.pdf', bbox_inches='tight')

## Lloyd's Algorithm Figure

In [None]:
from sklearn.cluster import KMeans
from scipy.spatial import Voronoi, voronoi_plot_2d

In [None]:
# Fixed seed so that the sampled points, the clustering and therefore the
# saved figure are identical on every run.
LLOYD_SEED = 42
rng = np.random.default_rng(LLOYD_SEED)

plt.figure(figsize=(6, 5))

# Four groups of ten 2-D points, each drawn from a normal distribution with a
# different mean and standard deviation.
nd1 = rng.normal(0.5, 1.5, (10, 2))
nd2 = rng.normal(0, 0.3, (10, 2))
nd3 = rng.normal(1, 0.4, (10, 2))
nd4 = rng.normal(0.2, 1, (10, 2))
points = np.concatenate([nd1, nd2, nd3, nd4])

# Cluster the points into four groups and keep the resulting cluster centres.
kmeans = KMeans(n_clusters=4, n_init='auto', random_state=LLOYD_SEED).fit(points)
centroids = kmeans.cluster_centers_

plt.scatter(points[:, 0], points[:, 1])
plt.scatter(centroids[:, 0], centroids[:, 1], color='darkred', s=60)
plt.xticks([])
plt.yticks([])
plt.title("Lloyd's Algorithm with k=4", fontdict=font)

# Overlay the Voronoi cells of the centroids on the same axes.
# NOTE(review): voronoi_plot_2d appears to rescale the axis limits around the
# points of `vor` (here the centroids); confirm that samples far from the
# centroids being cropped is intended.
vor = Voronoi(centroids)
fig = voronoi_plot_2d(vor, ax=plt.gca(), line_colors='red', line_width=1,
                      line_alpha=1, point_size=0, show_vertices=False)

plt.savefig('figures/Lloyd.pdf', bbox_inches='tight')
plt.show()