# K-Means with Dynamic Time Warping

In [None]:
!pip install cython
!pip install tslearn

In [6]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import os

def create_connection():
    host = 'localhost'
    port = '3333'
    return create_engine("postgresql://infocup@{}:{}/infocup".format(host,port))

In [7]:
con = create_connection()

active_stations = pd.read_sql_query("""SELECT station_id,time_stamp, price 
FROM prices_sampled 
WHERE price > 0 AND station_id <= 250 AND date_part('hour', time_stamp)::int %% 8 = 0
AND time_stamp >= '2017-06-01'""", con)
active_stations['time_stamp'] = active_stations['time_stamp'].astype(np.int64) // 10**9
active_stations.tail(10)

Unnamed: 0,station_id,time_stamp,price
80582,250,1505750400,1309
80583,250,1505779200,1399
80584,250,1505808000,1399
80585,250,1505836800,1339
80586,250,1505865600,1419
80587,250,1505894400,1399
80588,250,1505923200,1339
80589,250,1505952000,1419
80590,250,1505980800,1399
80591,250,1506009600,1379


In [8]:
active_stations = active_stations[np.isfinite(active_stations['price'])]
active_stations.dropna(inplace=True)
active_stations.loc[np.isnan(active_stations['price'])]
prices_pivoted = active_stations.pivot_table(index='time_stamp', columns='station_id', values='price',fill_value=0)


In [9]:
from tslearn.clustering import TimeSeriesKMeans
km = TimeSeriesKMeans(n_clusters=10, metric="dtw",max_iter=1)
km.fit(prices_pivoted.as_matrix())

155812.668 --> 


TimeSeriesKMeans(dtw_inertia=False, max_iter=1, max_iter_barycenter=100,
         metric='dtw', metric_params=None, n_clusters=10, n_init=1,
         random_state=None, tol=1e-06, verbose=True)

In [10]:
y_pred = km.predict(prices_pivoted.as_matrix())

In [11]:
y_pred

array([0, 8, 8, 0, 8, 8, 0, 0, 8, 0, 0, 8, 0, 0, 8, 0, 8, 8, 0, 8, 8, 0, 8,
       8, 0, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 3, 7, 9, 3, 7,
       9, 3, 7, 1, 3, 7, 1, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3,
       7, 1, 3, 7, 1, 3, 7, 1, 3, 7, 9, 3, 7, 1, 3, 7, 9, 3, 7, 9, 3, 7, 1,
       3, 7, 1, 3, 7, 1, 3, 5, 3, 3, 7, 1, 3, 7, 1, 3, 7, 1, 3, 7, 1, 3, 7,
       1, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3, 7, 9, 3, 7, 1, 3, 7, 1, 3,
       4, 1, 3, 4, 1, 3, 7, 1, 3, 7, 1, 3, 4, 9, 3, 4, 1, 3, 5, 9, 3, 7, 9,
       3, 7, 1, 3, 7, 9, 3, 7, 9, 3, 7, 1, 3, 7, 1, 3, 4, 1, 3, 4, 1, 3, 4,
       1, 3, 4, 1, 3, 4, 1, 3, 4, 9, 3, 4, 1, 3, 4, 1, 9, 6, 1, 3, 4, 1, 3,
       4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 9, 6, 1, 9, 4, 1, 3, 5, 9,
       3, 4, 9, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4,
       1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 4, 1, 3, 6, 1, 9,
       6, 1, 1, 6, 6, 1, 6, 6, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1,
       1, 6,