## OPIIF_Clustering

In [1]:
from IPython.display import HTML

input_form = """
<a id="admin_link" target="_blank" href="#">Ajenti Administration Interface</a>
<p>User: root<br> Password: admin</p>
"""

javascript = """
<script type="text/Javascript">
document.getElementById('admin_link').href = "https://" + window.location.hostname + ":8000"
</script>
"""

HTML(input_form + javascript)

### Importar Librerias

In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.finance import candlestick, quotes_historical_yahoo, date2num
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize

from datetime import datetime, timedelta

pd.options.display.max_columns=50

### Funciones a utilizar

####  Descargar precios Yahoo Finance

In [51]:
def download_data(symbol, days_delta=60):
    finish_date = datetime.today()
    start_date = finish_date - timedelta(days=days_delta)

    stocks_raw = quotes_historical_yahoo(symbol, start_date, finish_date)
    stocks_df = pd.DataFrame(stocks_raw, columns=["n_date", "open", "close", "high", "low", "volume"])
    return stocks_df

####  Configuracion de fecha

In [52]:
def process_date(stocks_df):
    stocks_df["n_date"] = stocks_df["n_date"].astype(np.int32)
    stocks_df["date"] = stocks_df["n_date"].apply(datetime.fromordinal)
    return stocks_df

#### Calculo de estadisticas

In [53]:
def calculate_stats(stocks_df):
    stocks_df["average"] = (stocks_df["close"] + stocks_df["high"] + stocks_df["low"]) / 3.0
    stocks_df["change_amount"] = stocks_df["close"] - stocks_df["open"]
    stocks_df["change_per"] = stocks_df["change_amount"] / stocks_df["average"]
    stocks_df["range"] = (stocks_df["high"] - stocks_df["low"]) / stocks_df["average"]
    stocks_df["change_1_amount"] = pd.Series(0.0)
    stocks_df["change_1_amount"][1:] = stocks_df["average"][1:].values - stocks_df["average"][:-1].values
    stocks_df["change_1_per"] = stocks_df["change_1_amount"] / stocks_df["average"]
    return stocks_df

#### Ajuste de datos para Clustering

In [54]:
def pivot_data(stocks_df, values="change_1_per"):
    clustering_data = stocks_df.pivot(index="Ticker", columns="n_date", values=values)
    return clustering_data

#### Clustering

In [55]:
def cluster_data(data, n_clusters=8, normalize_data=False):
    if normalize_data:
        data = normalize(data.values, norm='l2', axis=1, copy=True)
    cluster_model = KMeans(n_clusters=n_clusters)
    prediction = cluster_model.fit_predict(data)
    return prediction, cluster_model, data

#### Visualizar Clusters

In [56]:
def visualize_clusters(data_df, values="change_1_per", n_clusters=8, normalize_data=False):
    data = pivot_data(data_df, values)
    prediction, model, c_data = cluster_data(data, n_clusters=n_clusters, normalize_data=normalize_data)
    c_data = pd.DataFrame(c_data, index=data.index,columns=data.columns)
    data["Cluster"] = prediction
    c_data["Cluster"] = prediction
    plt.figure
    for cluster in np.unique(prediction):
        plt.plot(model.cluster_centers_[cluster], "o-", alpha=0.5, linewidth=2)
    plt.show()
    for cluster in np.unique(prediction):
        temp_cluster_data = c_data[c_data["Cluster"]==cluster]
        print "Cluster: %s" % cluster
        print "Members: %s" % ["%s: %s"% (symbol, stock_dict[symbol]) for symbol in list(temp_cluster_data.index)]
        plt.figure()
        plt.title("Cluster#: %s" % cluster)
        plt.plot(model.cluster_centers_[cluster], "o--", alpha=0.5, linewidth=2)
        for symbol in temp_cluster_data.index:
            plt.plot(np.ravel(temp_cluster_data.loc[[symbol]].drop("Cluster", 1).values),
                     alpha=0.2, linewidth=2)
            
        plt.grid()
        plt.show();
    return prediction, model, c_data

#### Medicion de desempeno Cluster

In [57]:
def measure_error(prediction, model, c_data):
    error_score = []
    for counter in range(len(c_data)):
        true_val = c_data.drop("Cluster",1).values[counter]
        center_val = model.cluster_centers_[c_data["Cluster"][counter]]

        error_score.append(np.average(np.abs(true_val - center_val)) / np.average(center_val))
    
    cluster_counts = c_data["Cluster"].value_counts()
    
    return np.average(error_score), len(cluster_counts[cluster_counts==1])

In [58]:
stock_dict={"ALFAA.MX": "ALFA.A",
            "ALPEKA.MX": "ALPEK.A",
            "ALSEA.MX": "ALSEA",
            "AMXL.MX": "AMX.L",
            "ASURB.MX": "ASUR.B",
            "BIMBOA.MX": "BIMBO.A",
            "BOLSAA.MX": "BOLSA.A",
            "CEMEXCPO.MX": "CEMEX.CPO",
            "COMERCIUBC.MX": "COMERCI.UBC",
            "ELEKTRA.MX": "ELEKTRA",
            "GAPB.MX": "GAP.B",
            "GENTERA.MX": "GENTERA",
            "GFINBURO.MX": "GFINBUR.O",
            "GFNORTEO.MX": "GFNORTE.O",
            "GFREGIOO.MX": "GFREGIO.O",
            "GMEXICOB.MX": "GMEXICO.B",
            "GRUMAB.MX": "GRUMA.B",
            "GSANBORB-1.MX": "GSANBOR.B-1",
            "ICA.MX": "ICA",
            "ICHB.MX": "ICH.B",
            "IENOVA.MX": "IENOVA",
            "KIMBERA.MX": "KIMBER.A",
            "KOFL.MX": "KOFL",
            "LABB.MX": "LAB.B",
            "LALAB.MX": "LALA.B",
            "LIVEPOLC-1.MX": "LIVEPOL.C-1",
            "MEXCHEM.MX": "MEXCHEM",
            "OHLMEX.MX": "OHLMEX",
            "PINFRA.MX": "PINFRA",
            "SANMEXB.MX": "SANMEX.B",
            "TLEVISACPO.MX": "TLEVISA.CPO",
            "WALMEX.MX": "WALMEX",
           }
symbols = stock_dict.keys()
names = stock_dict.values()

stocks_data = pd.DataFrame(symbols, columns=["Ticker"])
stocks_data["NAIC"] = names
stocks_data

Unnamed: 0,Ticker,NAIC
0,ICHB.MX,ICH.B
1,GMEXICOB.MX,GMEXICO.B
2,MEXCHEM.MX,MEXCHEM
3,ELEKTRA.MX,ELEKTRA
4,GFINBURO.MX,GFINBUR.O
5,IENOVA.MX,IENOVA
6,PINFRA.MX,PINFRA
7,ALPEKA.MX,ALPEK.A
8,WALMEX.MX,WALMEX
9,ALFAA.MX,ALFA.A


In [59]:
temp_list = []
for symbol in stocks_data["Ticker"]:
    temp_data = download_data(symbol)
    process_date(temp_data)
    calculate_stats(temp_data)
    temp_data["Ticker"] = symbol
    temp_list.append(temp_data)

stocks_df = pd.concat(temp_list)
stocks_df

Unnamed: 0,n_date,open,close,high,low,volume,date,average,change_amount,change_per,range,change_1_amount,change_1_per,Ticker
0.0,735955,54.900000,54.54000,55.350000,54.260000,354900,2015-12-23,54.716667,-0.360000,-0.006579,0.019921,0.000000,0.000000,ICHB.MX
1.0,735956,55.000000,55.05000,55.540000,54.880000,61800,2015-12-24,55.156667,0.050000,0.000907,0.011966,0.440000,0.007977,ICHB.MX
2.0,735957,55.050000,55.05000,55.050000,55.050000,0,2015-12-25,55.050000,0.000000,0.000000,0.000000,-0.106667,-0.001938,ICHB.MX
3.0,735960,55.540000,54.92000,55.590000,54.670000,57400,2015-12-28,55.060000,-0.620000,-0.011260,0.016709,0.010000,0.000182,ICHB.MX
4.0,735961,54.910000,56.09000,56.670000,54.910000,147400,2015-12-29,55.890000,1.180000,0.021113,0.031490,0.830000,0.014851,ICHB.MX
5.0,735962,56.000000,55.40000,57.800000,55.090000,251900,2015-12-30,56.096667,-0.600000,-0.010696,0.048309,0.206667,0.003684,ICHB.MX
6.0,735963,55.590000,55.67000,56.410000,54.900000,86700,2015-12-31,55.660000,0.080000,0.001437,0.027129,-0.436667,-0.007845,ICHB.MX
7.0,735964,55.670000,55.67000,55.670000,55.670000,0,2016-01-01,55.670000,0.000000,0.000000,0.000000,0.010000,0.000180,ICHB.MX
8.0,735967,55.670000,54.93000,55.670000,54.690000,170800,2016-01-04,55.096667,-0.740000,-0.013431,0.017787,-0.573333,-0.010406,ICHB.MX
9.0,735968,55.300000,54.70000,55.450000,54.270000,734000,2016-01-05,54.806667,-0.600000,-0.010948,0.021530,-0.290000,-0.005291,ICHB.MX


In [60]:
clustering_data = pivot_data(stocks_df, values="change_amount")
clustering_data

n_date,735955,735956,735957,735960,735961,735962,735963,735964,735967,735968,735969,735970,735971,735974,735975,735976,735977,735978,735981,735982,735983,735984,735985,735988,735989,735990,735991,735992,735995,735996,735997,735998,735999,736002,736003,736004,736005,736006,736009,736010,736011,736012,736013
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
ALFAA.MX,0.39,-0.27,0,-0.05,-0.32,-0.42,-0.02,0,-0.25,0.09,-0.34,-1.97,-0.82,0.52,-0.48,-0.62,-0.41,0.48,-0.12,0.47,0.51,0.1,-0.05,0.4,0.12,-0.05,0.27,1.42,0,-1.62,-0.03,0.1,-0.47,-0.16,1.26,0.21,-0.36,-0.75,0.42,-0.26,0.29,-0.38,-0.91
ALPEKA.MX,-0.35,-0.02,0,-0.11,0.4,-0.06,-0.02,0,0.07,-0.48,-1.12,-0.23,0.3,-0.52,-0.78,0.48,-0.14,0.0,-0.12,0.17,-0.81,0.85,0.51,-0.35,-0.34,-0.32,0.16,0.47,0,-0.22,0.76,0.12,0.09,-0.22,0.62,-0.04,-0.07,0.4,-0.12,-0.2,0.07,0.81,0.22
ALSEA.MX,-0.19,-0.19,0,1.04,0.12,-0.7,1.25,0,0.43,0.19,-0.01,-2.26,-2.06,1.08,1.23,0.01,-0.48,0.75,0.58,-0.45,-0.01,2.53,0.65,0.36,0.11,-1.12,1.08,1.7,0,-1.24,-0.27,-0.92,1.91,2.04,0.85,0.31,-0.54,-3.01,-0.06,-0.75,0.3,-0.38,-0.89
AMXL.MX,-0.03,-0.11,0,-0.18,0.28,0.05,-0.22,0,0.05,-0.02,-0.21,-0.36,-0.49,0.19,0.0,0.15,0.24,-0.21,0.04,0.01,0.29,0.27,0.27,-0.22,0.43,0.28,0.22,0.23,0,0.0,0.16,0.14,-0.19,-0.03,-0.35,-0.85,0.07,0.33,0.34,-0.21,-0.02,-0.21,-0.27
ASURB.MX,1.61,-0.16,0,-3.29,-1.1,-3.22,-0.11,0,-2.81,-5.46,-1.17,-0.28,-1.61,-1.77,1.85,-5.29,-1.47,-0.68,2.35,0.04,-0.07,1.99,1.54,4.25,1.82,-5.05,1.56,6.72,0,-3.44,-1.21,1.93,-0.05,-1.13,-0.77,3.3,2.46,-1.18,-0.24,2.24,5.07,-2.74,-4.28
BIMBOA.MX,0.5,-0.49,0,0.06,-0.05,-1.24,0.54,0,-0.03,0.78,0.86,0.66,0.16,0.76,0.09,0.72,-0.16,-0.32,-0.46,-0.24,0.64,-1.83,0.17,-0.39,0.94,0.11,1.03,1.76,0,1.41,0.0,0.19,-1.34,0.91,0.94,0.55,-1.2,0.01,0.78,-0.02,-0.33,0.73,1.23
BOLSAA.MX,0.0,0.12,0,-0.09,-0.07,0.01,-0.19,0,-0.33,-0.02,-0.05,-0.46,-0.38,-0.1,-0.21,0.1,0.67,-0.19,0.17,0.12,0.32,-0.13,-0.18,0.41,0.03,0.23,0.55,0.7,0,0.86,-0.69,-0.04,-0.75,-0.23,-0.15,-0.4,-0.43,0.64,0.09,0.58,0.33,0.66,0.33
CEMEXCPO.MX,0.57,0.07,0,-0.31,-0.15,-0.09,0.23,0,-0.17,-0.12,0.08,-0.61,-0.68,0.04,0.08,-0.31,0.18,0.13,-0.07,-0.47,0.05,0.55,0.0,-0.58,0.13,0.0,0.0,0.34,0,0.04,0.37,0.62,-0.2,-0.55,0.01,-0.16,0.1,0.17,0.17,-0.1,0.68,-0.4,0.07
COMERCIUBC.MX,-0.52,0.45,0,-1.05,-0.35,-2.23,0.01,0,-0.69,0.06,0.0,0.76,0.17,-0.31,-0.05,-0.34,-0.1,0.43,-0.16,0.02,0.37,-0.18,0.75,0.5,-0.26,-0.51,0.21,0.04,0,-0.34,-0.11,0.03,-0.5,-0.02,1.07,0.05,0.76,-0.04,0.06,0.0,0.57,0.05,-0.03
ELEKTRA.MX,-1.41,-2.75,0,-9.31,-13.88,-8.81,3.04,0,-8.57,-12.69,-14.62,-2.9,-4.55,17.03,-1.03,-14.62,-0.76,20.18,-2.93,-17.86,-9.13,4.49,4.07,4.54,7.54,-0.95,4.02,1.12,0,-0.25,-2.05,-4.67,0.25,-1.08,8.14,5.66,-4.72,-1.09,5.45,-5.39,7.7,-12.0,-3.31


In [61]:
norm_data = normalize(clustering_data.values, axis=1)
norm_data = pd.DataFrame(norm_data)
for item in norm_data.values:
    plt.plot(item)
plt.show();

In [62]:
prediction, model, data = cluster_data(clustering_data, n_clusters=8, normalize_data=True)
print "Cluster Count: %s" % len(np.unique(prediction))
clustering_data["Cluster"] = prediction
clustering_data

Cluster Count: 8


n_date,735955,735956,735957,735960,735961,735962,735963,735964,735967,735968,735969,735970,735971,735974,735975,735976,735977,735978,735981,735982,735983,735984,735985,735988,735989,735990,735991,735992,735995,735996,735997,735998,735999,736002,736003,736004,736005,736006,736009,736010,736011,736012,736013,Cluster
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
ALFAA.MX,0.39,-0.27,0,-0.05,-0.32,-0.42,-0.02,0,-0.25,0.09,-0.34,-1.97,-0.82,0.52,-0.48,-0.62,-0.41,0.48,-0.12,0.47,0.51,0.1,-0.05,0.4,0.12,-0.05,0.27,1.42,0,-1.62,-0.03,0.1,-0.47,-0.16,1.26,0.21,-0.36,-0.75,0.42,-0.26,0.29,-0.38,-0.91,1
ALPEKA.MX,-0.35,-0.02,0,-0.11,0.4,-0.06,-0.02,0,0.07,-0.48,-1.12,-0.23,0.3,-0.52,-0.78,0.48,-0.14,0.0,-0.12,0.17,-0.81,0.85,0.51,-0.35,-0.34,-0.32,0.16,0.47,0,-0.22,0.76,0.12,0.09,-0.22,0.62,-0.04,-0.07,0.4,-0.12,-0.2,0.07,0.81,0.22,3
ALSEA.MX,-0.19,-0.19,0,1.04,0.12,-0.7,1.25,0,0.43,0.19,-0.01,-2.26,-2.06,1.08,1.23,0.01,-0.48,0.75,0.58,-0.45,-0.01,2.53,0.65,0.36,0.11,-1.12,1.08,1.7,0,-1.24,-0.27,-0.92,1.91,2.04,0.85,0.31,-0.54,-3.01,-0.06,-0.75,0.3,-0.38,-0.89,1
AMXL.MX,-0.03,-0.11,0,-0.18,0.28,0.05,-0.22,0,0.05,-0.02,-0.21,-0.36,-0.49,0.19,0.0,0.15,0.24,-0.21,0.04,0.01,0.29,0.27,0.27,-0.22,0.43,0.28,0.22,0.23,0,0.0,0.16,0.14,-0.19,-0.03,-0.35,-0.85,0.07,0.33,0.34,-0.21,-0.02,-0.21,-0.27,0
ASURB.MX,1.61,-0.16,0,-3.29,-1.1,-3.22,-0.11,0,-2.81,-5.46,-1.17,-0.28,-1.61,-1.77,1.85,-5.29,-1.47,-0.68,2.35,0.04,-0.07,1.99,1.54,4.25,1.82,-5.05,1.56,6.72,0,-3.44,-1.21,1.93,-0.05,-1.13,-0.77,3.3,2.46,-1.18,-0.24,2.24,5.07,-2.74,-4.28,1
BIMBOA.MX,0.5,-0.49,0,0.06,-0.05,-1.24,0.54,0,-0.03,0.78,0.86,0.66,0.16,0.76,0.09,0.72,-0.16,-0.32,-0.46,-0.24,0.64,-1.83,0.17,-0.39,0.94,0.11,1.03,1.76,0,1.41,0.0,0.19,-1.34,0.91,0.94,0.55,-1.2,0.01,0.78,-0.02,-0.33,0.73,1.23,5
BOLSAA.MX,0.0,0.12,0,-0.09,-0.07,0.01,-0.19,0,-0.33,-0.02,-0.05,-0.46,-0.38,-0.1,-0.21,0.1,0.67,-0.19,0.17,0.12,0.32,-0.13,-0.18,0.41,0.03,0.23,0.55,0.7,0,0.86,-0.69,-0.04,-0.75,-0.23,-0.15,-0.4,-0.43,0.64,0.09,0.58,0.33,0.66,0.33,5
CEMEXCPO.MX,0.57,0.07,0,-0.31,-0.15,-0.09,0.23,0,-0.17,-0.12,0.08,-0.61,-0.68,0.04,0.08,-0.31,0.18,0.13,-0.07,-0.47,0.05,0.55,0.0,-0.58,0.13,0.0,0.0,0.34,0,0.04,0.37,0.62,-0.2,-0.55,0.01,-0.16,0.1,0.17,0.17,-0.1,0.68,-0.4,0.07,0
COMERCIUBC.MX,-0.52,0.45,0,-1.05,-0.35,-2.23,0.01,0,-0.69,0.06,0.0,0.76,0.17,-0.31,-0.05,-0.34,-0.1,0.43,-0.16,0.02,0.37,-0.18,0.75,0.5,-0.26,-0.51,0.21,0.04,0,-0.34,-0.11,0.03,-0.5,-0.02,1.07,0.05,0.76,-0.04,0.06,0.0,0.57,0.05,-0.03,7
ELEKTRA.MX,-1.41,-2.75,0,-9.31,-13.88,-8.81,3.04,0,-8.57,-12.69,-14.62,-2.9,-4.55,17.03,-1.03,-14.62,-0.76,20.18,-2.93,-17.86,-9.13,4.49,4.07,4.54,7.54,-0.95,4.02,1.12,0,-0.25,-2.05,-4.67,0.25,-1.08,8.14,5.66,-4.72,-1.09,5.45,-5.39,7.7,-12.0,-3.31,7


In [63]:
prediction, model, c_data = visualize_clusters(stocks_df, values="change_amount", n_clusters=8, normalize_data=True);

Cluster: 0
Members: ['AMXL.MX: AMX.L', 'CEMEXCPO.MX: CEMEX.CPO', 'GMEXICOB.MX: GMEXICO.B', 'MEXCHEM.MX: MEXCHEM', 'SANMEXB.MX: SANMEX.B']
Cluster: 1
Members: ['ALFAA.MX: ALFA.A', 'ALSEA.MX: ALSEA', 'ASURB.MX: ASUR.B', 'GENTERA.MX: GENTERA', 'GFNORTEO.MX: GFNORTE.O', 'GFREGIOO.MX: GFREGIO.O', 'KIMBERA.MX: KIMBER.A']
Cluster: 2
Members: ['GSANBORB-1.MX: GSANBOR.B-1', 'IENOVA.MX: IENOVA', 'LIVEPOLC-1.MX: LIVEPOL.C-1']
Cluster: 3
Members: ['GAPB.MX: GAP.B', 'GFINBURO.MX: GFINBUR.O', 'LABB.MX: LAB.B', 'PINFRA.MX: PINFRA']
Cluster: 4
Members: ['ALPEKA.MX: ALPEK.A', 'ICHB.MX: ICH.B', 'OHLMEX.MX: OHLMEX']
Cluster: 5
Members: ['BIMBOA.MX: BIMBO.A', 'COMERCIUBC.MX: COMERCI.UBC', 'ELEKTRA.MX: ELEKTRA', 'ICA.MX: ICA', 'LALAB.MX: LALA.B', 'TLEVISACPO.MX: TLEVISA.CPO', 'WALMEX.MX: WALMEX']
Cluster: 6
Members: ['BOLSAA.MX: BOLSA.A', 'GRUMAB.MX: GRUMA.B']
Cluster: 7
Members: ['KOFL.MX: KOFL']


In [64]:
measure_error(prediction, model, c_data)

(-13.014854954903203, 1)

#### Busqueda del numero optimo de clusters

In [65]:
max_clusters = 30
feature = "average"
clustering_data = pivot_data(stocks_df, values=feature)
clustering_data["Cluster"] = pd.Series()
for normalize_data in [True, False]:
    fig = plt.figure(figsize=(10,6))
    plt.title("K-Means - Feature: %s Normalized: %s" % (feature, normalize_data))
    axes_1 = fig.add_subplot(111)
    axes_2 = axes_1.twinx()
    score_error_list = []
    failed_clusters_list = []
    
    for n_clusters in range(2,max_clusters):
        prediction, model, data = cluster_data(clustering_data.drop("Cluster",1), n_clusters=n_clusters,
                                               normalize_data=normalize_data)
        data = pd.DataFrame(data, index=clustering_data.index,columns=clustering_data.drop("Cluster",1).columns)
        data["Cluster"] = prediction
        score_error, failed_clusters =  measure_error(prediction, model, data)
        score_error_list.append(score_error)
        failed_clusters_list.append(failed_clusters)
    axes_1.plot(range(2,max_clusters), score_error_list, "ro-", label = "Average Error")
    axes_2.plot(range(2,max_clusters), failed_clusters_list, "bo-", label = "Failed Cluster")
    
    axes_1.grid()
    axes_1.legend(loc = "lower center")
    axes_2.legend(loc = "upper center")
    axes_1.set_ylabel("Average Error")
    axes_2.set_ylabel("Failed Cluster")
    axes_1.set_xlabel("Clusters")
    plt.show()