In [1]:
import pandas as pd
import random 
import itertools
import json

pd.set_option('display.max_rows', 50)


from functions import sharpe_ratio_calculation, generate_rand_portfolios, select_top_five, join_stocks_crypto, dtw_matrix_calc, run_min_variance, run_clustering_model

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Load both price tables from CSV, using the 'Date' column as the index.
df_all_stocks = pd.read_csv(
    'stocks_data.csv',
    index_col='Date',
)
cryptos_df = pd.read_csv(
    'cryptos_data.csv',
    index_col='Date',
)

# mode (per the original author's note): either left-join on the crypto dates
# and fill NA for stocks, or left-join on the stock dates and leave out some
# dates. 'crypto_left' selects the first option.
joined_df = join_stocks_crypto(
    cryptos_df,
    df_all_stocks,
    mode='crypto_left',
)

In [3]:
# Random portfolio generation
# Seed Python's `random` module before sampling so reruns are repeatable
# (assumes generate_rand_portfolios draws from `random` - TODO confirm).
random.seed(42)

# Candidate tickers are the column labels of the stock table.
tickers = df_all_stocks.columns.tolist()
random_portfolios = generate_rand_portfolios(
    tickers=tickers,
    n_stocks=15,
    n_reps=1000,
)

In [4]:
# Compute the Sharpe-ratio metric from the stock table (annual risk-free rate
# argument set to 0.02), then use it to pick the top five of the random
# portfolios generated earlier.
sharpe_ratio = sharpe_ratio_calculation(
    df_all_stocks,
    rf_rate_annual=0.02,
)
top_five_dict = select_top_five(
    random_portfolios,
    metric=sharpe_ratio,
)

In [5]:
# for i in range(1,20):

#     print('Doing', i)

#     top_five_sets = dict(itertools.islice(top_five_dict.items(), i, i+1))
#     results = run_min_variance(df_all_stocks, top_five_sets) 
#     with open(f"min_variance_portfolio_jsons/my_dict{i}.json", "w") as f:
#         json.dump(results, f, indent=4)

In [6]:
# dtw_matrix = dtw_matrix_calc(joined_df)

# labels_kmeans = run_clustering_model(joined_df, n_clus=3, model_name='kmeans')

# labels_kshape = run_clustering_model(joined_df, n_clus=3, model_name='kshape')

# labels_ahc = run_clustering_model(joined_df, n_clus=3, model_name='ahc', linkage='single')

In [7]:
#Compare based on Silhouette Score and Elbow method
from sklearn.metrics import silhouette_score


# n_clusters_list = [3,5,7,10]



# def test_for_silhouette_score(df, n_clusters_list, method='kmeans'):
#     dtw_matrix = dtw_matrix_calc(df)

#     silhouettes = list()
#     for n in n_clusters_list:
#         labels, t = run_clustering_model(df, n_clus=n, model_name=method)
#         score = silhouette_score(dtw_matrix, labels, metric='precomputed')
#         silhouettes.append(int(score))


#     silhouettes_df = pd.DataFrame({'clusters': n_clusters_list, 'silhouette scores': silhouettes})
#     return silhouettes_df

In [8]:
n_clusters_list = [3,5,7,10]
def test_for_silhouette_score(df, n_clusters_list, method='kmeans', linkage_list=None):
    dtw_matrix = dtw_matrix_calc(df)
    silhouettes = []

    # If AHC, require a list of linkage types
    if method == 'ahc':
        if linkage_list is None:
            raise ValueError("You must provide a list of linkages when using method='ahc'")
        
        for linkage in linkage_list:
            for n in n_clusters_list:
                labels, t = run_clustering_model(df, n_clus=n, model_name=method, linkage=linkage)
                score = silhouette_score(dtw_matrix, labels, metric='precomputed')
                silhouettes.append({
                    'clusters': n,
                    'silhouette_score': float(score),
                    'method': method,
                    'linkage': linkage
                })
    
    else:
        for n in n_clusters_list:
            labels, t = run_clustering_model(df, n_clus=n, model_name=method)
            score = silhouette_score(dtw_matrix, labels, metric='precomputed')
            silhouettes.append({
                'clusters': n,
                'silhouette_score': float(score),
                'method': method
            })

    return pd.DataFrame(silhouettes)

In [9]:
# Score the 'ahc' method on the stock table for every cluster count in
# n_clusters_list, once per linkage listed below.
silhouette_df = test_for_silhouette_score(
    df_all_stocks,
    n_clusters_list,
    method='ahc',
    linkage_list=['single', 'average'],
)



In [10]:
# Bare last expression: renders the silhouette score table as the cell output.
silhouette_df

Unnamed: 0,clusters,silhouette_score,method,linkage
0,3,0.161868,ahc,single
1,5,0.025196,ahc,single
2,7,0.015752,ahc,single
3,10,-0.028004,ahc,single
4,3,0.201038,ahc,average
5,5,0.078158,ahc,average
6,7,0.057452,ahc,average
7,10,0.044886,ahc,average
