In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
# One seaborn setup call: white-grid style in the notebook context, with
# color_codes enabled as in the original first call.
sns.set(context='notebook', style='whitegrid', color_codes=True)
from mpld3._display import display as d3_display

from ipywidgets import widgets, interactive, interact, Layout, HTML as ipyHTML
from IPython.display import display, HTML, clear_output, display_html

# Widen the pandas display limits so large frames are not truncated in cell output.
for option_name, option_value in (('display.max_rows', 200),
                                  ('display.max_columns', 100)):
    pd.set_option(option_name, option_value)

In [3]:
# NOTE: unpickling can execute arbitrary code; only load 'df_sample.pkl' from a trusted source.
df_statquant_fillmin = pd.read_pickle('df_sample.pkl')

# Keep the target plus five feature columns and work on a 1% random subsample.
# random_state is fixed so the subsample, and every model fitted on it below,
# is identical on each Restart & Run All.
df = df_statquant_fillmin[['price_future_12m', 'price', 'f__roe', 'f__bv_to_marketcap', 'f__stock_performance_12m',
                      'f__earnings_to_marketcap']].sample(frac=0.01, random_state=0)

In [16]:
"""Prepare data for regression"""
# Pick the predictors by name instead of "every column except the target":
# the classification cell further down adds 'price_return' and 'long_short'
# to df, and re-running this cell afterwards would otherwise feed those
# target-derived columns to the regressors.
feature_cols = ['price', 'f__roe', 'f__bv_to_marketcap', 'f__stock_performance_12m',
                'f__earnings_to_marketcap']
X = df[feature_cols].values
y = df['price_future_12m'].values

## Random Forest

In [4]:
from sklearn.ensemble import RandomForestRegressor

# Small, shallow forest (10 trees, depth 2) with a fixed seed.
# criterion is left at its default, which is mean squared error: the explicit
# 'mse' spelling was renamed to 'squared_error' in newer scikit-learn releases
# and is rejected there, while the default selects the same loss in both.
forest = RandomForestRegressor(n_estimators=10, random_state=1, max_depth=2)
forest.fit(X, y)

# In-sample predictions (same rows the forest was fitted on).
y_pred = forest.predict(X)

In [19]:
# Show the first fitted tree of the forest as the cell output.
forest.estimators_[0]

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features='auto',
           max_leaf_nodes=None, min_impurity_split=1e-07,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False,
           random_state=1791095845, splitter='best')

In [20]:
from sklearn.tree import export_graphviz

# Export the first tree of the forest to Graphviz. feature_names is passed, as
# in the other export cells, so split nodes show column names instead of
# anonymous X[i] indices. The names follow the column order of X, i.e. the df
# columns with the target 'price_future_12m' removed.
export_graphviz(forest.estimators_[0],
                out_file='rftree.dot',
                feature_names=['price', 'f__roe', 'f__bv_to_marketcap', 'f__stock_performance_12m',
                               'f__earnings_to_marketcap'])

## Tree Regressor

In [21]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree limited to depth 5.
# criterion is left at its default, which is mean squared error: the explicit
# 'mse' spelling was renamed to 'squared_error' in newer scikit-learn releases
# and is rejected there, while the default selects the same loss in both.
rgtree = DecisionTreeRegressor(max_depth=5)
rgtree.fit(X, y)

# In-sample predictions (same rows the tree was fitted on).
y_pred = rgtree.predict(X)

In [22]:
from sklearn.tree import export_graphviz

# Labels for the split nodes of the regression tree written to 'rgtree.dot'.
rgtree_feature_names = [
    'price',
    'f__roe',
    'f__bv_to_marketcap',
    'f__stock_performance_12m',
    'f__earnings_to_marketcap',
]
export_graphviz(rgtree, out_file='rgtree.dot', feature_names=rgtree_feature_names)

## Decision Tree Classifier

In [44]:
"""Prepare data for classification"""
# 12-month forward return relative to the current price.
current_price = df['price']
df['price_return'] = (df['price_future_12m'].values - current_price) / current_price

# Label non-negative returns +1 and negative returns -1. Rows whose return is
# NaN match neither mask and are left without a label.
is_long = df.price_return >= 0
is_short = df.price_return < 0
df.loc[is_long, 'long_short'] = 1
df.loc[is_short, 'long_short'] = -1

# Everything except the target and the columns derived from it is a feature.
non_feature_cols = ['price_future_12m', 'price_return', 'long_short']
X = df.drop(non_feature_cols, axis = 1).values
y = df['long_short'].values

In [45]:
from sklearn.tree import DecisionTreeClassifier

# Depth-3 entropy classifier with a fixed seed; fit() returns the estimator
# itself, so construction and fitting are chained.
tree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
).fit(X, y)

# In-sample class predictions.
y_pred = tree.predict(X)

In [46]:
from sklearn.tree import export_graphviz

# Labels for the split nodes of the classifier written to 'tree.dot'.
classifier_feature_names = [
    'price',
    'f__roe',
    'f__bv_to_marketcap',
    'f__stock_performance_12m',
    'f__earnings_to_marketcap',
]
export_graphviz(tree, out_file='tree.dot', feature_names=classifier_feature_names)

## Decision Tree Classifier for full statquant data

In [58]:
"""data preparation"""
# Identifier, date and forward-looking columns that must not be used as
# features. 'price_future_12m' is deliberately kept (commented out of the
# list) because the target is derived from it below.
exclu_feilds = ['date', 'tradingitemid', 'companyid', 'companyname', 'exchange', 'exchangeid', 'industry', 
                'sector', 'symbol', 'symbolstartdate', u'date_next', 'price_next', u'date_historical_12m', 
                u'date_historical_6m', u'date_historical_3m', u'date_future_1m', u'price_future_1m',
                u'price_growth_avg_1m', u'price_growth_std_1m', u'price_min_1m',
                u'price_max_1m', u'date_future_3m', u'price_future_3m',
                u'price_growth_avg_3m', u'price_growth_std_3m', u'price_min_3m',
                u'price_max_3m', u'date_future_6m', u'price_future_6m',
                u'price_growth_avg_6m', u'price_growth_std_6m', u'price_min_6m',
                u'price_max_6m', u'date_future_12m', #u'price_future_12m',
                u'price_growth_avg_12m', u'price_growth_std_12m', u'price_min_12m',
                u'price_max_12m', u'date_future_24m', u'price_future_24m',
                u'price_growth_avg_24m', u'price_growth_std_24m', u'price_min_24m',
                u'price_max_24m', u'date_future_36m', u'price_future_36m',
                u'price_growth_avg_36m', u'price_growth_std_36m', u'price_min_36m',
                u'price_max_36m']

# 10% random subsample of the remaining columns. random_state is fixed so the
# subsample, and every tree fitted on it, is identical on each run.
df_full_statquant = df_statquant_fillmin.drop(exclu_feilds, axis=1).sample(frac = 0.1, random_state = 0)

# Reclassify the target: 12-month forward return, then a binary label.
df_full_statquant['price_return'] = (df_full_statquant['price_future_12m'].values - df_full_statquant['price']) / df_full_statquant['price']
# 1 = non-negative return, 0 = negative return. Rows with a NaN return match
# neither mask and are left without a label.
# NOTE(review): the earlier classification cell encodes the negative class as
# -1 rather than 0 - confirm the difference is intended.
df_full_statquant.loc[(df_full_statquant.price_return >= 0), 'long_short'] = 1
df_full_statquant.loc[(df_full_statquant.price_return < 0), 'long_short'] = 0

# Features are every column except the target and the columns derived from it.
X = df_full_statquant.drop(['price_future_12m', 'price_return', 'long_short'], axis = 1).values
y = df_full_statquant['long_short'].values


In [66]:
"""features to look at"""
# Feature names: every column left once the target and the columns derived
# from it are removed (same selection that builds X).
target_related_cols = ['price_future_12m', 'price_return', 'long_short']
all_features = df_full_statquant.drop(target_related_cols, axis = 1).columns.values

In [108]:
"""20 random trees for depth = 3"""
import random  # not used in this cell; kept in case other cells rely on it
from sklearn.tree import DecisionTreeClassifier, export_graphviz

trees_depth3 = []   # fitted classifiers, one per seed
y_preds = []        # in-sample predictions of each classifier
for n in range(20):
    # NOTE(review): only random_state varies between these trees; with all
    # features considered at every split they may come out (nearly) identical -
    # confirm this gives the diversity the experiment expects.
    clf = DecisionTreeClassifier(criterion='entropy',
                                 max_depth=3,
                                 random_state=n)
    clf.fit(X, y)
    trees_depth3.append(clf)
    y_preds.append(clf.predict(X))

    # One Graphviz file per tree, numbered by the loop index (the seed).
    # The file handle is passed straight through; it is not rebound to the
    # return value of export_graphviz.
    with open('tree_depth3_' + str(n) + '.dot', 'w') as dot_file:
        export_graphviz(clf,
                        out_file = dot_file,
                        feature_names = all_features)

In [102]:
from sklearn.tree import export_graphviz

# Write every tree of the fitted random forest to its own Graphviz file
# ('tree_0.dot', 'tree_1.dot', ...).
# export_graphviz is called as the imported function: the name `tree` is bound
# to a DecisionTreeClassifier instance earlier in the notebook, so
# `tree.export_graphviz(...)` raises AttributeError.
for i_tree, tree_in_forest in enumerate(forest.estimators_):
    with open('tree_' + str(i_tree) + '.dot', 'w') as dot_file:
        export_graphviz(tree_in_forest, out_file = dot_file)


[2]

In [None]:
from sklearn.tree import export_graphviz

# NOTE(review): this cell performs the same forest export as the cell before
# it and rewrites the same 'tree_<n>.dot' files - consider deleting one of them.
# export_graphviz is called as the imported function: the name `tree` is bound
# to a DecisionTreeClassifier instance earlier in the notebook, so
# `tree.export_graphviz(...)` raises AttributeError.
for i_tree, tree_in_forest in enumerate(forest.estimators_):
    with open('tree_' + str(i_tree) + '.dot', 'w') as dot_file:
        export_graphviz(tree_in_forest, out_file = dot_file)

In [71]:
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Fit a depth-3 classifier on the full-statquant X, y before exporting it.
# The `tree` object fitted earlier in the notebook was trained on the
# five-column sample, so exporting it with all_features (the full-statquant
# column names) would attach the wrong names to its splits.
assert X.shape[1] == len(all_features), "X and all_features must describe the same columns"
statquant_tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
statquant_tree.fit(X, y)

export_graphviz(statquant_tree,
                out_file='statquant_tree3.dot',
                feature_names = all_features)
                