# Features Rank

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# fix_yahoo_finance is used to fetch data 
import yfinance as yf
yf.pdr_override()

In [2]:
# input
symbol = 'AMD'
start = '2014-01-01'
end = '2019-01-01'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,3.95,3.95,3.98,3.84,3.85,20548400
2014-01-03,4.0,4.0,4.0,3.88,3.98,22887200
2014-01-06,4.13,4.13,4.18,3.99,4.01,42398300
2014-01-07,4.18,4.18,4.25,4.11,4.19,42932100
2014-01-08,4.18,4.18,4.26,4.14,4.23,30678700


In [3]:
dataset['Open_Close'] = (dataset['Open'] - dataset['Adj Close'])/dataset['Open']
dataset['High_Low'] = (dataset['High'] - dataset['Low'])/dataset['Low']
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Returns'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,Open_Close,High_Low,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014-01-03,4.0,4.0,4.0,3.88,3.98,22887200,-0.005025,0.030928,1,1,1,0.012658
2014-01-06,4.13,4.13,4.18,3.99,4.01,42398300,-0.029925,0.047619,1,1,1,0.0325
2014-01-07,4.18,4.18,4.25,4.11,4.19,42932100,0.002387,0.034063,0,1,0,0.012106
2014-01-08,4.18,4.18,4.26,4.14,4.23,30678700,0.01182,0.028986,0,0,0,0.0
2014-01-09,4.09,4.09,4.23,4.05,4.2,30667600,0.02619,0.044444,0,0,1,-0.021531


In [4]:
X = dataset.drop(['Adj Close', 'Buy_Sell'], axis=1)
Y = dataset['Buy_Sell'].values

In [5]:
print(X.shape)
print(Y.shape)

(1257, 10)
(1257,)


In [6]:
X = np.array(X).reshape(1257,-1)
y = np.array(Y).reshape(1257,-1)

In [7]:
from sklearn.svm import SVR
from sklearn.feature_selection import RFECV

# Instantiate estimator and feature selector
svr_mod = SVR(kernel="linear")
feat_selector = RFECV(svr_mod, cv=5)

# Fit
feat_selector = feat_selector.fit(X, y)

# Print support and ranking
print(feat_selector.support_)
print(feat_selector.ranking_)
print(X.columns)

In [8]:
from sklearn.linear_model import LarsCV

# Drop feature suggested not important in step 2
# X = X.drop('sex', axis=1)

# Instantiate
lars_model = LarsCV(cv=5, normalize=False)

# Fit
feat_selector = lars_model.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_model.score(X, y))
print(lars_model.alpha_)

In [9]:
from sklearn.ensemble import RandomForestRegressor

# Instantiate
rf_model = RandomForestRegressor(max_depth=2, random_state=123, 
                               n_estimators=100, oob_score=True)

# Fit
rf_model.fit(X, y)

print(dataset.columns)
print(rf_model.feature_importances_)

(1257, 2)

In [10]:
from sklearn.ensemble import ExtraTreesRegressor

# Instantiate
ETR_model = ExtraTreesRegressor()

# Fit
ETR_model.fit(X, y)

print(dataset.columns)
print(ETR_model_mod.feature_importances_)

array([[-1.44899117,  0.76442088],
       [-1.13188616,  1.61698256],
       [-1.13629614,  0.62013725],
       ...,
       [ 3.40484299,  0.20020157],
       [ 3.47729094,  1.02990627],
       [ 3.20984775,  0.1234806 ]])