In [1]:
import pandas as pd
import numpy as np

In [2]:
import include.GA_clsData as DA
import include.GA_clsModel as MO

### Genes and params

In [3]:
# Load the AdaBoostClassifier hyperparameter table; the following cells sort
# it by its 'fitness' column and read gene values from the first row.
df = pd.read_csv('hyperparameters/AdaBoostClassifier_hyperparameters.csv')

In [4]:
# Highest fitness first, so that the first row is the best candidate.
df = df.sort_values(by='fitness', ascending=False)

In [5]:
# Build the `genes` dict from the first row of `df` (the highest-fitness row
# once `df` has been sorted by descending fitness).
# The first letter of a column name selects the cast: 'i' -> int,
# 'f' -> float; any other column is stored as 0. 'fitness' is skipped.
genes = {}
for _, best_row in df.head(1).iterrows():
    for column in df.columns:
        if column == 'fitness':
            continue
        prefix = column[0]
        if prefix == 'i':
            genes[column] = int(best_row[column])
        elif prefix == 'f':
            genes[column] = float(best_row[column])
        else:
            genes[column] = 0

In [6]:
# Run configuration passed to DA.Data and MO.Model, written directly as a
# mapping from option name to value.
param_dict = {
    'periods_away_to_predict': 1,
    'get_target': True,
    'start_new': True,
    'initial_population': 1,
    # Alternative input covering a shorter date range (per its file name):
    # 'training_file': 'data/EURUSD_M5_2014-01-01--2014-01-31.csv',
    'training_file': 'data/EURUSD_M5_2014-01-01--2018-12-31.csv',
}
params = param_dict

### Data

In [7]:
# Build the project's Data helper from the selected genes and run parameters.
data = DA.Data(genes,params)

In [8]:
# NOTE(review): presumably loads/prepares the data set named by
# params['training_file'] — confirm against include/GA_clsData.
data.getDataSet()

In [9]:
# Prepares the X_train / X_val / y_train / y_val attributes read in the next
# cell. NOTE(review): number_validations presumably sets the validation-set
# size (the prediction frame built later has 10000 rows) — confirm.
data.getXy(number_validations = 10000)

In [10]:
# Local handles for the train/validation features and labels held on `data`.
X_train, X_val = data.X_train, data.X_val
y_train, y_val = data.y_train, data.y_val

### Classifiers

In [11]:
# Project helper returning the classifier instances to compare; each one is
# trained and evaluated in the loop below.
import include.Classifiers as cl
classifiers = cl.getClassifiers()

### Loop through and train

In [12]:
# Optional allow-list of classifier names; it is only referenced by the
# commented-out filter inside the training loop.
totry = {'SVC'}

In [13]:
# Train every classifier and collect its hard predictions on the validation
# set: one 0/1 column per classifier next to the true label in 'target'.
# NOTE: this rebinds `df`, which held the hyperparameter table until now.
df = pd.DataFrame()
target = y_val.reset_index(drop=True)
# Only labels equal to True map to 1; everything else becomes 0.
target = [1 if p == True else 0 for p in target]
df['target'] = target
for classifier in classifiers:
    classifier_name = type(classifier).__name__

    # To train only a subset, enable:
    # if classifier_name not in totry: continue

    # NOTE(review): Model presumably selects its classifier from this entry
    # — confirm against include/GA_clsModel.
    params['classifier_name'] = classifier_name

    model = MO.Model(genes, params, data)
    accuracy = model.train()
    print(f'Accuracy: {accuracy}')

    try:
        # Hard class labels; model.classifier.predict_proba(X_val)[:, 0]
        # would give probabilities instead.
        pred = model.classifier.predict(X_val)
        pred = [1 if p == True else 0 for p in pred]
        df[classifier_name] = pred
    except Exception as exc:
        # Still best-effort (a failing classifier just gets no column), but
        # the failure is reported instead of vanishing, and KeyboardInterrupt
        # / SystemExit are no longer swallowed as they were by a bare except.
        print(f'Skipping {classifier_name}: prediction failed ({exc!r})')

Training: AdaBoostClassifier
Accuracy: 54.0
Training: BaggingClassifier
Accuracy: 51.9
Training: BernoulliNB
Accuracy: 53.2
Training: CalibratedClassifierCV
Accuracy: 54.6
Training: DecisionTreeClassifier
Accuracy: 51.2
Training: ExtraTreeClassifier
Accuracy: 51.0
Training: ExtraTreesClassifier
Accuracy: 53.2
Training: GradientBoostingClassifier
Accuracy: 54.7
Training: HistGradientBoostingClassifier
Accuracy: 54.6
Training: KNeighborsClassifier
Accuracy: 50.6
Training: LinearDiscriminantAnalysis
Accuracy: 53.9
Training: MLPClassifier
Accuracy: 53.9
Training: OneVsRestClassifier
Accuracy: 54.0
Training: QuadraticDiscriminantAnalysis




Accuracy: 50.4
Training: RandomForestClassifier
Accuracy: 53.2
Training: SGDClassifier
Accuracy: 49.4
Training: XGBClassifier
Accuracy: 53.8


In [14]:
# Sanity check: rows = validation samples, columns = 'target' plus one per
# classifier that produced predictions.
df.shape

(10000, 18)

In [15]:
# Peek at the first rows of the 0/1 prediction table.
df.head()

Unnamed: 0,target,AdaBoostClassifier,BaggingClassifier,BernoulliNB,CalibratedClassifierCV,DecisionTreeClassifier,ExtraTreeClassifier,ExtraTreesClassifier,GradientBoostingClassifier,HistGradientBoostingClassifier,KNeighborsClassifier,LinearDiscriminantAnalysis,MLPClassifier,OneVsRestClassifier,QuadraticDiscriminantAnalysis,RandomForestClassifier,SGDClassifier,XGBClassifier
0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0
1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1
2,0,1,0,0,1,0,1,1,0,0,1,1,1,1,1,1,0,0
3,0,0,0,0,1,0,0,0,1,1,1,0,1,0,1,1,1,0
4,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1


In [16]:
# Per-column summary. Every column is 0/1, so a column's mean is the share of
# rows labelled 1; a mean far from the target's mean flags a classifier that
# is heavily biased towards one class.
df.describe()

Unnamed: 0,target,AdaBoostClassifier,BaggingClassifier,BernoulliNB,CalibratedClassifierCV,DecisionTreeClassifier,ExtraTreeClassifier,ExtraTreesClassifier,GradientBoostingClassifier,HistGradientBoostingClassifier,KNeighborsClassifier,LinearDiscriminantAnalysis,MLPClassifier,OneVsRestClassifier,QuadraticDiscriminantAnalysis,RandomForestClassifier,SGDClassifier,XGBClassifier
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.4986,0.4854,0.3811,0.5014,0.4834,0.4948,0.4984,0.4539,0.4562,0.4382,0.4972,0.5112,0.4933,0.4854,0.8939,0.4469,0.487,0.4681
std,0.500023,0.499812,0.485681,0.500023,0.499749,0.499998,0.500022,0.497895,0.498103,0.496191,0.500017,0.4999,0.49998,0.499812,0.307981,0.497197,0.499856,0.499006
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [17]:
# Column names: 'target' followed by one column per classifier.
df.columns

Index(['target', 'AdaBoostClassifier', 'BaggingClassifier', 'BernoulliNB',
       'CalibratedClassifierCV', 'DecisionTreeClassifier',
       'ExtraTreeClassifier', 'ExtraTreesClassifier',
       'GradientBoostingClassifier', 'HistGradientBoostingClassifier',
       'KNeighborsClassifier', 'LinearDiscriminantAnalysis', 'MLPClassifier',
       'OneVsRestClassifier', 'QuadraticDiscriminantAnalysis',
       'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
      dtype='object')

In [18]:
# Balance the two classes so that a constant prediction scores 50%.
df_true = df[df['target'] == 1]
df_false = df[df['target'] == 0]

# Keep the same number of rows from each class: the size of the smaller one.
ix = min(df_true.shape[0], df_false.shape[0])
df_true = df_true.head(ix)
df_false = df_false.head(ix)

df = pd.concat([df_true, df_false], axis=0)

# Shuffle with a fixed seed so a fresh Restart & Run All reproduces the same
# row order (the original shuffle was unseeded), then renumber the rows.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [19]:
# Shape after balancing: the row count is twice the size of the smaller class.
df.shape

(9972, 18)

In [None]:
# Write the balanced, shuffled prediction table to CSV without the index.
df.to_csv('model_preds.csv', index=False)

In [20]:
import xgboost as xgb
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

In [21]:
# Label is the 'target' column; features are every remaining column, i.e. the
# per-classifier predictions.
y = df['target']
X = df.drop(columns='target')

In [22]:
# Hold out part of the stacked predictions (default split proportions) to
# score the meta-classifier. A fixed random_state makes the split, and so the
# reported accuracy, reproducible; the original split was unseeded.
# NOTE: this rebinds X_train / X_val / y_train / y_val, which previously held
# the split taken from `data`; re-running earlier cells after this one would
# pick up these values instead.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42)

In [23]:
# XGBoost classifier with default settings, used as the meta-classifier.
# NOTE: rebinds `model`, which held the last MO.Model from the training loop.
model = xgb.XGBClassifier()

In [24]:
# Fit the meta-classifier on the training part of the per-classifier
# predictions.
model.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [25]:
# Class predictions for the held-out rows (rebinds `pred` from the training
# loop).
pred = model.predict(X_val)

In [26]:
# Held-out accuracy as a percentage, rounded to one decimal place.
# Built-in float replaces the np.float alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (where it raises AttributeError).
acc = float(f'{accuracy_score(y_val, pred) * 100:0.1f}')
print(acc)

52.8
