-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_results.py
150 lines (143 loc) · 6.37 KB
/
plot_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
### ## ### ##
# Given a "results" folder path from build_model.py
# Build an MS Word-compatible file containing, for each currency:
# - Pie charts with class distribution for train and test sets
# ?- Bar chart with data split
# - # of features, # of records for train and test set
# - Feature importances bar graph
# - Learning curves
# - Classification reports for train and test set
# ?- Parameters table
#
# From equity results print:
# - Cumulative table with profit for each currency
# - Cumulative equity graph
### ## ### ##
import os, json
# Prefer the C-accelerated pickle module where it exists (Python 2) and fall
# back to the standard one otherwise. Only ImportError is caught so that
# unrelated failures (e.g. KeyboardInterrupt) are not silently swallowed.
try:
    import cPickle as pickle
except ImportError:
    import pickle
from pathlib import Path
import argparse
from matplotlib import pyplot as plt
import pandas as pd
import docx
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Cm
from lib.dataset import get_class_distribution
import numpy as np
def get_report_list(dir, equity_dir=None):
    """Index the per-model artifact paths found in a results folder.

    Every entry of ``dir`` whose name ends with ``_report.json`` yields one
    item in the index; the part of the file name before that suffix is used
    as the item key. The artifact paths are only built from the key, they are
    not checked for existence here.

    Args:
        dir: Folder to scan for ``*_report.json`` files.
        equity_dir: Optional folder holding ``<key>_trading.png`` images.
            When truthy, each item also gets a ``'trading'`` path.

    Returns:
        dict mapping each key to a dict with the paths ``'report'``,
        ``'learning'``, ``'features'``, ``'distribution'``, ``'parameters'``
        and, if ``equity_dir`` is given, ``'trading'``. Keys are inserted in
        sorted file-name order.
    """
    suffix = '_report.json'
    index = {}
    # os.listdir() returns names in arbitrary order; sort them so the
    # resulting index (and anything built from it) is reproducible.
    for name in sorted(os.listdir(dir)):
        if not name.endswith(suffix):
            continue
        # Strip only the trailing suffix: str.replace() would also remove
        # any earlier occurrence of the same text inside the name.
        key = name[:-len(suffix)]
        entry = {
            'report': '{}/{}_report.p'.format(dir, key),
            'learning': '{}/{}_learning_curve.png'.format(dir, key),
            'features': '{}/{}_feature_importances.png'.format(dir, key),
            'distribution': '{}/{}_class_dist.png'.format(dir, key),
            'parameters': '{}/{}_parameters.json'.format(dir, key),
        }
        if equity_dir:
            entry['trading'] = '{}/{}_trading.png'.format(equity_dir, key)
        index[key] = entry
    return index
def _format_wedge_label(pct, allvals):
    """Return the pie-wedge label ``"<pct>% (<count>)"`` for one wedge.

    ``pct`` is the wedge share in percent and ``allvals`` the values of all
    wedges of the same pie; the absolute count is recovered from both.
    """
    # Round instead of truncating: pct is a float, so pct/100*total can land
    # just below the true integer count and int() alone would lose one unit.
    absolute = int(round(pct / 100. * np.sum(allvals)))
    return "{:.1f}% ({:d})".format(pct, absolute)


def _plot_feature_importances(report, key, out_path):
    """Draw the feature-importances bar chart of ``report`` and save it to ``out_path``."""
    features = report.cv_report['feature_importances']
    # Create dataframe and invert index (high to low)
    df = pd.DataFrame.from_dict(features, orient='index', columns=['importance']).iloc[::-1]
    plt.rcdefaults()
    df.plot(
        kind='barh',
        title='{} Feature Importances'.format(key),
        figsize=(14, 20),
        # NOTE(review): the parentheses do not create a tuple, so a single
        # scalar is passed as ylim - confirm this is the intended limit.
        ylim=(df.shape[0]+1),
        legend=False,
        fontsize=8,
        width=0.5
    )
    plt.tight_layout(pad=0.5)
    # Persist the figure: the document embeds this file later on, and an
    # unsaved, unclosed figure would otherwise stay open for the whole run.
    plt.savefig(out_path)
    plt.close()


def _plot_class_distribution(report, out_path):
    """Draw train/test class-distribution pie charts and save them to ``out_path``."""
    train_dist = get_class_distribution(report.y_train)
    test_dist = get_class_distribution(report.y_test)
    # presumably entries 0, 1, 2 of the distribution correspond to these
    # labels in this order - verify against get_class_distribution.
    class_labels = ['SELL', 'HOLD', 'BUY']
    train_wedges = [train_dist[i]['count'] for i in range(3)]
    test_wedges = [test_dist[i]['count'] for i in range(3)]

    fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=(6, 3), subplot_kw=dict(aspect="equal"))
    fig.suptitle("Class Distribution")
    panels = (
        (train_ax, train_wedges, "Train"),
        (test_ax, test_wedges, "Test"),
    )
    for ax, wedges, title in panels:
        # Bind the wedge values as a default argument so each pie labels
        # itself with its own totals.
        patches, _texts, _autotexts = ax.pie(
            wedges,
            autopct=lambda pct, vals=wedges: _format_wedge_label(pct, vals),
            textprops=dict(color="w"))
        ax.legend(patches, class_labels,
                  title="Classes",
                  loc="center left",
                  bbox_to_anchor=(1, 0, 0.5, 1))
        ax.set_title(title)
    # Make the layout tight
    plt.tight_layout(pad=0.5)
    plt.savefig(out_path)
    plt.close()


def main(pipeline, result_dir, out_file, equity_dir=None):
    """Build a Word document summarising every model found in ``result_dir``.

    For each entry returned by ``get_report_list`` the pickled report is
    loaded, the feature-importances and class-distribution images are
    generated if their files do not exist yet, and a dataset section, a model
    section (learning curve, parameters table, feature importances) and, when
    available, the trading image are appended to the document.

    Args:
        pipeline: Name used in the document title.
        result_dir: Folder containing the per-model result files.
        out_file: Path the resulting document is saved to.
        equity_dir: Optional folder with ``<key>_trading.png`` images.
    """
    doc = docx.Document()
    index = get_report_list(result_dir, equity_dir)
    doc.add_heading("{} Models training".format(pipeline), 0)
    for key, files in index.items():
        print("Processing {} ..".format(key))
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this script at result folders you trust.
        try:
            with open(files['report'], 'rb') as f:
                report = pickle.load(f)
        except (IOError, OSError, EOFError, pickle.UnpicklingError):
            # A missing or corrupt report takes the same skip path as an
            # empty one instead of aborting the whole document.
            report = None
        if not report:
            print("Error opening report: {}".format(files['report']))
            continue

        if not os.path.exists(files['features']):
            _plot_feature_importances(report, key, files['features'])
        if not os.path.exists(files['distribution']):
            _plot_class_distribution(report, files['distribution'])

        doc.add_heading("{} Dataset".format(key), 1)
        # Dataset table
        doc.add_heading("Dataset analysis", 3)
        n_train = report.y_train.shape[0]
        n_test = report.y_test.shape[0]
        table = doc.add_table(rows=2, cols=4)
        table.cell(0, 0).text = ""
        table.cell(0, 1).text = "Train"
        table.cell(0, 2).text = "Test"
        table.cell(0, 3).text = "Total"
        table.cell(1, 0).text = "# of Records"
        table.cell(1, 1).text = str(n_train)
        table.cell(1, 2).text = str(n_test)
        table.cell(1, 3).text = str(n_train + n_test)
        table.style = 'Medium Grid 3 Accent 1'
        # Class distribution graph
        doc.add_picture(files['distribution'], width=Cm(16))

        doc.add_heading("{} Model".format(key), 1)
        doc.add_picture(files['learning'], width=Cm(16))
        # Center the paragraph holding the picture that was just added.
        last_paragraph = doc.paragraphs[-1]
        last_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        # Add Parameters table
        with open(files['parameters']) as f:
            params = json.load(f)
        doc.add_heading("Classifier Parameters", 3)
        table = doc.add_table(rows=len(params) + 1, cols=2)
        table.cell(0, 0).text = "Parameter"
        table.cell(0, 1).text = "Value"
        # Use dedicated names here so the outer loop's `key` is not clobbered.
        for row, (param_name, param_value) in enumerate(params.items(), start=1):
            table.cell(row, 0).text = str(param_name)
            table.cell(row, 1).text = str(param_value)
        table.style = 'Medium Grid 3 Accent 1'

        doc.add_picture(files['features'], width=Cm(16))
        if 'trading' in files and os.path.exists(files['trading']):
            doc.add_picture(files['trading'], width=Cm(16))
    doc.save(out_file)
def parse_args(argv=None):
    """Parse the command-line options of this script.

    Every option is optional and defaults to the value that used to be
    hard-coded, so running the script without arguments behaves as before.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.

    Returns:
        The parsed namespace with ``pipeline``, ``result_dir``, ``out_file``
        and ``equity_dir`` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Build a Word report from a results folder.')
    parser.add_argument(
        '--pipeline',
        default='XGBoost',
        help='pipeline name shown in the document title')
    parser.add_argument(
        '--result-dir',
        default='B:/Tesi-POLITO/LSTM_forecaster/results/timedspline_debug_xgboost_splines_experiment_171220_060755',
        help='folder containing the per-model result files')
    parser.add_argument(
        '--out-file',
        default='B:/Tesi-POLITO/LSTM_forecaster/results/result.docx',
        help='path of the document to write')
    parser.add_argument(
        '--equity-dir',
        default='B:/Tesi-POLITO/LSTM_forecaster/equities/test2',
        help='folder containing the trading images')
    return parser.parse_args(argv)


if __name__ == '__main__':
    args = parse_args()
    main(
        args.pipeline,
        args.result_dir,
        args.out_file,
        equity_dir=args.equity_dir
    )