# In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from parameters_x import *


def rev_ema(x, alpha=0.98):
    """Apply an exponentially weighted mean that runs backwards over ``x``.

    The sequence is reversed, smoothed with ``Series.ewm(alpha=alpha).mean()``
    and reversed again, so every output element is a weighted mean of the
    input element at the same position and the elements that follow it.

    Args:
        x: Iterable of numeric values.
        alpha: Smoothing factor forwarded to ``ewm``. Defaults to 0.98, the
            value that used to be hard-coded here.

    Returns:
        A ``pd.Series`` with a fresh 0..n-1 index, ordered like ``x``.
    """
    reversed_values = pd.Series(list(x)[::-1])
    smoothed = reversed_values.ewm(alpha=alpha).mean()
    # Go through a plain list so the result gets a fresh default index
    # instead of carrying the reversed one.
    return pd.Series(list(smoothed)[::-1])


def hist_count(l1, l2, nums):
    """Build a 2-D table of how often each ``(l1[k], l2[k])`` pair occurs.

    Args:
        l1: Sequence of row positions; each distinct value is cast with
            ``int()`` before it is used as an index.
        l2: Sequence of column positions, aligned with ``l1``.
        nums: Side length of the square result.

    Returns:
        A ``nums`` x ``nums`` float array. Entry ``[int(a), int(b)]`` holds
        the number of occurrences of the pair ``(a, b)``; cells for pairs
        that never occur stay 0.
    """
    pairs = pd.DataFrame({"a": l1, "b": l2})
    # Dummy column so that groupby().count() has something to count.
    pairs['c'] = 0
    counts = pairs.groupby(['a', 'b']).count().reset_index()

    matrix = np.zeros([nums, nums])
    for a, b, c in zip(counts['a'], counts['b'], counts['c']):
        matrix[int(a), int(b)] = c
    return matrix


def simple_re_wheeled(data, f_to_use, bst, version='long'):
    """Score every day of ``data`` with ``bst`` and collect back-test series.

    For each distinct ``date_time`` the rows of that day are scored, the
    stocks flagged by ``make_decision_long`` are selected and the mean of
    their ``return`` values is recorded.

    ``rightbound``, ``bufferbound`` and ``make_decision_buffer`` are read from
    module scope; they are not defined in this file (presumably supplied by
    ``from parameters_x import *`` -- confirm).

    Args:
        data: DataFrame with the columns ``date_time``, ``return``,
            ``unique_symbol``, ``rank_label`` and ``limit_tag`` plus every
            column named in ``f_to_use``.
        f_to_use: Feature columns handed to the model.
        bst: Trained booster providing ``predict`` and ``best_ntree_limit``.
        version: Selection mode; only ``'long'`` is implemented.

    Returns:
        A 9-tuple ``(re_list, rank_pre, rank_label, turn, nums_p_return,
        nums_return, market_return_list, limit_tag_list, time_list)``.
        ``re_list`` (mean return of the selected stocks) and ``turn``
        (selected symbols) hold one entry per day. The other lists hold one
        entry per row, day after day: the decile (0-9) of the prediction
        rank within its day, the ``rank_label`` column, the raw predictions,
        the ``return`` column, the day's mean ``return`` repeated for each
        row, the ``limit_tag`` column and the ``date_time`` column.

    Raises:
        ValueError: If ``version`` is not ``'long'``.
    """
    if version != 'long':
        # No selection mask is ever built for other modes; fail early with a
        # clear message instead of a NameError on the first day.
        raise ValueError(
            "unsupported version %r: only 'long' is implemented" % (version,)
        )

    re_list = []
    rank_pre = []
    rank_label = []
    turn = []
    last_stock_name = []
    nums_return = []
    nums_p_return = []
    market_return_list = []
    limit_tag_list = []
    time_list = []

    for day in sorted(set(data['date_time'])):
        # Slice the day once and derive the feature frame from that slice.
        all_te = data[data['date_time'] == day].copy()
        d_te = all_te[f_to_use]
        market_return = [all_te["return"].mean()] * len(all_te)

        X_test = xgb.DMatrix(d_te)
        # NOTE(review): recent xgboost releases replace ``ntree_limit`` with
        # ``iteration_range`` -- confirm against the installed version.
        preds = bst.predict(X_test, ntree_limit=bst.best_ntree_limit)

        stock_name = np.array(all_te['unique_symbol'])
        _, test_select_tag = make_decision_long(preds, rightbound)
        # Turnover reduction is not applied for now: the buffered mask is
        # computed but the plain percentile mask above is the one used.
        test_select_buffer_tag = make_decision_buffer(
            preds, stock_name, last_stock_name, rightbound, bufferbound
        )

        return_list = np.array(all_te['return'])
        stock_list = list(stock_name[test_select_tag])
        re = return_list[test_select_tag]
        re_list.append(re.mean())

        print("###############################################")
        print(stock_list)
        print(re.mean(), day)
        print(re)

        # extend() keeps accumulation linear; ``a = a + b`` re-copies the
        # whole list on every day.
        rank_pre.extend(
            pd.qcut(pd.Series(preds).rank(method='first'), 10, labels=False)
        )
        rank_label.extend(all_te['rank_label'])
        nums_p_return.extend(preds)
        nums_return.extend(return_list)
        time_list.extend(all_te["date_time"])
        market_return_list.extend(market_return)
        limit_tag_list.extend(all_te["limit_tag"])
        turn.append(stock_list)
        last_stock_name = stock_list

    return (re_list, rank_pre, rank_label, turn, nums_p_return, nums_return,
            market_return_list, limit_tag_list, time_list)


def make_decision_long(preds, rightbound):
    """Select the predictions at or above the ``rightbound`` percentile.

    Args:
        preds: 1-D sequence of model scores.
        rightbound: Percentile (0-100) handed to ``np.percentile``.

    Returns:
        A tuple ``(bs, select_tag)``. ``select_tag`` is a boolean array with
        one entry per prediction, True where the score reaches the
        threshold. ``bs`` is an integer array of ones with one entry per
        selected prediction. Both are empty for empty input.
    """
    scores = np.asarray(preds)
    if scores.size == 0:
        # Return a real boolean mask so callers can still index with it.
        return np.array([], dtype=int), np.array([], dtype=bool)

    # The threshold is the same for every element, so compute it once
    # instead of once per prediction.
    threshold = np.percentile(scores, rightbound)
    select_tag = scores >= threshold
    bs = np.ones(int(select_tag.sum()), dtype=int)
    return bs, select_tag



def train_op(X_train, y_train, params, num_rounds, X_v, y_v, early):
    """Train an xgboost booster while monitoring a validation set.

    Args:
        X_train: Training features.
        y_train: Training labels.
        params: Parameter dict passed to ``xgb.train``.
        num_rounds: Value for ``num_boost_round``.
        X_v: Validation features.
        y_v: Validation labels.
        early: Value for ``early_stopping_rounds``.

    Returns:
        The booster returned by ``xgb.train``.
    """
    train_matrix = xgb.DMatrix(X_train, y_train)
    valid_matrix = xgb.DMatrix(X_v, y_v)
    # Both sets are evaluated each round, labelled 'train' and 'eval'.
    return xgb.train(
        params,
        train_matrix,
        num_boost_round=num_rounds,
        evals=[(train_matrix, 'train'), (valid_matrix, 'eval')],
        early_stopping_rounds=early,
    )


def f_importance(bst, f_to_use):
    """Return each feature's share of the booster's total f-score.

    Args:
        bst: Object whose ``get_fscore()`` returns a mapping from feature
            name to score.
        f_to_use: Feature names to include as keys of the result.

    Returns:
        Dict mapping every name in ``f_to_use`` to a list. The list holds
        ``score / total_score`` for features reported by ``get_fscore()``
        and is empty for features the booster does not report.

    Raises:
        KeyError: If ``get_fscore()`` reports a feature missing from
            ``f_to_use``.
    """
    # Fetch the scores once; the mapping is all that is needed below.
    scores = bst.get_fscore()
    total = sum(scores.values())
    importance_dic = {f: [] for f in f_to_use}
    for feature_name, feature_score in scores.items():
        importance_dic[feature_name].append(feature_score / total)
    # xgb.plot_importance(bst)
    # plt.show()
    return importance_dic


def f_importance_bar(importance_dic, top=20):
    """Draw a horizontal bar chart of the features with the highest mean importance.

    Args:
        importance_dic: Mapping from feature name to a list of importance
            values. A feature whose list is empty, or whose mean is NaN,
            is plotted with importance 0.
        top: Maximum number of features to show. Defaults to 20.

    Returns:
        None. The chart is drawn through ``sns.barplot`` and the axis labels
        and ticks are set through ``plt``. Nothing is drawn when
        ``importance_dic`` is empty.
    """
    if not importance_dic:
        # Nothing to rank or plot.
        return

    name = np.array(list(importance_dic))
    importance_mean = []
    for importance in importance_dic.values():
        values = np.array(importance)
        # Skip the mean of an empty array: it only yields NaN and a warning.
        mean = values.mean() if values.size else np.nan
        importance_mean.append(0 if np.isnan(mean) else mean)
    importance_mean = np.array(importance_mean, dtype=float)

    # sorted() is stable, so features with equal importance keep dict order.
    top_index = sorted(
        range(len(importance_mean)),
        key=lambda i: importance_mean[i],
        reverse=True,
    )[:top]

    sns.barplot(y=name[top_index], x=importance_mean[top_index], orient='h', alpha=0.8, color='red')
    plt.ylabel('Factor', fontsize=10)
    plt.xlabel('Importance', fontsize=10)
    plt.xticks(rotation='horizontal')
    plt.yticks(fontsize=17)


def prepare_data(data, time, col):
    """Keep the columns ``col`` and the rows whose ``time`` is in ``time``.

    Args:
        data: Source DataFrame.
        time: Collection of accepted values for the ``time`` column.
        col: Columns to keep; the row filter reads ``time`` from this
            selection, so it has to be among them.

    Returns:
        A copy of the selected rows and columns; ``data`` is not modified.
    """
    selected = data[col]
    in_window = selected.time.isin(time)
    return selected[in_window].copy()



def add_return(re_list, turnover=None):
    """Accumulate per-period returns into an additive net-worth path.

    The path starts at 1; each step adds the period return and subtracts
    ``0.002 * turnover`` for that period.

    Args:
        re_list: Sequence of per-period returns.
        turnover: Optional sequence of per-period turnover values, indexed
            by position. ``None`` means no turnover cost in any period.

    Returns:
        A list with ``len(re_list) + 1`` entries, the first being 1.
    """
    # ``is None`` rather than ``== None``: the equality test is evaluated
    # element-wise on numpy arrays and cannot be used as a condition.
    if turnover is None:
        turnover = [0] * len(re_list)
    net_worth = [1]
    for i, r in enumerate(re_list):
        net_worth.append(net_worth[-1] + r - 0.002 * turnover[i])
    return net_worth


def nshift(df, gb_col, col, n):
    """Add lagged copies of ``col``, shifted within each ``gb_col`` group.

    For every lag ``i`` from 1 to ``n - 1`` a column named ``col + str(i)``
    is written, holding ``col`` shifted down by ``i`` rows inside its group.

    Args:
        df: DataFrame to extend; it is modified in place.
        gb_col: Column that defines the groups.
        col: Name (a string) of the column to lag.
        n: Exclusive upper bound of the lags; ``n <= 1`` adds nothing.

    Returns:
        The same ``df`` object with the lag columns added.
    """
    for i in range(1, n):
        # GroupBy.shift does the per-group shift in one vectorised call,
        # with no per-group Python callback.
        df[col + str(i)] = df.groupby([gb_col])[col].shift(i)
    return df


def norm_pcol(df, col, adjust_col, n):
    """Re-base the lagged ``col`` columns and scale them by ``close``.

    For each lag ``i`` from 1 to ``n - 1`` the column ``col + str(i)`` is
    overwritten with
    ``(col_i - adjust_col + adjust_col_i) / close``.

    Args:
        df: DataFrame holding ``adjust_col``, ``close`` and the lagged
            ``col`` / ``adjust_col`` columns; it is modified in place.
        col: Base name of the lagged columns to rewrite.
        adjust_col: Base name of the adjustment column.
        n: Exclusive upper bound of the lags.

    Returns:
        The same ``df`` object.
    """
    for lag in range(1, n):
        suffix = str(lag)
        target = col + suffix
        rebased = df[target] - df[adjust_col] + df[adjust_col + suffix]
        df[target] = rebased / df['close']
    return df


def norm_vcol(df, col, n):
    """Express the lagged ``col`` columns as ratios of ``col``.

    For each lag ``i`` from 1 to ``n - 1`` the column ``col + str(i)`` is
    overwritten with itself divided by ``col``.

    Args:
        df: DataFrame holding ``col`` and its lagged columns; it is
            modified in place.
        col: Base column name.
        n: Exclusive upper bound of the lags.

    Returns:
        The same ``df`` object.
    """
    for lag in range(1, n):
        lagged_name = col + str(lag)
        ratio = df[lagged_name] / df[col]
        df[lagged_name] = ratio
    return df



def max_decline_acc(r_path):
    """Relative decline of ``r_path`` from its running maximum.

    Args:
        r_path: Sequence of values indexed by position (e.g. a net-worth
            path).

    Returns:
        Array with one row ``[decline, peak_index]`` per element of
        ``r_path``. ``decline`` is 0 where the element sets a new maximum
        and ``(value - peak) / peak`` otherwise; ``peak_index`` is the
        position of the running maximum. An empty input gives an array of
        shape ``(0, 2)``. A running maximum of 0 makes the ratio a division
        by zero.
    """
    if len(r_path) == 0:
        return np.empty((0, 2))

    peak, peak_index = r_path[0], 0
    rows = [[0, 0]]
    for i in range(1, len(r_path)):
        r = r_path[i]
        if peak < r:
            peak, peak_index = r, i
            rows.append([0, i])
        else:
            # Plain ``else`` so that NaN values also produce a row (of NaN);
            # the output then stays aligned with ``r_path``.
            rows.append([(r - peak) / peak, peak_index])
    return np.array(rows)


def max_decline_add(r_path):
    """Absolute decline of ``r_path`` from its running maximum.

    Args:
        r_path: Sequence of values indexed by position (e.g. a net-worth
            path).

    Returns:
        Array with one row ``[decline, peak_index]`` per element of
        ``r_path``. ``decline`` is 0 where the element sets a new maximum
        and ``value - peak`` otherwise; ``peak_index`` is the position of
        the running maximum. An empty input gives an array of shape
        ``(0, 2)``.
    """
    if len(r_path) == 0:
        return np.empty((0, 2))

    peak, peak_index = r_path[0], 0
    rows = [[0, 0]]
    for i in range(1, len(r_path)):
        r = r_path[i]
        if peak < r:
            peak, peak_index = r, i
            rows.append([0, i])
        else:
            # Plain ``else`` so that NaN values also produce a row (of NaN);
            # the output then stays aligned with ``r_path``.
            rows.append([r - peak, peak_index])
    return np.array(rows)

