In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,roc_auc_score

In [4]:
stock_name = ""
s_length = 0

def load_data():
    target_stock = pd.read_csv("./data/n_" + stock_name + ".csv")
    target_stock = pd.DataFrame(target_stock)
    # 时间点长度
    time_stamp = 50
    # 划分训练集与验证集
    target_stock = target_stock[['Open', 'High', 'Low', 'Close', 'Volume']]  # 'Volume'

    # 新增一列正负表示涨跌
    close = target_stock['Close'].tolist()
    y = []
    for i in range(len(target_stock) - 1):
        if close[i + 1] >= close[i]:
            y.append(1)
        else:
            y.append(-1)
    y.append(0)

    v4 = []
    for i in range(len(target_stock) - time_stamp - 1):
        if (y[i + time_stamp - 2] == 1):
            v4.append(1)
        else:
            v4.append(0)
    v4.append(0)
    target_stock["trend"] = y
    # 归一化
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(target_stock)

    train = scaled_data[0:s_length + time_stamp]
    test = scaled_data[s_length - time_stamp:]
    # 训练集
    x_train, y_train = [], []
    for i in range(len(train) - time_stamp):
        train[i + time_stamp - 1][5] = v4[i]
        x_train.append(train[i:i + time_stamp])
        y_train.append(train[i + time_stamp, 3])

    x_train, y_train = np.array(x_train), np.array(y_train)
    # 测试集
    x_test, y_test = [], []
    for i in range(len(test) - time_stamp):
        test[i + time_stamp - 1][5] = v4[i + s_length - time_stamp]
        x_test.append(test[i:i + time_stamp])
        y_test.append(test[i + time_stamp, 3])

    x_test, y_test = np.array(x_test), np.array(y_test)

    return x_train, x_test, y_train, y_test


def transncomp(closing_price, y_valid):
    # preprocessing,ues list
    y_valid = y_valid.reshape(-1)
    closing_price = np.array(closing_price)
    closing_price.reshape(-1)

    # temp1
    y_valid.tolist()
    temp1 = []
    for i in range(len(y_valid) - 1):
        if y_valid[i + 1] >= y_valid[i]:
            temp1.append(1)
        else:
            temp1.append(-1)

    # temp2
    closing_price.tolist()
    temp2 = []
    for i in range(len(closing_price) - 1):
        if closing_price[i + 1] >= closing_price[i]:
            temp2.append(1)
        else:
            temp2.append(-1)
    TP, FN, FP, TN = 0,0,0,0
    # compare
    for i, j in zip(temp1,temp2):
        if i==1 and j==1:
            TP += 1
        if i==1 and j==-1:
            FN += 1
        if i==-1 and j==1:
            FP += 1
        if i==-1 and j==-1:
            TN += 1

    accuracy=(TP+TN)/(TP+TN+FP+FN)*100
    precision=TP/(TP+FP)*100
    recall=TP/(TP+FN)*100
    F1=2*((precision*recall)/(precision+recall))
    auc = roc_auc_score(temp1,temp2)*100
    return accuracy,precision,recall,F1,auc

In [5]:
def comp_ml():
    x_train, x_test, y_train, y_test = load_data()

    #LSTM
    # from tensorflow.keras.models import Sequential
    # from tensorflow.keras.layers import LSTM,Dense
    # lstm_model = Sequential()
    # lstm_model.add(LSTM(units=100, return_sequences=True, input_dim=x_train.shape[-1], input_length=x_train.shape[1]))
    # lstm_model.add(LSTM(units=50))
    # lstm_model.add(Dense(1))
    # lstm_model.compile(loss='mean_squared_error', optimizer='adam')
    # lstm_model.fit(x_train, y_train, epochs=2, batch_size=32, verbose=1,validation_split=0.2)
    # lstm_result = lstm_model.predict(x_test)
    # acc = transncomp(lstm_result,y_test)


    # 数据展平
    x_train_new=np.reshape(x_train,(x_train.shape[0],-1))
    y_train_new=np.reshape(y_train,(y_train.shape[0]))
    x_test_new=np.reshape(x_test,(x_test.shape[0],-1))
    y_test_new=np.reshape(y_test,(y_test.shape[0]))

    #ARIMA
    from statsmodels.tsa.arima_model import ARIMA
    model_ARIMA = ARIMA()
    model_ARIMA.fit(x_train_new,y_train_new)
    ARIMA_result = model_ARIMA.predict(x_test_new)
    acc = transncomp(ARIMA_result,y_test_new)

    #KNN
    # from sklearn import neighbors
    # model_KNeighborsRegressor = neighbors.KNeighborsRegressor()
    # model_KNeighborsRegressor.fit(x_train_new,y_train_new)
    # knn_result = model_KNeighborsRegressor.predict(x_test_new)
    # acc = transncomp(knn_result,y_test_new)
    
    #Random Forest
    # from sklearn import ensemble
    # model_RandomForestRegressor = ensemble.RandomForestRegressor(n_estimators=1)  
    # model_RandomForestRegressor.fit(x_train_new,y_train_new)
    # rf_result = model_RandomForestRegressor.predict(x_test_new)
    # acc = transncomp(y_test_new,rf_result)

    # #SVM
    # from sklearn import svm
    # model_SVR = svm.SVR()
    # model_SVR.fit(x_train_new,y_train_new)
    # svm_result = model_SVR.predict(x_test_new)
    # acc = transncomp(svm_result,y_test_new)


    f = open("ARIMA_out.txt", "a")
    print(stock_name,"ARIMA",file=f)
    print("Accuracy:%.2f, Precision:%.2f,Recall:%.2f,F1-score:%.2f,AUC:%.2f"%(acc[0],acc[1],acc[2],acc[3],acc[4]),file=f)
    print("**"*30,file=f)
    f.close()

In [6]:
stock_list = {"Google": 2800,"Amazon": 3400,"Apple": 3400,"Microsoft": 3400,  "Tesla": 1800}
for key, value in stock_list.items():
    print(key, "------------", value)
    stock_name = key
    s_length = value
    x_train, x_test, y_train, y_test = load_data()
    comp_ml()

Google ------------ 2800


NotImplementedError: 
statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been removed in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and statsmodels.tsa.SARIMAX.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained. It also offers alternative specialized
parameter estimators.
