<a href="https://colab.research.google.com/github/MasterBeard/Image-Augmentation/blob/main/Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install tushare

Collecting tushare
  Downloading tushare-1.4.6-py3-none-any.whl (255 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/255.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/255.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.5/255.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting simplejson (from tushare)
  Downloading simplejson-3.19.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.9/137.9 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bs4 (from tushare)
  Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Collecting websocket-client==0.57.0 (from tushare)
  Downloading websocket_client-0.57.0-py2.py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# **This notebook explains how to use reverse images to retrain CNN models.**

In [4]:
import numpy as np
import tushare as ts
import pandas as pd
import statsmodels.api as sm
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow import keras

# 1. Obtain FTSE index data from the Tushare platform.

In [5]:
# Read the Tushare API token from the environment instead of committing it to
# the notebook; fall back to an interactive prompt when the variable is unset.
import os
from getpass import getpass

tushare_token = os.environ.get("TUSHARE_TOKEN") or getpass("Tushare API token: ")
pro = ts.pro_api(tushare_token)
name="FTSE"
# Request the index rows for the study period from the global-index endpoint.
whole_df = pro.index_global(ts_code=name, start_date='20100701', end_date='20240118')
# Forward-fill missing values. `fillna(method='ffill')` is deprecated in recent
# pandas releases; `.ffill()` is the equivalent call.
# NOTE(review): this fill runs before the chronological sort applied later in
# the notebook - if the API returns newest-first rows, gaps are filled from
# later dates. Confirm the row order.
whole_df=whole_df.ffill()

The function below computes, for each row of the price table, the slope of a straight line fitted by ordinary least squares to the closing prices in a rolling five-day window.

In [7]:
def next_period_trend(data, window_size=5):
    """Return the rolling least-squares slope of the closing price.

    For every row, a straight line is fitted to the last ``window_size``
    values of ``data['close']`` against the time index
    ``0 .. window_size - 1``; the slope of that line is the result for the row.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a numeric ``close`` column, ordered the way the windows
        should run.
    window_size : int, default 5
        Number of consecutive rows used for each fit; must be at least 2.

    Returns
    -------
    pandas.Series
        Slopes aligned with ``data``'s index. The first ``window_size - 1``
        entries are NaN because their window is incomplete.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 2 (no slope can be fitted).
    """
    if window_size < 2:
        raise ValueError("window_size must be at least 2 to fit a slope")

    # Closed-form OLS slope of y on t (with intercept):
    #     sum((t - mean(t)) * y) / sum((t - mean(t)) ** 2)
    # The time axis is identical for every window, so it is prepared once
    # instead of building and fitting a regression model per window.
    steps = np.arange(window_size, dtype=float)
    centered_steps = steps - steps.mean()
    denominator = np.dot(centered_steps, centered_steps)

    def _slope(window):
        # `window` is a plain ndarray because of raw=True below.
        return np.dot(centered_steps, window) / denominator

    return data['close'].rolling(window_size).apply(_slope, raw=True)

Because this notebook is run on Colab, the following are the steps to read images from Google Drive. If running locally, this can be ignored, and you can directly read the images that have been downloaded.

In [13]:
# Mount Google Drive when running on Colab. Outside Colab the `google.colab`
# package is missing, so skip the mount instead of failing; the images are
# then expected to be available locally.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab not available - skipping Google Drive mount")
else:
    drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
# Make the project folder on the mounted Drive the working directory, so the relative file names used below resolve there.
%cd /content/drive/My Drive/Colab Notebooks/Image Augmentation/

/content/drive/My Drive/Colab Notebooks/Image Augmentation


# 2. Read the images: one set contains normal stock candlestick charts, and the other contains vertically flipped candlestick charts.

In [20]:
# Load the two saved image stacks for the selected index from the working directory.
normal_image_file = f"{name}_image32.npy"
flipped_image_file = f"{name}_Re_image32.npy"
whole_image_array = np.load(normal_image_file)       # normal charts
re_whole_image_array = np.load(flipped_image_file)   # vertically flipped charts

Calculate the closing price slope.

In [21]:
# Order the rows by trade date (ascending) before measuring the trend.
whole_df = whole_df.sort_values('trade_date', ascending=True)
slope_n5 = next_period_trend(whole_df)
# Discard the first 33 slope values (positional slice).
# NOTE(review): presumably this offset aligns the labels with the image stack - confirm.
forward_label = slope_n5.iloc[33:]

# 3. Horizontally flip the candlestick images to obtain another two sets of reversed images.

In [22]:
# Build the flipped variants in one vectorised step: reversing the third axis
# of the stacked array reverses the second axis of every image, which is what
# flipping each image one by one does. `.copy()` gives an independent array.
forward_image_array = whole_image_array
backward_image_array = whole_image_array[:, :, ::-1, :].copy()

mirrior_image_array = re_whole_image_array
mirrior_backward_image_array = re_whole_image_array[:, :, ::-1, :].copy()

# 4. Classify the labels, marking those with a slope greater than 0 as 1, and all others as 0.

In [23]:
# Binary target: 1 where the slope is strictly positive, 0 everywhere else.
binary = forward_label.gt(0)
target_label = np.where(binary, 1, 0)

Normalize all images by dividing by 255.

In [24]:
# Divide every image stack by 255.0; each name is rebound to a new, scaled array.
(backward_image_array,
 forward_image_array,
 mirrior_backward_image_array,
 mirrior_image_array) = [
    images / 255.0
    for images in (backward_image_array,
                   forward_image_array,
                   mirrior_backward_image_array,
                   mirrior_image_array)
]

Next, split the data into training, validation, and test sets.

In [25]:
# Two-stage split with a fixed seed: first hold out 16% of the samples for
# testing, then take 0.2/0.84 of the remainder as the validation set.
remaining_image, test_image, remaining_label, test_label = train_test_split(
    forward_image_array, target_label, test_size=0.16, random_state=10)
train_image, val_image, train_label, val_label = train_test_split(
    remaining_image, remaining_label, test_size=0.2 / 0.84, random_state=10)

In [27]:
# Sizes of the training, validation, and test sets: 2169, 678, and 543 images.
len(train_image), len(val_image), len(test_image)

(2169, 678, 543)

# 5. Construct the CNN model architecture.

In [28]:
def make_model():
    """Build and compile the CNN classifier used throughout the notebook.

    The network takes inputs of shape (32, 32, 3) and stacks three
    convolution + max-pooling stages (32, 128 and 128 filters, 3x3 kernels,
    'same' padding, ReLU), followed by flatten, dropout (0.5), a 128-unit
    ReLU dense layer, and a 2-unit output layer named "outputs" that emits
    raw logits.

    Returns
    -------
    A compiled Sequential model using the Adam optimizer, sparse categorical
    cross-entropy computed from logits, and accuracy as the metric.
    """
    # Settings shared by all three convolution layers.
    conv_options = dict(kernel_size=3, padding='same', activation='relu')

    stack = [
        layers.Conv2D(32, input_shape=(32, 32, 3), **conv_options),
        layers.MaxPooling2D(),
        layers.Conv2D(128, **conv_options),
        layers.MaxPooling2D(),
        layers.Conv2D(128, **conv_options),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu'),
        # No activation here: the loss below is configured with from_logits=True.
        layers.Dense(2, name="outputs"),
    ]
    model = Sequential(stack)

    logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])
    return model

In [29]:
# Build a freshly initialised network and print its layer-by-layer summary.
model = make_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 16, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 128)       36992     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 8, 8, 128)         0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 8, 8, 128)         147584    
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 4, 4, 128)         0

Establish an early stopping mechanism.

In [30]:
# Stop a training run once the validation loss has failed to improve for
# 5 epochs, and restore the weights from the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=5, restore_best_weights=True
)

In [32]:
# Fit the baseline model on the training split for at most 50 epochs,
# validating after every epoch so early stopping can end the run sooner.
history = model.fit(
    x=train_image,
    y=train_label,
    validation_data=(val_image, val_label),
    epochs=50,
    callbacks=[early_stopping],
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


In [45]:
# Reuse a previously saved baseline when one exists. On a fresh run the
# checkpoint has not been written yet (the notebook saves it further down),
# so keep the model that was just trained instead of failing on a missing file.
import os

original_model_path = "Original_model_"+str(name)+".h5"
if os.path.exists(original_model_path):
    model = tf.keras.models.load_model(original_model_path)

The performance of the original model is as follows:

In [46]:
# Score the current model on the validation and test splits, then print the
# (loss, accuracy) pair for the test split followed by the validation split.
val_loss, val_acc = model.evaluate(x=val_image, y=val_label, verbose=0)
test_loss, test_acc = model.evaluate(x=test_image, y=test_label, verbose=0)
for split_loss, split_acc in ((test_loss, test_acc), (val_loss, val_acc)):
    print(split_loss, split_acc)

0.6679201126098633 0.6261510252952576
0.6437720656394958 0.6489675641059875


Save the original model

In [47]:
# Write the same model under two names: the "Best" file is the checkpoint the
# retraining loop overwrites and reloads, the "Original" file keeps the baseline.
for file_prefix in ("Best_model_", "Original_model_"):
    model.save(f"{file_prefix}{name}.h5")

  saving_api.save_model(


Combine all reversed images into one dataset for convenient random sampling later.

In [48]:
# Stack the three flipped image sets along the first (sample) axis to form one sampling pool.
reversed_image_sets = [
    backward_image_array,
    mirrior_image_array,
    mirrior_backward_image_array,
]
com_train_image = np.concatenate(reversed_image_sets, axis=0)

# 6. Retraining: in each round, randomly select 5,000 reversed candlestick images and combine them with the normal training images to compose a new training set. The original model enters this algorithm; a retrained model without improvement (a lower validation loss) is eliminated, and only models that show improvement are kept. The process is set to repeat 10 times.

In [49]:
n = 5000  # number of flipped images drawn in every round
best_model_path = "Best_model_" + str(name) + ".h5"

for i in range(10):
    # Draw n distinct flipped images and assign each a random 0/1 label.
    index = np.random.choice(len(com_train_image), int(n), replace=False)
    x_random = com_train_image[index]
    y_random = np.random.randint(0, 2, size=len(x_random))

    # Append the original training data, then shuffle images and labels together.
    x_random = np.concatenate([x_random, train_image], axis=0)
    y_random = np.concatenate([y_random, train_label], axis=0)
    order = np.arange(len(y_random))
    np.random.shuffle(order)
    x_random, y_random = x_random[order], y_random[order]

    # Continue training the current model on the mixed set, then score it on
    # the validation split.
    history = model.fit(
        x=x_random,
        y=y_random,
        validation_data=(val_image, val_label),
        epochs=100,
        callbacks=[early_stopping],
        verbose=0,
    )
    new_loss, new_acc = model.evaluate(x=val_image, y=val_label, verbose=0)

    if not new_loss < val_loss:
        # No improvement: report the failed round and return to the best checkpoint.
        print(f'fail{i}', new_loss, new_acc)
        model = keras.models.load_model(best_model_path)
        continue

    # Improvement: this model becomes the new best checkpoint and reference loss.
    model.save(best_model_path)
    val_loss, val_acc = new_loss, new_acc
    print(i, new_loss, new_acc)

# Final scores of the surviving model: test split first, then validation split.
val_loss, val_acc = model.evaluate(x=val_image, y=val_label, verbose=0)
test_loss, test_acc = model.evaluate(x=test_image, y=test_label, verbose=0)
print(test_loss, test_acc)
print(val_loss, val_acc)

0 0.6271610856056213 0.6504424810409546
fail1 0.631825864315033 0.6445427536964417
2 0.6264476180076599 0.6533923149108887
fail3 0.640033483505249 0.6637167930603027
fail4 0.6365007758140564 0.6474926471710205
fail5 0.6621963977813721 0.6622418761253357
fail6 0.6828692555427551 0.6415929198265076
fail7 0.6493455767631531 0.6563422083854675
fail8 0.6676269173622131 0.6342182755470276
fail9 0.6385443806648254 0.6401180028915405
0.6651716232299805 0.6482504606246948
0.6264476180076599 0.6533923149108887


After two improvements, our model achieved an accuracy of 64.82% on the test set (before retraining, it was 62.62%), and the loss on the validation set was reduced to 0.6264 (before retraining, it was 0.6437).