In [1]:
'''
This program is a full start to end implementation of a CNN that classifies
candlestick charts based on the price being higher or lower after a certain
period of time.

The program that does the following:

1. Calculate price increase or decrease after period
2. Create and resize candlestick images
3. Set up convolutional neural network (CNN)
4. Train CNN on training data
5. Test CNN on test data

Future Tasks:
- Calculate PnL values based on stop loss and take profit
- Better understanding of CNN and definition
- Better understanding of CNN training and testing
'''

'\nThis program is a full start to end implementation of a CNN that classifies\ncandlestick charts based on the price being higher or lower after a certain\nperiod of time.\n\nThe program that does the following:\n\n1. Calculate price increase or decrease after period\n2. Create and resize candlestick images\n3. Set up convolutional neural network (CNN)\n4. Train CNN on training data\n5. Test CNN on test data\n\nFuture Tasks:\n- Calculate PnL values based on stop loss and take profit\n- Better understanding of CNN and definition\n- Better understanding of CNN training and testing\n'

In [1]:
# Configurable Parameters

ticker = "AAPL"
start_date = '2023-01-01'
end_date = '2024-01-01'
timeframe = '1d'
length = 30
save_images = True

In [2]:
# Other Parameters

charts_path = 'charts/'
chart_img_name = 'chart_'
chart_img_ext = '.jpg'

In [3]:
# Library and Tool Imports

import matplotlib.pyplot as plt
import mplfinance as mpf
import numpy as np
import os 
import shutil
import yfinance as yf
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [4]:
# Download Stock Data

data = yf.download(ticker, start=start_date, end=end_date, interval=timeframe)

[*********************100%%**********************]  1 of 1 completed


In [5]:
# Calculate Increase and Decrease Values

closing_prices = data['Close']
pnl = []

for i in range(len(closing_prices) - length):
    pnl.append(int(closing_prices.iloc[i + length] > closing_prices.iloc[i]))

In [6]:
# Create and Save Image Charts

if not os.path.exists(charts_path):
    os.mkdir(charts_path)

for i in range(len(data) - length):
    mpf.plot(
        data.iloc[i:i+length],
        type='candle',
        ylabel='',
        figscale=0.5,
        datetime_format='',
        tight_layout=True,
        savefig=f'{charts_path}{chart_img_name}{i}{chart_img_ext}')

In [7]:
# Resize Image Charts

image_width, image_height = Image.open(f'{charts_path}{chart_img_name}{0}{chart_img_ext}').size

image_width, image_height = image_width // 10 * 10, image_height // 10 * 10

for i in range(len(pnl)):
    path = f'{charts_path}{chart_img_name}{i}{chart_img_ext}'
    with Image.open(path) as img:
        img = img.resize((image_height, image_width))
        img.save(path)

In [8]:
# Create Arrays from Image Data

images = []

for i in range(len(pnl)):
    img = plt.imread(f'{charts_path}{chart_img_name}{i}{chart_img_ext}')
    images.append(img / 255.)

if not save_images:
    shutil.rmtree(charts_path)

In [10]:
# Split Data into Training and Testing Sets

images = np.array(images)
labels = np.array(pnl)

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [11]:
# Build the CNN Model

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(image_width, image_height, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(


In [12]:
# Data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Train the model
history = model.fit(
    train_datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_test, y_test),
    epochs=25
)

Epoch 1/25


  self._warn_if_super_not_called()


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.5683 - loss: 2.2422 - val_accuracy: 0.7955 - val_loss: 0.5548
Epoch 2/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.5668 - loss: 0.7120 - val_accuracy: 0.7955 - val_loss: 0.5317
Epoch 3/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.6754 - loss: 0.6604 - val_accuracy: 0.7955 - val_loss: 0.5847
Epoch 4/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.7625 - loss: 0.6388 - val_accuracy: 0.7955 - val_loss: 0.5523
Epoch 5/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.7233 - loss: 0.6081 - val_accuracy: 0.7955 - val_loss: 0.5237
Epoch 6/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.7015 - loss: 0.6299 - val_accuracy: 0.7955 - val_loss: 0.5434
Epoch 7/25
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s

In [15]:
# Model Evaluation with Testing Data

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy*100}%")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.7652 - loss: 0.5530
Test accuracy: 77.27272510528564%
