# CNN Regression

***


# Getting Started

In [65]:
import json
import time

import numpy as np
import tensorflow as tf
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

from src.repository import BarsRepo, DbLocation, DbSample

# Create File DataFrame

In [66]:
# Open the project's bars repository (local database, full sample) and fetch
# the chart table. Later cells read the 'n' column (a JSON string) and the
# 'filepath' column from this frame.
# NOTE(review): n_after_bars=5 is presumably the look-ahead horizon stored per
# chart — confirm against BarsRepo.get_charts_nth_price.
bars_repo = BarsRepo(DbLocation.LOCAL, DbSample.ALL)
charts_df = bars_repo.get_charts_nth_price(n_after_bars=5)

In [67]:
# Key inside each row's JSON-encoded 'n' mapping that is used as the
# regression target.
CHOSEN_N = 12

# Shuffle once with a fixed seed so every later split is reproducible.
charts_df = charts_df.sample(frac=1.0, random_state=1).reset_index(drop=True)

# Column-wise map instead of row-wise DataFrame.apply(axis=1): it avoids
# building a Series per row and still yields a Series when the frame is empty
# (apply(axis=1) returns a DataFrame there, which breaks the assignment).
target_key = str(CHOSEN_N)
charts_df[f'n_{CHOSEN_N}'] = charts_df['n'].map(
    lambda encoded: json.loads(encoded)[target_key]
)

# Image paths are stored relative to the repository root; the notebook runs
# two directories below it.
charts_df['filepath_jupyter'] = charts_df['filepath'].map(
    lambda path: f'../../{path}'
)


In [68]:
# Hold out 30% of the (already shuffled) charts for final testing; the fixed
# random_state keeps the split reproducible between runs.
train_df, test_df = train_test_split(
    charts_df,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

# Loading Images

In [69]:
# Training generator: scales pixel values into [0, 1] and reserves 20% of the
# frame it is given for the 'validation' subset.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
)

# Test generator: same pixel scaling, no validation split.
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Bare expression: in a notebook this renders the training split for a quick
# visual check.
train_df

In [None]:
# Arguments shared by all three image flows: where to find the image path and
# the numeric target, the size images are resized to, and the batch size.
# class_mode='raw' passes the y_col values through unchanged as targets.
shared_flow_args = dict(
    x_col='filepath_jupyter',
    y_col=f'n_{CHOSEN_N}',
    target_size=(119, 86),  # (119, 86), (714, 516)
    class_mode='raw',
    batch_size=32,
)

# Training and validation flows both read train_df; `subset` selects the side
# of the generator's validation split.
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset='training',
    **shared_flow_args,
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    subset='validation',
    **shared_flow_args,
)

# The test flow is not shuffled so predictions stay aligned with
# test_images.labels.
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **shared_flow_args,
)

# Training

In [72]:
# Functional-API CNN: three conv/max-pool stages, then a dropout-regularised
# dense head ending in a single linear unit for regression.
inputs = tf.keras.Input(shape=(119, 86, 3))  # (714, 516, 3) / (119, 86, 3)

x = inputs
for n_filters in (32, 64, 128):
    # Each stage: 3x3 ReLU convolution followed by 2x2 max pooling.
    x = tf.keras.layers.Conv2D(filters=n_filters, kernel_size=3, activation='relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=2)(x)

x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)

outputs = tf.keras.layers.Dense(1, activation='linear')(x)

In [None]:
# Start the wall-clock timer before the model is assembled; the matching
# `end` reading is taken after training finishes.
start = time.time()
model = tf.keras.Model(
    inputs=inputs,
    outputs=outputs,
)

def get_f1(y_true, y_pred):
    """Return the batch-wise F1 score of *y_pred* against *y_true*.

    Both tensors are clipped to [0, 1] and rounded before counting, so the
    metric treats its inputs as binary labels / probabilities.

    NOTE(review): this is a binary-classification metric. On a continuous
    regression target, every value that rounds to 0 after clipping is counted
    as a negative, so the score may say little about fit quality.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor with the same number of elements.

    Returns:
        Scalar tensor: the harmonic mean of precision and recall, with
        ``K.epsilon()`` added to each denominator to avoid division by zero.
    """
    import tensorflow.keras.backend as K

    # Align dtype and flatten both tensors so a (batch,) target multiplied by
    # a (batch, 1) prediction can never broadcast into a (batch, batch) matrix.
    y_pred = K.flatten(y_pred)
    y_true = K.flatten(K.cast(y_true, y_pred.dtype))

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    return 2 * (precision * recall) / (precision + recall + K.epsilon())


# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.000_1)
model.compile(
    optimizer=optimizer,
    loss='mse',
    # NOTE(review): 'accuracy' and get_f1 are classification metrics and carry
    # little meaning for a continuous target. They are kept because later
    # cells unpack three values from model.evaluate() and plot
    # history.history["accuracy"].
    metrics=['accuracy', get_f1]
)

# Train for up to 50 epochs; stop once val_loss has not improved for 5
# consecutive epochs and roll back to the best weights seen.
history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
    ]
)

end = time.time()
print(f'training TIME: {end - start}')

# Results

In [None]:
# Flatten the (n, 1) prediction matrix to shape (n,). np.ravel keeps the
# result 1-D even for a single sample, whereas np.squeeze would collapse it
# to a 0-d array and break element-wise indexing later on.
predicted = np.ravel(model.predict(test_images))
# The test flow is created with shuffle=False, so labels line up with
# predictions.
actual = test_images.labels

# evaluate() returns the loss followed by the compiled metrics, in order.
test_loss, test_acc, test_f1 = model.evaluate(test_images)
print(f"Test loss: {test_loss:.3f}")
print(f"Test accuracy: {test_acc:.3f}")
print(f"Test F1-score: {test_f1:.3f}")

# The loss is MSE, so the test RMSE is the square root of test_loss if needed.

r2 = r2_score(actual, predicted)
print(f"Test R^2 Score: {r2:.5f}")

In [None]:
import statistics


def _mean_or_nan(values):
    """Return the arithmetic mean of *values*, or NaN when there are none."""
    return statistics.mean(values) if values else float('nan')


# Mean absolute error between targets and predictions.
abs_errors = np.abs(actual - predicted).tolist()
x_mean = _mean_or_nan(abs_errors)

# Reference magnitudes of the targets. The positive/negative subsets can be
# empty on small or one-sided samples, hence the NaN fallback instead of a
# StatisticsError.
actual_values = actual.tolist()
print(f'diff mean: {x_mean}')
print(f'actual mean: {_mean_or_nan(actual_values)}')
print(f'actual positive mean: {_mean_or_nan([v for v in actual_values if v > 0])}')
print(f'actual negative mean: {_mean_or_nan([v for v in actual_values if v < 0])}')

# Directional agreement: a prediction counts as "same" when it has the same
# sign as the target (both positive, both negative, or both exactly zero).
same = int(np.count_nonzero(np.sign(predicted) == np.sign(actual)))
diff = len(actual) - same
print(f'same: {same}, diff: {diff}')


In [None]:
import matplotlib.pyplot as plt


def _plot_curves(epoch_axis, train_values, val_values, train_label, val_label, title):
    """Draw training (dots) and validation (line) curves on the current figure."""
    plt.plot(epoch_axis, train_values, "bo", label=train_label)
    plt.plot(epoch_axis, val_values, "b", label=val_label)
    plt.title(title)
    plt.legend()


# Per-epoch metrics recorded by model.fit().
curves = history.history
accuracy, val_accuracy = curves["accuracy"], curves["val_accuracy"]
loss, val_loss = curves["loss"], curves["val_loss"]
epochs = range(1, len(loss) + 1)

_plot_curves(
    epochs, accuracy, val_accuracy,
    "Training accuracy", "Validation accuracy",
    "Training and validation accuracy",
)

# Second figure for the loss curves.
plt.figure()
_plot_curves(
    epochs, loss, val_loss,
    "Training loss", "Validation loss",
    "Training and validation loss",
)
plt.show()