# **Setup and Data**

In [1]:
# Install required libraries
!pip install yfinance pandas matplotlib ta --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone


In [2]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

# Download SPY 1-year daily data
ticker = "SPY"
data = yf.download(ticker, period="1y", interval="1d")

# Reset index to make date a column
data.reset_index(inplace=True)

# Show first 5 rows
data.head()

  data = yf.download(ticker, period="1y", interval="1d")
[*********************100%***********************]  1 of 1 completed


Price,Date,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,SPY,SPY,SPY,SPY,SPY
0,2024-07-03,544.675964,545.041408,541.900535,541.940022,32789900
1,2024-07-05,547.816895,548.221824,544.340178,544.982206,41488400
2,2024-07-08,548.449036,549.407074,547.372418,548.607041,36110500
3,2024-07-09,548.982361,550.325616,548.686063,549.41695,27289700
4,2024-07-10,554.414673,554.760343,549.920659,550.216956,38701200


# **Feature Engineering**

In [3]:
import pandas as pd
import numpy as np
from ta.trend import EMAIndicator, MACD
from ta.momentum import RSIIndicator, AwesomeOscillatorIndicator
from ta.volatility import AverageTrueRange

# Copy only necessary columns as clean Series (no 2D shapes)
df = pd.DataFrame()
df['Date'] = data['Date']
df['Open'] = data['Open'].astype(float)
df['High'] = data['High'].astype(float)
df['Low'] = data['Low'].astype(float)
df['Close'] = data['Close'].astype(float)
df['Volume'] = data['Volume'].astype(float)

# Now calculate the 5 indicators safely
df['EMA20'] = EMAIndicator(close=df['Close'], window=20).ema_indicator()
df['MACD'] = MACD(close=df['Close']).macd()
df['MOM_6'] = AwesomeOscillatorIndicator(high=df['High'], low=df['Low']).awesome_oscillator()
df['RSI'] = RSIIndicator(close=df['Close'], window=14).rsi()
df['ATR'] = AverageTrueRange(high=df['High'], low=df['Low'], close=df['Close'], window=14).average_true_range()

# Drop NaN rows from rolling indicators
df.dropna(inplace=True)

# Show a preview
df[['Date', 'Close', 'EMA20', 'MACD', 'MOM_6', 'RSI', 'ATR']].head()

Unnamed: 0,Date,Close,EMA20,MACD,MOM_6,RSI,ATR
33,2024-08-20,551.826904,538.25118,1.370939,5.372547,61.276174,7.838514
34,2024-08-21,553.723267,539.724712,2.383652,8.271834,62.475031,7.799279
35,2024-08-22,549.377319,540.644008,2.803238,9.67007,58.040309,7.820697
36,2024-08-23,555.214722,542.031695,3.565689,10.918346,61.947486,7.746763
37,2024-08-26,553.891235,543.161175,4.01684,11.706642,60.570354,7.536294


# **Slope-Detection Labeling**

In [5]:
K = 3  # As shown in paper's Figure 1

# Calculate averages and std dev (Equation 1 in paper)
df['past_avg'] = df['Close'].rolling(window=K).mean().shift(1)
df['future_avg'] = df['Close'].shift(-K).rolling(window=K).mean()
df['mu'] = df[['past_avg', 'future_avg']].mean(axis=1)
df['sigma'] = df['Close'].rolling(window=2*K).std()  # Standard deviation threshold

# Slope calculation (δ in paper)
df['slope'] = df['future_avg'] - df['past_avg']

# 4-class labeling (Table I in paper)
conditions = [
    (df['Close'] > (df['mu'] + df['sigma'])) & (df['slope'] > 0),  # rise plus
    (df['slope'] > 0),                                              # rise
    (df['slope'] < 0),                                              # fall
    (df['Close'] < (df['mu'] - df['sigma'])) & (df['slope'] < 0)    # fall plus
]
choices = [3, 2, 1, 0]  # Encoded as integers
df['label'] = np.select(conditions, choices, default=2)  # Default to "rise"

# **Model Training**

In [10]:
import numpy as np

# Features to use
feature_cols = ['EMA20', 'MACD', 'MOM_6', 'RSI', 'ATR']
sequence_length = 22

# Convert data to numpy arrays
features = df[feature_cols].values
labels = df['label'].values

X = []
y = []

for i in range(sequence_length, len(df)):
    X.append(features[i-sequence_length:i])  # 22 x 5
    y.append(labels[i])  # label at day i

X = np.array(X)
y = np.array(y)

print("Input shape:", X.shape)  # should be (samples, 22, 5)
print("Label shape:", y.shape)

Input shape: (195, 22, 5)
Label shape: (195,)


In [11]:
from sklearn.model_selection import train_test_split

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)  # No shuffle for time series

print("Training samples:", X_train.shape[0])
print("Testing samples:", X_test.shape[0])

Training samples: 156
Testing samples: 39


In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

# Build CNN
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(22, 5)),
    MaxPooling1D(pool_size=2),
    Conv1D(32, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(4, activation='softmax')  # 4-class classificationn
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [14]:
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_split=0.2)

Epoch 1/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - accuracy: 0.3897 - loss: 30.6360 - val_accuracy: 0.4688 - val_loss: 10.7665
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4989 - loss: 9.5914 - val_accuracy: 0.4375 - val_loss: 5.2938
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3825 - loss: 6.1695 - val_accuracy: 0.4062 - val_loss: 2.3646
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4401 - loss: 2.1956 - val_accuracy: 0.4688 - val_loss: 1.3712
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3982 - loss: 1.7681 - val_accuracy: 0.5312 - val_loss: 1.3243
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4934 - loss: 1.1025 - val_accuracy: 0.4375 - val_loss: 1.0537
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━

In [15]:
model.save("cnn_price_trend_model.h5")



In [16]:
from google.colab import files

# Download the trained model file to your local machine
files.download("cnn_price_trend_model.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
import os

# Create folders if they don't exist
os.makedirs("model", exist_ok=True)
os.makedirs("data", exist_ok=True)
os.makedirs("backtest", exist_ok=True)

In [18]:
import shutil

shutil.move("cnn_price_trend_model.h5", "model/cnn_price_trend_model.h5")

'model/cnn_price_trend_model.h5'

In [19]:
import os
print(os.listdir("model"))

['cnn_price_trend_model.h5']


In [20]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [21]:
model.save("/content/drive/MyDrive/QR_Intern/cnn_price_trend_model.h5")



In [22]:
df.to_csv("/content/drive/MyDrive/QR_Intern/data.csv", index=False)