In [116]:
# Goal: find the right time to purchase a stock
# so that the trade makes a gain of at least 5%.

In [117]:
# step 1 - Imports
import yfinance as yf
import numpy as np
import pandas as pd

In [141]:
# Common code
# Validate your understanding here. https://www.tradingview.com/chart/z2TJzaW5/?symbol=NSEIX%3ANIFTY1%21
def add_sma(df, window=20):
    """Attach a simple moving average of ``Close`` to ``df``.

    The result is stored in a column named ``SMA_<window>``. The frame is
    modified in place and also returned so the call can be chained.
    """
    column_name = f'SMA_{window}'
    rolling_close = df['Close'].rolling(window=window)
    df[column_name] = rolling_close.mean()
    return df

def add_macd(df, span_short=12, span_long=26, span_signal=9):
    """Attach the MACD line and its signal line to ``df``.

    ``MACD`` is the short-span EMA of ``Close`` minus the long-span EMA;
    ``Signal_Line`` is the EMA of ``MACD`` over ``span_signal``. All EMAs use
    ``adjust=False``. The frame is modified in place and returned.
    """
    def _ema(values, span):
        # Recursive (adjust=False) exponential moving average.
        return values.ewm(span=span, adjust=False).mean()

    close = df['Close']
    df['MACD'] = _ema(close, span_short) - _ema(close, span_long)
    df['Signal_Line'] = _ema(df['MACD'], span_signal)
    return df

def add_rsi(df, window=14):
    """Attach a Relative Strength Index column ``RSI_<window>`` to ``df``.

    Gains and losses are the positive and negative parts of the day-over-day
    change in ``Close``; each is averaged with a plain rolling mean over
    ``window`` rows. The frame is modified in place and returned.
    """
    price_change = df['Close'].diff()

    # Split the change into its upward part and (sign-flipped) downward part.
    ups = price_change.clip(lower=0)
    downs = -price_change.clip(upper=0)

    mean_up = ups.rolling(window=window).mean()
    mean_down = downs.rolling(window=window).mean()

    relative_strength = mean_up / mean_down
    rsi_column = f'RSI_{window}'
    df[rsi_column] = 100 - (100 / (1 + relative_strength))

    return df

def add_bollinger_bands(df, window=20, num_std=2):
    """Attach Bollinger Bands computed from ``Close`` to ``df``.

    Writes ``BB_Middle`` (rolling mean), ``BB_Upper`` and ``BB_Lower``
    (middle plus/minus ``num_std`` rolling standard deviations). The frame is
    modified in place and returned.
    """
    rolling_close = df['Close'].rolling(window=window)
    center = rolling_close.mean()
    band_offset = num_std * rolling_close.std()

    df['BB_Middle'] = center
    df['BB_Upper'] = center + band_offset
    df['BB_Lower'] = center - band_offset
    return df

def add_stochastic_oscillator(df, window=14, smooth_window=3):
    """Attach the Stochastic Oscillator columns ``%K`` and ``%D`` to ``df``.

    ``%K`` places ``Close`` within the rolling Low/High range of the last
    ``window`` rows, scaled to 0-100; ``%D`` is the rolling mean of ``%K``
    over ``smooth_window`` rows. The frame is modified in place and returned.
    """
    lowest_low = df['Low'].rolling(window=window).min()
    highest_high = df['High'].rolling(window=window).max()
    trading_range = highest_high - lowest_low

    df['%K'] = 100 * (df['Close'] - lowest_low) / trading_range
    df['%D'] = df['%K'].rolling(window=smooth_window).mean()
    return df
def add_label_highest_perc(df, window=20, drop_unlabeled=True):
    """
    Add a label column representing the highest percentage increase
    between current Close and max Close over the next `window` rows.

    For every row t two columns are written:
      * `future_max` - max Close over rows t+1 .. t+window (fewer rows are
        used near the end of the frame, where less future data exists);
      * `label`      - (future_max - Close[t]) / Close[t] * 100.

    `future_max` looks into the future by construction, so it must not be
    used as a model input.

    Args:
        df: DataFrame with a 'Close' column (a Series, or a one-column
            DataFrame as produced by MultiIndex columns).
        window: number of future rows to look ahead.
        drop_unlabeled: when True (default), rows whose label cannot be
            computed (e.g. the last row, which has no future data) are
            removed from `df` in place. Pass False to keep them as NaN.

    Returns:
        The same `df` object, modified in place.

    Raises:
        ValueError: if df['Close'] resolves to more than one column.
    """
    close_series = df['Close']
    if isinstance(close_series, pd.DataFrame):
        # MultiIndex columns make df['Close'] a frame; accept it only when it
        # unambiguously holds a single price series.
        if close_series.shape[1] != 1:
            raise ValueError("df['Close'] must contain exactly one column")
        close_series = close_series.iloc[:, 0]

    # Rolling windows look backwards. Running them over the reversed series
    # and flipping the result back yields the max over rows t .. t+window-1.
    reversed_close = close_series.iloc[::-1]
    forward_max = reversed_close.rolling(window=window, min_periods=1).max().iloc[::-1]

    # Shift by one row so the current Close is excluded: rows t+1 .. t+window.
    future_max = forward_max.shift(-1)
    label = (future_max - close_series) / close_series * 100

    # Assign raw values so the result does not depend on index alignment.
    df['future_max'] = future_max.to_numpy()
    df['label'] = label.to_numpy()

    if drop_unlabeled:
        unlabeled = label.isna().to_numpy()
        df.drop(index=df.index[unlabeled], inplace=True)

    return df

In [142]:
# Finalize which model we are using
# Linear Regression, Random Forest, SVM, ARIMA,


In [None]:
# step 2 - Get the data
# Download parameters kept in one place so the notebook can be re-pointed at
# another symbol or date range without hunting through the code.
TICKER = 'AAPL'
START_DATE = '2023-01-01'
END_DATE = '2026-01-01'

data = yf.download(TICKER, start=START_DATE, end=END_DATE)

# Fail fast: every later cell assumes there are rows to work with, and an
# empty frame would otherwise surface as an obscure error further down.
if data.empty:
    raise RuntimeError(
        f"No price data returned for {TICKER} between {START_DATE} and {END_DATE}"
    )

# Keep a raw copy on disk for inspection.
data.to_csv('aapl.csv')
data.head()


In [160]:
# Data clean-up: remove every row that has at least one missing value.
# Done in place so the same `data` object flows into the feature cells.
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Add features (each helper adds its columns to `data` in place)
# 1. Moving Average
add_sma(data)

# 2. MACD
add_macd(data)

# 3. RSI (currently disabled)
#add_rsi(data)

# 4. Bollinger Bands
add_bollinger_bands(data)

# 5. Stochastic Oscillator
# The function has to be called; a bare function name is a no-op and would
# leave the %K / %D columns out of the feature set.
add_stochastic_oscillator(data)
# Validate your understanding here.

# Show only the most recent rows rather than the whole frame.
data.tail()



In [None]:
# Identify the labels
# 1. What is the highest percentage it has reached in next 20 days
# Snapshot the feature frame first (still containing indicator warm-up NaNs),
# then drop incomplete rows and attach the label column.
data.to_csv('abc.csv')
data.dropna(axis=0, how='any', inplace=True)
add_label_highest_perc(df=data)
#print("Done")



In [None]:
# step 3 - Split the features

# 'future_max' is derived from future prices and, together with Close,
# determines the label exactly, so it must be removed from the inputs along
# with the label itself. errors='ignore' keeps the cell working when the
# helper column is absent.
target_derived_columns = ['label', 'future_max']
features = data.drop(columns=target_derived_columns, errors='ignore') # Input
labels = data['label'] # Output

# Chronological split: the first 80% of rows train, the last 20% test.
split_row_no = int(len(data) * .8)

x_train = features.iloc[:split_row_no]
x_test = features.iloc[split_row_no:]
y_train = labels.iloc[:split_row_no]
y_test = labels.iloc[split_row_no:]

# Report the size of the test partition, not the training labels.
print (f"Train data size: {len(x_train)}, Test data size: {len(x_test)}")


In [None]:
# Step 4 - Train all the models
# Finalize which model we are using
# Linear Regression, SVM #, ARIMA,
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Baseline: ordinary least squares on the raw features.
lr = LinearRegression()
lr.fit(x_train, y_train)

# SVR's default kernel is distance based, so columns on very different scales
# (prices vs. 0-100 oscillators, for example) would let the largest-magnitude
# column dominate. Standardise inside a pipeline so the scaler is fitted on
# the training data only and re-applied automatically at predict time.
svr = make_pipeline(StandardScaler(), SVR())
svr.fit(x_train, y_train)

In [167]:
# Step 5 - Test
# Score the held-out rows with both fitted models, in the same order as the
# names on the left-hand side.
lr_pred, svr_pred = (model.predict(x_test) for model in (lr, svr))


In [None]:
# Put the true labels next to each model's predictions to eyeball how close
# they are; the frame takes its index from y_test.
df = pd.DataFrame(dict(original=y_test, lr=lr_pred, svr=svr_pred))
# df = pd.DataFrame(svr_pred, y_test)
df

In [None]:
# Compare performance (pick the candidate)
from sklearn.metrics import mean_squared_error

# Mean squared error on the test split for each model; lower is better.
lr_mse, svr_mse = (mean_squared_error(y_test, pred) for pred in (lr_pred, svr_pred))
print (f"Linear Regression MSE: {lr_mse}, SVR MSE: {svr_mse}")

In [None]:
# Step 6 - Tune the model using the hyperparameter tuning
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.linear_model import Ridge
ridge = Ridge()
parameters = {'alpha': [0.01, 0.1, 1, 10, 100]}

# The rows are in chronological order, so plain K-fold (what cv=5 gives)
# would train some folds on data that comes after their validation block.
# TimeSeriesSplit always validates on rows later than the ones trained on.
ridge_regressor = GridSearchCV(
    ridge,
    parameters,
    scoring='neg_mean_squared_error',
    cv=TimeSeriesSplit(n_splits=5),
)
ridge_regressor.fit(x_train, y_train)
print (ridge_regressor.best_params_)





In [None]:
# Get the optimized model
# Use the alpha selected by the grid search rather than a hard-coded value,
# so this cell stays in sync with whatever the tuning step found.
best_alpha = ridge_regressor.best_params_['alpha']
ridge = Ridge(alpha=best_alpha)
ridge.fit(x_train, y_train)
ridge_pred = ridge.predict(x_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)
print (f"Ridge MSE: {ridge_mse}")


In [None]:
# Step 7 -Deploy and use it for our purpose
# Select one sample row as a one-row DataFrame (note the double brackets).
# This keeps the input 2-D with the same columns the models were fitted on,
# instead of a list wrapping a Series, which loses the column names.
val = x_test.iloc[[1]]
print ("ridge: ",ridge.predict(val))
print ("lr: ",lr.predict(val))


In [None]:
# Input: Stock symbol, Expectation: 4

# Get the data of the stock you want to purchase
# Calculate all the parameters (indicator features)
# Get the data for the current/previous day

# Output: whether the stock can be purchased today (y/n) for the expected gain mentioned
