In [7]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import r2_score,mean_squared_error

from matplotlib.pyplot import plot as plot
%matplotlib inline

df=pd.DataFrame(yf.download('AAPL',start='2020-01-01',end='2025-01-01',timeout=30))
df

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2


In [2]:
import ta.momentum

df['SMA_20']=df['Close'].rolling(window=20).mean()
df['SMA_50']=df['Close'].rolling(window=50).mean()

delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

df.columns

MultiIndex([('Adj Close', 'AAPL'),
            (    'Close', 'AAPL'),
            (     'High', 'AAPL'),
            (      'Low', 'AAPL'),
            (     'Open', 'AAPL'),
            (   'Volume', 'AAPL'),
            (   'SMA_20',     ''),
            (   'SMA_50',     ''),
            (      'RSI',     '')],
           names=['Price', 'Ticker'])

In [3]:
ema_12 = df['Close'].ewm(span=12, adjust=False).mean()
ema_26 = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = ema_12 - ema_26
df['Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

df['AvgVolume'] = df['Volume'].rolling(window=20).mean()

In [4]:
latest = df.iloc[-1]
summary = {
    "Price": round(latest['Close'], 2),
    "SMA_20": round(latest['SMA_20'], 2),
    "SMA_50": round(latest['SMA_50'], 2),
    "RSI": round(latest['RSI'], 2),
    "MACD": round(latest['MACD'], 2),
    "Signal": round(latest['Signal'], 2),
    "Volume": int(latest['Volume']),
    "AvgVolume": int(latest['AvgVolume'])
}

df.dropna()
summary


IndexError: single positional indexer is out-of-bounds

In [None]:
plot(df['Close'])
plot

In [None]:
features = ['Close', 'RSI', 'MACD', 'Signal', 'SMA_20', 'SMA_50', 'Volume']

print(df.isnull().sum())
df = df.dropna()
data = df[features]
# Scale features
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
scaled_data[0]
# Prepare training data


In [None]:
X, y = [], []
sequence_length = 60
for i in range(sequence_length, len(scaled_data)):
    X.append(scaled_data[i-sequence_length:i])
    y.append(scaled_data[i, 0])  # Predicting 'Close'

X, y = np.array(X), np.array(y)

# Build LSTM model
len(X)==len(y)

In [None]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(64))
model.add(Dropout(0.2))
model.add(Dense(1))

# Compile and train
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X, y, epochs=10, batch_size=32)
p=model.predict(X)
# Predict next-day close
last_seq = scaled_data[-sequence_length:]
last_seq = np.expand_dims(last_seq, axis=0)
predicted_scaled = model.predict(last_seq)
predicted_close = scaler.inverse_transform(
    np.concatenate((predicted_scaled, np.zeros((1, scaled_data.shape[1] - 1))), axis=1)
)[0][0]

print(f"📊 Predicted next-day Close: ${round(predicted_close, 2)}")

In [None]:
mse = mean_squared_error(y,p)
rmse = np.sqrt(mse)
print("RMSE:", rmse)
a=r2_score(y,p)
a

In [None]:
plot(y)
plot(p)
plot

In [None]:
import xgboost as xgb
model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1)
features = ['Close', 'RSI', 'MACD', 'Signal', 'SMA_20', 'SMA_50', 'Volume']
# Align features and labels
aligned_data = data.iloc[-len(y):]  # Ensure the same number of rows as y
model.fit(aligned_data[features], y)

# Reshape X to match the expected input for XGBoost
X_reshaped = X[:, -1, :]  # Use the last time step of each sequence
y_pred = model.predict(X_reshaped)

# RMSE
rmse = np.sqrt(mean_squared_error(y, y_pred))
print(f"XGBoost RMSE: {rmse:.2f}")

In [None]:
plot(y)
plot(y_pred)
plot