In [4]:
pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.22-py2.py3-none-any.whl (63 kB)
     -------------------------------------- 63.2/63.2 kB 375.5 kB/s eta 0:00:00
Collecting pytz>=2022.5
  Downloading pytz-2023.3-py2.py3-none-any.whl (502 kB)
     ------------------------------------ 502.3/502.3 kB 583.8 kB/s eta 0:00:00
Collecting html5lib>=1.1
  Downloading html5lib-1.1-py2.py3-none-any.whl (112 kB)
     ------------------------------------ 112.2/112.2 kB 465.4 kB/s eta 0:00:00
Collecting frozendict>=2.3.4
  Downloading frozendict-2.3.8-cp310-cp310-win_amd64.whl (35 kB)
Collecting multitasking>=0.0.7
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Collecting appdirs>=1.4.4
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: pytz, multitasking, appdirs, html5lib, frozendict, yfinance
  Attempting uninstall: pytz
    Found existing installation: pytz 2022.1
    Uninstalling pytz-2022.1:
      Successfully uninstalled pytz-2022.1
Successfully


[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_datareader import data as pdr
import yfinance as yf
from datetime import datetime

In [None]:
# Plot styling: seaborn's 'whitegrid' style is set first, then matplotlib's
# "fivethirtyeight" style sheet is applied afterwards.
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
# IPython magic selecting the inline matplotlib backend for this notebook.
%matplotlib inline

# Set the start and end dates for data retrieval
def one_year_before(moment):
    """Return midnight of the same calendar day one year before ``moment``.

    29 February has no counterpart in the preceding year, so it maps to
    28 February instead of raising ``ValueError``.

    Parameters
    ----------
    moment : datetime.datetime
        Reference point; only its year, month and day are used.

    Returns
    -------
    datetime.datetime
        Date one year earlier, with the time set to 00:00:00.
    """
    day = moment.day
    if moment.month == 2 and day == 29:
        # The year before a leap year is never a leap year.
        day = 28
    return datetime(moment.year - 1, moment.month, day)


end = datetime.now()
start = one_year_before(end)

# Define the list of tech stocks
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

In [None]:
# Download each ticker's history for [start, end] through pandas_datareader
# (after yfinance's pdr_override() hook) and tag every frame with its ticker.
yf.pdr_override()
company_list = [
    pdr.get_data_yahoo(ticker, start, end).assign(Company=ticker)
    for ticker in tech_list
]

In [None]:
# Stack the per-company frames row-wise into one long DataFrame
df = pd.concat(company_list, axis=0)

In [None]:
# Function to plot closing prices for each company
def plot_closing_prices(company_list):
    """Plot the adjusted closing price of every company on a grid of subplots.

    Parameters
    ----------
    company_list : iterable of pandas.DataFrame
        One frame per company. Each frame must have an 'Adj Close' column
        (plotted against the frame's index) and a 'Company' column whose
        first value is used in the subplot title.

    Notes
    -----
    The grid is two columns wide and has at least two rows, so up to four
    companies are drawn on a 2x2 layout with a (15, 10) figure. Additional
    companies add rows instead of raising ``IndexError``.
    """
    companies = list(company_list)
    n_cols = 2
    # Ceiling division, with a floor of two rows.
    n_rows = max(2, -(-len(companies) // n_cols))
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 5 * n_rows))
    # axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), ...
    for ax, company in zip(axes.flat, companies):
        company.plot(y='Adj Close', ax=ax)
        ax.set_xlabel(None)
        ax.set_ylabel('Adj Close')
        ax.set_title(f"Closing Price of {company['Company'].iloc[0]}")
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the adjusted-close chart of every downloaded company
plot_closing_prices(company_list=company_list)


In [None]:
# Function to plot sales volume for each company
def plot_sales_volume(company_list):
    """Plot the traded volume of every company on a grid of subplots.

    Parameters
    ----------
    company_list : iterable of pandas.DataFrame
        One frame per company. Each frame must have a 'Volume' column
        (plotted against the frame's index) and a 'Company' column whose
        first value is used in the subplot title.

    Notes
    -----
    The grid is two columns wide and has at least two rows, so up to four
    companies are drawn on a 2x2 layout with a (15, 10) figure. Additional
    companies add rows instead of raising ``IndexError``.
    """
    companies = list(company_list)
    n_cols = 2
    # Ceiling division, with a floor of two rows.
    n_rows = max(2, -(-len(companies) // n_cols))
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 5 * n_rows))
    # axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), ...
    for ax, company in zip(axes.flat, companies):
        company.plot(y='Volume', ax=ax)
        ax.set_xlabel(None)
        ax.set_ylabel('Volume')
        ax.set_title(f"Sales Volume for {company['Company'].iloc[0]}")
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the traded-volume chart of every downloaded company
plot_sales_volume(company_list=company_list)

In [None]:
# Function to calculate and plot moving averages for each company
def plot_moving_averages(company_list, ma_days=(10, 20, 50)):
    """Compute rolling means of 'Adj Close' and plot them for every company.

    Parameters
    ----------
    company_list : iterable of pandas.DataFrame
        One frame per company with 'Adj Close' and 'Company' columns.
        Each frame is modified in place: a column named
        ``'MA for <n> days'`` is added (or overwritten) per window.
    ma_days : iterable of int, optional
        Rolling-window lengths, in rows. Defaults to 10, 20 and 50.
        Previously this argument was accepted but ignored.

    Notes
    -----
    The grid is two columns wide and has at least two rows, so up to four
    companies are drawn on a 2x2 layout with a (15, 10) figure. Additional
    companies add rows instead of raising ``IndexError``.
    """
    companies = list(company_list)
    windows = list(ma_days)
    n_cols = 2
    # Ceiling division, with a floor of two rows.
    n_rows = max(2, -(-len(companies) // n_cols))
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 5 * n_rows))
    # axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), ...
    for ax, company in zip(axes.flat, companies):
        ma_columns = []
        for window in windows:
            column = f"MA for {window} days"
            company[column] = company['Adj Close'].rolling(window).mean()
            ma_columns.append(column)
        company[['Adj Close'] + ma_columns].plot(ax=ax)
        ax.set_xlabel(None)
        ax.set_ylabel('Price')
        ax.set_title(f"Moving Averages for {company['Company'].iloc[0]}")
        ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Draw price and moving-average lines for every downloaded company
plot_moving_averages(company_list=company_list)

In [None]:
# Function to calculate and plot daily returns for each company
def plot_daily_returns(company_list):
    """Compute and plot the row-over-row percentage change of 'Adj Close'.

    Parameters
    ----------
    company_list : iterable of pandas.DataFrame
        One frame per company with 'Adj Close' and 'Company' columns.
        Each frame is modified in place: a 'Daily Return' column holding
        ``company['Adj Close'].pct_change()`` is added (or overwritten).

    Notes
    -----
    The grid is two columns wide and has at least two rows, so up to four
    companies are drawn on a 2x2 layout with a (15, 10) figure. Additional
    companies add rows instead of raising ``IndexError``.
    """
    companies = list(company_list)
    n_cols = 2
    # Ceiling division, with a floor of two rows.
    n_rows = max(2, -(-len(companies) // n_cols))
    _, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(15, 5 * n_rows))
    # axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), ...
    for ax, company in zip(axes.flat, companies):
        company['Daily Return'] = company['Adj Close'].pct_change()
        company['Daily Return'].plot(ax=ax, legend=True, linestyle='--', marker='o')
        ax.set_xlabel(None)
        ax.set_ylabel('Daily Return')
        ax.set_title(f"Daily Return for {company['Company'].iloc[0]}")
        ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the daily percentage-change chart of every downloaded company
plot_daily_returns(company_list=company_list)


In [None]:
# Function to calculate and plot correlation heatmaps
def plot_correlation_heatmaps(df):
    """Plot correlation heatmaps of daily returns and of closing prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Either a long-format frame in which ``df['Adj Close']`` is a single
        column and a 'Company' column names the owner of each row (the shape
        produced by concatenating per-company frames), or a wide frame in
        which ``df['Adj Close']`` is already a DataFrame with one column per
        company.

    Notes
    -----
    The left panel shows the correlation of ``pct_change()`` of the closing
    prices; the right panel shows the correlation of the closing prices
    themselves. A long-format frame is reshaped to one column per company
    first, because ``Series.corr()`` cannot produce a correlation matrix.
    """
    adj_close = df['Adj Close']
    if isinstance(adj_close, pd.Series):
        # Long format: move 'Company' into the index, then spread it into columns.
        closing_prices = (
            df.set_index('Company', append=True)['Adj Close'].unstack('Company')
        )
    else:
        closing_prices = adj_close
    daily_returns = closing_prices.pct_change()

    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 10))
    sns.heatmap(daily_returns.corr(), annot=True, cmap='summer', ax=axes[0])
    axes[0].set_title('Correlation of Stock Returns')
    sns.heatmap(closing_prices.corr(), annot=True, cmap='summer', ax=axes[1])
    axes[1].set_title('Correlation of Stock Closing Prices')
    plt.tight_layout()
    plt.show()


In [None]:
# Draw the correlation heatmaps for the combined frame
plot_correlation_heatmaps(df=df)

In [None]:
# Function to preprocess and prepare the data for LSTM training
def prepare_lstm_data(dataset, training_data_ratio=0.95, lookback=60):
    """Scale a value series and cut it into sliding windows for LSTM training.

    Parameters
    ----------
    dataset : array-like, shape (n_samples, n_features)
        Raw values. All columns are scaled, but only column 0 is windowed.
    training_data_ratio : float, optional
        Fraction of the rows (rounded up) assigned to the training split.
    lookback : int, optional
        Number of consecutive earlier values that make up one input window.

    Returns
    -------
    x_train : numpy.ndarray, shape (n_train_windows, lookback, 1)
        Scaled training windows.
    y_train : numpy.ndarray, shape (n_train_windows,)
        Scaled value that follows each training window.
    x_test : numpy.ndarray, shape (n_test, lookback, 1)
        Scaled test windows; the earliest ones reach back into the last
        ``lookback`` rows of the training split.
    y_test : numpy.ndarray, shape (n_test,)
        Unscaled values ``dataset[training_data_len:, 0]``, one per test
        window.
    scaler : MinMaxScaler
        The fitted scaler, for inverting predictions.

    Raises
    ------
    ValueError
        If the training split does not contain more than ``lookback`` rows.

    Notes
    -----
    The scaler is fitted on the whole dataset, test rows included, before
    the split is made.
    """
    from sklearn.preprocessing import MinMaxScaler

    dataset = np.asarray(dataset)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)

    training_data_len = int(np.ceil(len(dataset) * training_data_ratio))
    if training_data_len <= lookback:
        # Otherwise there is no training window and the test slice below
        # would start at a negative index.
        raise ValueError(
            f"training split has {training_data_len} rows; "
            f"more than lookback={lookback} are required"
        )
    train_data = scaled_data[:training_data_len, :]
    # Prepend the last `lookback` training rows so the first test target
    # has a full window of history.
    test_data = scaled_data[training_data_len - lookback:, :]

    def make_windows(rows):
        """Stack every run of `lookback` consecutive column-0 values."""
        windows = [rows[i - lookback:i, 0] for i in range(lookback, len(rows))]
        # Reshaping by explicit length keeps an empty split well-formed.
        return np.array(windows).reshape(len(windows), lookback, 1)

    x_train = make_windows(train_data)
    y_train = train_data[lookback:, 0].copy()

    x_test = make_windows(test_data)
    # Window k of test_data ends just before row training_data_len + k, so
    # the targets are exactly the rows after the training split.
    y_test = dataset[training_data_len:, 0].copy()

    return x_train, y_train, x_test, y_test, scaler

In [None]:
# Turn the adjusted-close column into windowed training and test arrays
# NOTE(review): df stacks the rows of every ticker, so this one column strings
# all companies' prices end to end — confirm that is intended.
dataset = df.filter(items=['Adj Close']).to_numpy()
x_train, y_train, x_test, y_test, scaler = prepare_lstm_data(dataset)

# Assemble and compile the two-layer LSTM regressor
from keras.models import Sequential
from keras.layers import Dense, LSTM

model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Fit the network: a single pass over the training windows, one sample per batch
model.fit(x=x_train, y=y_train, batch_size=1, epochs=1)

In [None]:
# Function to make predictions using the trained LSTM model
def make_predictions(model, x_test, scaler):
    """Run ``model.predict`` on ``x_test`` and undo the scaling of the result.

    Returns whatever ``scaler.inverse_transform`` produces for the raw
    model output.
    """
    return scaler.inverse_transform(model.predict(x_test))

In [None]:
# Predict on the test windows and map the output back to the original scale
predictions = make_predictions(model=model, x_test=x_test, scaler=scaler)

In [None]:
# Function to calculate and display the root mean squared error (RMSE)
def calculate_rmse(predictions, y_test):
    """Print and return the root mean squared error between two sequences.

    Both inputs are flattened before they are compared. Without that, a
    column of predictions with shape (n, 1) subtracted from targets with
    shape (n,) broadcasts to an (n, n) matrix and yields a wrong error.

    Parameters
    ----------
    predictions : array-like
        Predicted values, any shape.
    y_test : array-like
        Reference values with the same number of elements.

    Returns
    -------
    float
        The RMSE, which is also printed.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of elements.
    """
    predicted = np.asarray(predictions, dtype=float).reshape(-1)
    actual = np.asarray(y_test, dtype=float).reshape(-1)
    if predicted.size != actual.size:
        raise ValueError(
            f"predictions has {predicted.size} values but y_test has {actual.size}"
        )
    rmse = float(np.sqrt(np.mean((predicted - actual) ** 2)))
    print("Root Mean Squared Error (RMSE):", rmse)
    return rmse

In [None]:
# Report the prediction error on the test split
calculate_rmse(predictions=predictions, y_test=y_test)

# Function to plot the predicted and actual closing prices
def plot_predictions(y_test, predictions):
    """Draw the actual and the predicted values as two lines on one figure.

    ``y_test`` is labelled 'Actual' and ``predictions`` 'Predicted'; both
    are plotted against their positional index.
    """
    _, ax = plt.subplots(figsize=(16, 6))
    ax.set_title('Model')
    ax.set_xlabel('Date', fontsize=18)
    ax.set_ylabel('Close Price USD ($)', fontsize=18)
    for values, name in ((y_test, 'Actual'), (predictions, 'Predicted')):
        ax.plot(values, label=name)
    ax.legend(loc='lower right')
    plt.show()

In [None]:
# Compare the model output with the held-out values on one chart
plot_predictions(y_test=y_test, predictions=predictions)