<a href="https://colab.research.google.com/github/Apoorv-Krishn-DAS/-Apoorv-Krishn-DAS/blob/main/Algorithmic_Trading_checkpoint_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Initial Imports:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# To run models:
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from joblib import dump, load

# For visualizations:
import matplotlib.pyplot as plt
import seaborn as sns
%pylab inline
%matplotlib inline

Populating the interactive namespace from numpy and matplotlib


# Pre-Processing:  
# 3-Factor Model

In [6]:
# Define function to read in factors from csv and return cleaned dataframe:
def get_factors(factors):
  factor_file=factors+".csv"
  factor_df = pd.read_csv(factor_file)

  # Clean factor dataframe:
  factor_df = factor_df.rename(columns={
    'Unnamed: 0': 'Date',
  })

  factor_df['Date'] = factor_df['Date'].apply(lambda x: pd.to_datetime(str(x), format='%Y%m%d'))

  # Set "Date" as Index:
  factor_df = factor_df.set_index('Date')

  return factor_df

In [7]:
# Confirm Fama-French dataframe:
factors = get_factors("french_fama")
factors.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07-01,0.1,-0.24,-0.28,0.009
1926-07-02,0.45,-0.32,-0.08,0.009
1926-07-06,0.17,0.27,-0.35,0.009
1926-07-07,0.09,-0.59,0.03,0.009
1926-07-08,0.21,-0.36,0.15,0.009


In [8]:
# Do same thing as above, but for the individual stock CSV:
def choose_stock(ticker):
  ticker_file=ticker+".csv"
  stock=pd.read_csv(ticker_file, index_col='Date', parse_dates=True, infer_datetime_format=True)
  stock["Returns"]=stock["Close"].dropna().pct_change()*100
  stock.index = pd.Series(stock.index).dt.date

  return stock

In [9]:
# Read in ATT dataframe using above function:
ticker="T"
stock=choose_stock(ticker)
stock.head()

Unnamed: 0_level_0,Close,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03,47.0,
2000-01-04,44.25,-5.851064
2000-01-05,44.94,1.559322
2000-01-06,43.75,-2.647975
2000-01-07,44.13,0.868571


In [10]:
# Concatenate Fama-French dataframe with Stock dataframe:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview dataframe:
combined_df.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,Close,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-11-01 00:00:00,0.05,-0.77,0.1,33.55,-0.297177


In [11]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [12]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

Date
2017-11-01 00:00:00    33.55
Name: Close, dtype: float64

In [13]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

ValueError: ignored

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Define function to generate signals dataframe for algorithm:
def generate_signals(input_df, start_capital=100000, share_count=2000):
  # Set initial capital:
  initial_capital = float(start_capital)

  signals_df = input_df.copy()

  # Set the share size:
  share_size = share_count

  # Take a 500 share position where the Buy Signal is 1 (prior day's predictions greater than prior day's returns):
  signals_df['Position'] = share_size * signals_df['Buy Signal']

  # Make Entry / Exit Column:
  signals_df['Entry/Exit']=signals_df["Buy Signal"].diff()

  # Find the points in time where a 500 share position is bought or sold:
  signals_df['Entry/Exit Position'] = signals_df['Position'].diff()

  # Multiply share price by entry/exit positions and get the cumulative sum:
  signals_df['Portfolio Holdings'] = signals_df['Close'] * signals_df['Entry/Exit Position'].cumsum()

  # Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio:
  signals_df['Portfolio Cash'] = initial_capital - (signals_df['Close'] * signals_df['Entry/Exit Position']).cumsum()

  # Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments):
  signals_df['Portfolio Total'] = signals_df['Portfolio Cash'] + signals_df['Portfolio Holdings']

  # Calculate the portfolio daily returns:
  signals_df['Portfolio Daily Returns'] = signals_df['Portfolio Total'].pct_change()

  # Calculate the cumulative returns:
  signals_df['Portfolio Cumulative Returns'] = (1 + signals_df['Portfolio Daily Returns']).cumprod() - 1

  signals_df = signals_df.dropna()

  return signals_df

In [None]:
# Generate and view signals dataframe using generate signals function
signals_df=generate_signals(y_test)
signals_df.head(10)

In [None]:
def algo_evaluation(signals_df):
  # Prepare dataframe for metrics
  metrics = [
      'Annual Return',
      'Cumulative Returns',
      'Annual Volatility',
      'Sharpe Ratio',
      'Sortino Ratio']

  columns = ['Backtest']

  # Initialize the DataFrame with index set to evaluation metrics and column as `Backtest` (just like PyFolio)
  portfolio_evaluation_df = pd.DataFrame(index=metrics, columns=columns)
  # Calculate cumulative returns:
  portfolio_evaluation_df.loc['Cumulative Returns'] = signals_df['Portfolio Cumulative Returns'][-1]
  # Calculate annualized returns:
  portfolio_evaluation_df.loc['Annual Return'] = (signals_df['Portfolio Daily Returns'].mean() * 252)
  # Calculate annual volatility:
  portfolio_evaluation_df.loc['Annual Volatility'] = (signals_df['Portfolio Daily Returns'].std() * np.sqrt(252))
  # Calculate Sharpe Ratio:
  portfolio_evaluation_df.loc['Sharpe Ratio'] = (signals_df['Portfolio Daily Returns'].mean() * 252) / (signals_df['Portfolio Daily Returns'].std() * np.sqrt(252))

  #Calculate Sortino Ratio/Downside Return:
  sortino_ratio_df = signals_df[['Portfolio Daily Returns']].copy()
  sortino_ratio_df.loc[:,'Downside Returns'] = 0

  target = 0
  mask = sortino_ratio_df['Portfolio Daily Returns'] < target
  sortino_ratio_df.loc[mask, 'Downside Returns'] = sortino_ratio_df['Portfolio Daily Returns']**2
  down_stdev = np.sqrt(sortino_ratio_df['Downside Returns'].mean()) * np.sqrt(252)
  expected_return = sortino_ratio_df['Portfolio Daily Returns'].mean() * 252
  sortino_ratio = expected_return/down_stdev

  portfolio_evaluation_df.loc['Sortino Ratio'] = sortino_ratio


  return portfolio_evaluation_df

In [None]:
# Generate Metrics for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Define function to evaluate the underlying asset:
def underlying_evaluation(signals_df):
  underlying=pd.DataFrame()
  underlying["Close"]=signals_df["Close"]
  underlying["Portfolio Daily Returns"]=underlying["Close"].pct_change()
  underlying["Portfolio Daily Returns"].fillna(0,inplace=True)
  underlying['Portfolio Cumulative Returns']=(1 + underlying['Portfolio Daily Returns']).cumprod() - 1

  underlying_evaluation=algo_evaluation(underlying)

  return underlying_evaluation

In [None]:
# Define function to return algo evaluation relative to underlying asset combines the two evaluations into a single dataframe
def algo_vs_underlying(signals_df):
  metrics = [
      'Annual Return',
      'Cumulative Returns',
      'Annual Volatility',
      'Sharpe Ratio',
      'Sortino Ratio']

  columns = ['Algo','Underlying']
  algo=algo_evaluation(signals_df)
  underlying=underlying_evaluation(signals_df)

  comparison_df=pd.DataFrame(index=metrics,columns=columns)
  comparison_df['Algo']=algo['Backtest']
  comparison_df['Underlying']=underlying['Backtest']

  return comparison_df

# Generate Metrics for Function vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Define function which accepts daily signals dataframe and returns evaluations of individual trades:
def trade_evaluation(signals_df):

  #initialize dataframe
  trade_evaluation_df = pd.DataFrame(
    columns=[
        'Entry Date',
        'Exit Date',
        'Shares',
        'Entry Share Price',
        'Exit Share Price',
        'Entry Portfolio Holding',
        'Exit Portfolio Holding',
        'Profit/Loss']
  )


  entry_date = ''
  exit_date = ''
  entry_portfolio_holding = 0
  exit_portfolio_holding = 0
  share_size = 0
  entry_share_price = 0
  exit_share_price = 0

  # Loop through signal DataFrame
  # If `Entry/Exit` is 1, set entry trade metrics
  # Else if `Entry/Exit` is -1, set exit trade metrics and calculate profit,
  # Then append the record to the trade evaluation DataFrame
  for index, row in signals_df.iterrows():
      if row['Entry/Exit'] == 1:
          entry_date = index
          entry_portfolio_holding = row['Portfolio Total']
          share_size = row['Entry/Exit Position']
          entry_share_price = row['Close']

      elif row['Entry/Exit'] == -1:
          exit_date = index
          exit_portfolio_holding = abs(row['Portfolio Total'])
          exit_share_price = row['Close']
          profit_loss = exit_portfolio_holding - entry_portfolio_holding
          trade_evaluation_df = trade_evaluation_df.append(
              {
                  'Entry Date': entry_date,
                  'Exit Date': exit_date,
                  'Shares': share_size,
                  'Entry Share Price': entry_share_price,
                  'Exit Share Price': exit_share_price,
                  'Entry Portfolio Holding': entry_portfolio_holding,
                  'Exit Portfolio Holding': exit_portfolio_holding,
                  'Profit/Loss': profit_loss
              },
              ignore_index=True)

  # Print the DataFrame
  return trade_evaluation_df

In [None]:
# Generate Evaluation table:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

# ANOVA Table / Other Visualizations for 3-Factor Models:  
# ATT:

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot P&L Histrogram:
trade_evaluation_df["Profit/Loss"].hist(bins=20)

In [None]:
# Define function that plots Algo Cumulative Returns vs. Underlying Cumulative Returns:
def underlying_returns(signals_df):
  underlying=pd.DataFrame()
  underlying["Close"]=signals_df["Close"]
  underlying["Underlying Daily Returns"]=underlying["Close"].pct_change()
  underlying["Underlying Daily Returns"].fillna(0,inplace=True)
  underlying['Underlying Cumulative Returns']=(1 + underlying['Underlying Daily Returns']).cumprod() - 1
  underlying['Algo Cumulative Returns']=signals_df["Portfolio Cumulative Returns"]

  graph_df=underlying[["Underlying Cumulative Returns", "Algo Cumulative Returns"]]

  return graph_df

In [None]:
# Generate Cumulative Return plot using above defined function:
underlying_returns(signals_df).plot(figsize=(20,10))

# DIS:

In [None]:
# Use pre-defined function to read in $DIS data:
ticker="DIS"
stock=choose_stock(ticker)
stock.head()

In [None]:
# Concatenate Fama-French DataFrame with Stock DataFrame:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview DataFrame
combined_df.head()

In [None]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [None]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

In [None]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics table for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics table for Algorithm vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate Metrics table for Disney using pre-defined function:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot Cumulative Returns:
underlying_returns(signals_df).plot(figsize=(20,10))

## SPY

In [None]:
# Read in SPY data:
ticker="SPY"
stock=choose_stock(ticker)
stock.head()

In [None]:
# Concatenate Fama-French DataFrame with Stock DataFrame:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview DataFrame
combined_df.head()

In [None]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [None]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

In [None]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics for Algorithm vs Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate and view Metrics dataframe:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot Cumulative Returns:
underlying_returns(signals_df).plot(figsize=(20,10))

# Fama-French Five Factor Model:  
# ATT


In [None]:
# Confirm New Fama-French 5-Factor DataFrame:
factors = get_factors("french_fama_5")
factors.head()

In [None]:
# Read-in ATT dataframe:
ticker="T"
stock=choose_stock(ticker)
stock.head()

In [None]:
# Concatenate Fama-French DataFrame with Stock DataFrame:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview DataFrame
combined_df.head()

In [None]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [None]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

In [None]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics for Algorithm vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate and view metrics dataframe:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
underlying_returns(signals_df).plot(figsize=(20,10))

## DIS


In [None]:
# Read-in Disney dataframe:
ticker="DIS"
stock=choose_stock(ticker)
stock.head()

In [None]:
# Concatenate Fama-French DataFrame with Stock DataFrame:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview DataFrame
combined_df.head()

In [None]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [None]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

In [None]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics for Algorithm vs. Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate and view Metrics dataframe:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot Cumulative Returns:
underlying_returns(signals_df).plot(figsize=(20,10))

## SPY


In [None]:
# Read-in SPY dataframe:
ticker="SPY"
stock=choose_stock(ticker)
stock.head()

In [None]:
# Concatenate Fama-French DataFrame with Stock DataFrame:
combined_df = pd.concat([factors, stock], axis='columns', join='inner')

# Drop nulls:
combined_df = combined_df.dropna()
combined_df = combined_df.drop('RF', axis=1)

# Preview DataFrame
combined_df.head()

In [None]:
# Define X and y variables:
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)
y = combined_df.loc[:, 'Returns']

In [None]:
# Split into Training/Testing Data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]
close_test=combined_df["Close"][split:]
close_test

In [None]:
# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# Create, train, and predict model:
lin_reg_model = LinearRegression(fit_intercept=True)
lin_reg_model = lin_reg_model.fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Convert y_test to a dataframe:
y_test = y_test.to_frame()

In [None]:
signals_df = y_test.copy()

# Add "predictions" to dataframe:
y_test['Predictions'] = predictions
y_test["Close"]=close_test

# Add "Buy Signal" column based on whether day's predictions were greater than the day's actual returns:
y_test['Buy Signal'] = np.where(y_test['Predictions'] > y_test['Returns'], 1.0,0.0)

# Drop nulls:
y_test=y_test.dropna()

y_test.head()

In [None]:
# Generate signals Dataframe using generate signals function
signals_df=generate_signals(y_test)

In [None]:
# Generate Metrics for Algorithm:
algo_evaluation(signals_df)

In [None]:
# Generate Metrics for Algorithm vs Buy-and-Hold Strategy:
algo_vs_underlying(signals_df)

In [None]:
# Generate Metrics dataframe:
trade_evaluation_df=trade_evaluation(signals_df)
trade_evaluation_df

In [None]:
# Set X and y variables:
y = combined_df.loc[:, 'Returns']
X = combined_df.drop('Returns', axis=1)
X = X.drop('Close',axis=1)

# Add "Constant" column of "1s" to DataFrame to act as an intercept, using StatsModels:
X = sm.add_constant(X)

# Split into Training/Testing data:
split = int(0.8 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

# Run Ordinary Least Squares (OLS )Model:
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Plot Partial Regression Plot:
fig = sm.graphics.plot_partregress_grid(model_results, fig = plt.figure(figsize=(12,8)))
plt.show()

In [None]:
# Plot Cumulative Returns:
underlying_returns(signals_df).plot(figsize=(20,10))