In [1]:
"""This Program Uses Stock Data and Regression Methods to Verify CAPM Model"""

'This Program Uses Stock Data and Regression Methods to Verify CAPM Model'

In [None]:
import pandas as pd
import numpy as np

def _load_stock_data(stock_csv, symbols):
    """Read the stock price CSV, keep the requested symbols, and add daily log returns."""
    prices = pd.read_csv(stock_csv)

    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of `prices` (avoids SettingWithCopyWarning).
    selected = prices.loc[prices['Name'].isin(symbols)].copy()

    # Convert date column from string to datetime
    selected['date'] = pd.to_datetime(selected['date'], format='%Y-%m-%d')

    # Open-to-close log return for each trading day
    selected['daily_return'] = np.log(selected['close'] / selected['open'])
    return selected


def _load_market_data(treasury_csv):
    """Read the T-bill / S&P 500 CSV, fill interior T-bill gaps, and add return columns."""
    market = pd.read_csv(treasury_csv)

    # T-bill is not available for certain dates. pd.read_csv already parses the
    # token 'NA' as NaN, so a string comparison against 'NA' never matches;
    # coerce to numeric and interpolate linearly between the neighbouring rows.
    # limit_area='inside' leaves leading/trailing gaps as NaN (no extrapolation).
    bill_yield = pd.to_numeric(market['10 Yr T bill'], errors='coerce')
    market['10 Yr T bill'] = bill_yield.interpolate(method='linear', limit_area='inside')

    # Convert Date column from string to datetime
    market['Date'] = pd.to_datetime(market['Date'], format='%m/%d/%Y')

    # Reformat 10 Yr T bill return into decimals (original data was in %)
    market['10 Yr T bill returns'] = market['10 Yr T bill'] / 100

    # Row-over-row simple return of the S&P 500 level (first row is NaN)
    market['SP 500 Returns'] = market['SP 500 '] / market['SP 500 '].shift() - 1
    return market


def _beta(asset_returns, market_returns):
    """Return Cov(asset, market) / Var(market) using population moments, or NaN if undefined."""
    # Use only rows where both returns are present so that covariance and
    # variance are computed over the same observations.
    paired = pd.concat([asset_returns, market_returns], axis=1, keys=['asset', 'market']).dropna()
    if len(paired) < 2:
        return np.nan

    asset_dev = paired['asset'] - paired['asset'].mean()
    market_dev = paired['market'] - paired['market'].mean()

    market_var = (market_dev ** 2).mean()
    if market_var == 0:
        return np.nan

    # Covariance is the mean of the PRODUCT of deviations; each deviation must
    # be parenthesised before multiplying.
    return (asset_dev * market_dev).mean() / market_var


def data_analysis(stock_symbols=('NOV', 'PDCO'),
                  stock_csv='Clean_Data_Out.csv',
                  treasury_csv='Treasury Bill Yield Curve.csv'):
    """Compare actual daily stock returns with CAPM-predicted returns.

    Parameters
    ----------
    stock_symbols : str or iterable of str, optional
        Ticker symbols to analyse (matched against the ``Name`` column).
        Any number of symbols may be supplied.
    stock_csv : str or path-like, optional
        CSV with at least the columns ``date`` (``%Y-%m-%d``), ``open``,
        ``close`` and ``Name``.
    treasury_csv : str or path-like, optional
        CSV with at least the columns ``Date`` (``%m/%d/%Y``),
        ``10 Yr T bill`` (in percent, may contain ``NA``) and ``SP 500 ``
        (note the trailing space in the column name).

    Returns
    -------
    pandas.DataFrame
        The selected stock rows left-joined with the market data on date, plus
        ``daily_return``, ``10 Yr T bill returns``, ``SP 500 Returns``,
        ``CAPM return`` and ``residuals`` (``CAPM return - daily_return``).
        Rows with no matching market data have NaN in the market-derived columns.

    Notes
    -----
    NOTE(review): the T-bill figure is used as-is (divided by 100) next to
    daily returns, and the stock return is an open-to-close log return while
    the index return is a row-over-row simple return. If the yield is
    annualised, it should be converted to a daily rate - confirm against the
    data source.
    """
    if isinstance(stock_symbols, str):
        symbols = [stock_symbols]
    else:
        symbols = list(stock_symbols)

    stocks = _load_stock_data(stock_csv, symbols)
    market = _load_market_data(treasury_csv)

    # Perform a left join: keep every stock row even without market data
    data = pd.merge(stocks, market, left_on='date', right_on='Date', how='left')

    # Float placeholder; every row belongs to one of `symbols` and is filled below.
    data['CAPM return'] = np.nan
    for symbol in symbols:
        is_symbol = data['Name'] == symbol
        stock_data = data.loc[is_symbol]

        # Beta = Covariance [Return of Asset & Return of Market] / Variance [Return of Market]
        beta = _beta(stock_data['daily_return'], stock_data['SP 500 Returns'])

        # CAPM Return = Rf + Beta * (Mkt Return - Rf)
        # Rf = T bill rate
        # Mkt Return = SP 500
        risk_free = stock_data['10 Yr T bill returns']
        data.loc[is_symbol, 'CAPM return'] = (
            risk_free + beta * (stock_data['SP 500 Returns'] - risk_free)
        )

    # Finally, calculate the difference between actual daily return vs. CAPM predicted return
    data['residuals'] = data['CAPM return'] - data['daily_return']
    return data

# Run the analysis with its default inputs, then persist the resulting frame
# (row index included) as a CSV file in the working directory.
data = data_analysis()
data.to_csv(path_or_buf='Cleaned_Data_Analyzed.csv')