In [1]:
#importing various packages that we will use
import pandas as pd
import numpy as np
import os
import statsmodels.formula.api as smf
import statsmodels.api as sm
import pdb
import matplotlib.pyplot as plt

# Per-series return assumptions, keyed by column name.
#   duration: 0 marks a price-based series (return = pct change); a non-zero
#             value is multiplied by the level change and scaled by 1/10000.
#   carry:    carry in percent; it is divided by 100 and by the day count
#             to obtain a daily carry return.
param = {
    'duration': {
        'x_SPX': 0,
        'x_Oil': 0,
        'x_EM_Equity': 0,
        'x_UST_10Y': 9.25,
        'x_VIX': 0,
        'y_EM_Spread': 5,
    },
    'carry': {
        'x_SPX': 1.9,
        'x_Oil': 0,
        'x_EM_Equity': 1.5,
        'x_UST_10Y': 3.0,
        'x_VIX': 0,
        'y_EM_Spread': 5.2,
    },
}

  from pandas.core import datetools


In [7]:
def read_data(fl_name = 'Spread_Model_Sample_Data.csv', data_dir = None):
    """Load the spread-model CSV and append daily change and return columns.

    Parameters
    ----------
    fl_name : str
        Name of the CSV file inside the data directory.
    data_dir : str, optional
        Directory that holds the CSV. When omitted, the original hard-coded
        location is used, so existing ``read_data()`` calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with a ``<col>_chg`` column (first difference) for
        every ``x_``/``y_`` series, plus the carry / price / total return
        columns added by ``daily_total_return``.
    """
    if data_dir is None:
        data_dir = r'C:\Users\Home\Documents\Models\Spread_Forecast'
    fl = os.path.join(data_dir, fl_name) #joining directory and filename
    df = pd.read_csv(fl, header = 0, parse_dates = True) #reading csv file from source into a dataframe

    # Assigning seven names requires the file to have exactly seven columns.
    df.columns = ['Date','x_SPX', 'x_Oil', 'x_EM_Equity','x_UST_10Y','x_VIX', 'y_EM_Spread']

    # Plain column assignment replaces the column together with its dtype.
    # `df.loc[:, col] = ...` writes into the existing column on recent pandas
    # and can leave the old dtype (object / float) in place.
    df['Date'] = pd.to_datetime(df['Date'])
    # NOTE(review): the int64 cast truncates any fractional index level - confirm this is intended.
    df['x_SPX'] = df['x_SPX'].astype('int64')

    # Every series column carries an underscore in its name; 'Date' does not.
    cols = [col for col in df.columns if '_' in col]
    for col in cols:
        df[col + '_chg'] = df[col].diff(1)

    df1 = daily_total_return(df, cols)

    # DataFrame.info() prints its own summary and returns None, so wrapping it
    # in print() only added a stray "None" line.
    df1.info()
    return df1

def daily_total_return(df, cols, params = None, day_count = 360):
    """Add daily carry, price and total return columns for each series.

    For every name in ``cols`` three columns are written to ``df``:
    ``<col>_carry_rtn``, ``<col>_price_rtn`` and ``<col>_total_rtn``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the level series; it is modified in place.
    cols : iterable of str
        Column names to process. Each must be a key of both
        ``params['carry']`` and ``params['duration']``.
    params : dict, optional
        Assumptions table with ``'carry'`` and ``'duration'`` sub-dicts keyed
        by column name. Defaults to the module-level ``param`` table.
    day_count : int or float, optional
        Number of days the carry is spread over (default 360).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    if params is None:
        params = param  # fall back to the notebook-wide assumptions

    for col in cols:
        carry_col = col + '_carry_rtn'
        price_col = col + '_price_rtn'
        total_col = col + '_total_rtn'

        # Carry is quoted in percent, so scale to a fraction and then to one day.
        df[carry_col] = params['carry'][col] / 100 / day_count

        duration = params['duration'][col]
        if duration == 0:
            # Price-based input: the return is the percentage change in level.
            df[price_col] = df[col].pct_change()
        else:
            # Spread/yield input: level change (in bp) times duration, sign
            # flipped because a wider spread is a loss, converted to a fraction.
            df[price_col] = df[col].diff(1) * -1 * duration / 10000

        # Total return = carry + price return (NaN on the first row, which has no prior level).
        df[total_col] = df[carry_col] + df[price_col]

    return df

In [17]:
def reg_m(df, typ = 'chg'):
    """Fit an OLS model of the EM spread on the five market factors and plot it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Date``, the ``x_*`` factor columns and ``y_EM_Spread``
        (plus their ``_chg`` variants when ``typ='chg'``). Two columns are
        written to it in place: ``y_EM_Spread_pred`` (fitted values) and
        ``y_EM_Spread_resid`` (residual as a whole-number percentage of the
        modelled series).
    typ : {'chg', 'abs'}
        ``'abs'`` regresses levels, ``'chg'`` regresses daily changes.

    Returns
    -------
    tuple
        ``(results, df)`` - the fitted statsmodels results object and the
        same frame that was passed in.

    Raises
    ------
    ValueError
        If ``typ`` is neither ``'abs'`` nor ``'chg'``.
    """
    # Column-name suffix used by each supported regression type.
    suffix_by_type = {'abs': '', 'chg': '_chg'}
    if typ not in suffix_by_type:
        # Raising keeps the notebook kernel alive; exit() would shut it down.
        raise ValueError("No Type specified: typ must be 'abs' or 'chg', got {!r}".format(typ))

    sfx = suffix_by_type[typ]
    target = 'y_EM_Spread' + sfx
    factors = ('x_Oil', 'x_EM_Equity', 'x_SPX', 'x_UST_10Y', 'x_VIX')
    # The formula is assembled locally from the fixed factor list, so the fit
    # does not depend on which derived columns happen to exist in df.
    formula = '{} ~ {}'.format(target, ' + '.join(name + sfx for name in factors))

    results = smf.ols(formula=formula, data=df).fit()
    # Rows dropped by the fit (e.g. the first NaN change) stay NaN via index alignment.
    df['y_EM_Spread_pred'] = results.fittedvalues
    # NOTE(review): rows where the target equals 0 produce +/-inf percentages here.
    df['y_EM_Spread_resid'] = (results.resid / df[target] * 100).round(0)

    plot_reg(results, df, typ)
    print (results.summary())
    return results, df

def formula_create(df, typ):
    """Build a ``'y ~ x1 + x2 + ...'`` regression formula from df's column names.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns follow the ``x_<name>`` / ``y_<name>`` convention,
        optionally followed by a derived suffix such as ``_chg`` or
        ``_total_rtn``.
    typ : {'chg', 'total_rtn', 'abs'}
        ``'chg'`` and ``'total_rtn'`` select the columns ending in that
        suffix; ``'abs'`` selects the level columns, i.e. those without any
        derived suffix.

    Returns
    -------
    str
        The formula text, with regressors in column order.

    Raises
    ------
    ValueError
        If ``typ`` is not recognised, or if the selection does not contain
        exactly one ``y_`` column and at least one ``x_`` column.
    """
    # Suffixes of columns this notebook derives from the level series.
    derived_suffixes = ('_chg', '_carry_rtn', '_price_rtn', '_total_rtn', '_pred', '_resid')

    if typ in ('chg', 'total_rtn'):
        wanted = '_' + typ
        selected = [col for col in df.columns if col.endswith(wanted)]
    elif typ == 'abs':
        # Levels only: skip anything carrying a derived suffix.
        selected = [col for col in df.columns if not col.endswith(derived_suffixes)]
    else:
        raise ValueError("Unknown typ {!r}; expected 'chg', 'total_rtn' or 'abs'".format(typ))

    cols_x = [col for col in selected if col.startswith('x_')]
    cols_y = [col for col in selected if col.startswith('y_')]
    if len(cols_y) != 1 or not cols_x:
        raise ValueError(
            "Need exactly one y_ column and at least one x_ column for typ {!r}; "
            "found y={} x={}".format(typ, cols_y, cols_x))

    # Format the column name itself, not the list, so the left-hand side is a valid term.
    formula_text = '{} ~ {}'.format(cols_y[0], ' + '.join(cols_x))

    print (cols_x, '\n', cols_y, '\n', '\n', formula_text)
    return formula_text

In [18]:
def plot_reg(results, df, typ):
    """Plot the modelled series against its fitted values, with residuals.

    The left axis shows the actual series (for ``typ`` ``'abs'`` or
    ``'chg'``) and the prediction; a twin right axis shows the percentage
    residual. R squared is annotated on the chart and the fitted
    coefficients are printed after the figure is shown.

    Parameters
    ----------
    results : fitted regression results
        Must expose ``rsquared`` and ``params``.
    df : pandas.DataFrame
        Needs ``Date``, ``y_EM_Spread_pred``, ``y_EM_Spread_resid`` and the
        actual series column for the given ``typ``.
    typ : str
        ``'abs'`` plots ``y_EM_Spread``; ``'chg'`` plots ``y_EM_Spread_chg``.
        Any other value plots only the prediction and residuals.
    """
    # (column, legend label) of the actual series for each supported type.
    actual_by_type = {
        'abs': ('y_EM_Spread', 'EM Spread'),
        'chg': ('y_EM_Spread_chg', 'EM Spread Change'),
    }

    fig, ax = plt.subplots(figsize=(20, 10))
    actual = actual_by_type.get(typ)
    if actual is not None:
        column, legend_label = actual
        ax.plot(df['Date'], df[column], color='g', label = legend_label, linewidth = 0.6)
        ax.set_ylabel(column, fontsize=16, weight='bold', va='top')
    ax.plot(df['Date'], df['y_EM_Spread_pred'], color='orange', label = 'EM Spread Predict', linewidth = 1)

    ax.set_xlabel("Date", fontsize=16, weight='bold', va='top')
    ax.set_title("Linear Regression", fontsize=16, weight='bold', va='top')

    ax2 = ax.twinx()  # instantiate a second axes that shares the same x-axis
    ax2.plot(df['Date'], df['y_EM_Spread_resid'], color='red', label = 'Residuals', linewidth= 0.2, ls = 'dashed')
    ax2.set_ylabel('Residual as %age of Spread', fontsize=16, weight='bold', va='top')

    # Collect handles from both axes so the 'Residuals' line on the twin axis
    # is listed in the legend as well.
    handles, labels = ax.get_legend_handles_labels()
    handles2, labels2 = ax2.get_legend_handles_labels()
    ax.legend(handles + handles2, labels + labels2, loc='upper left')

    r2_text = 'R Squared = ' + str(round(results.rsquared,3))
    # Only one vertical-alignment keyword is passed: 'va' is an alias of
    # 'verticalalignment', and supplying both is rejected by matplotlib.
    ax.text(0.8, 0.95, r2_text, horizontalalignment='center', verticalalignment='top',
            fontsize=16, weight='bold', transform=ax.transAxes)
    plt.show()
    print (results.params)

In [None]:
# Load the sample data set (default file), then fit and plot the
# daily-change regression; reg_m returns the frame with prediction and
# residual columns added.
df = read_data()
res, df = reg_m(df, typ='chg')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5176 entries, 0 to 5175
Data columns (total 31 columns):
Date                     5176 non-null datetime64[ns]
x_SPX                    5176 non-null int64
x_Oil                    5176 non-null float64
x_EM_Equity              5176 non-null float64
x_UST_10Y                5176 non-null float64
x_VIX                    5176 non-null float64
y_EM_Spread              5176 non-null int64
x_SPX_chg                5175 non-null float64
x_Oil_chg                5175 non-null float64
x_EM_Equity_chg          5175 non-null float64
x_UST_10Y_chg            5175 non-null float64
x_VIX_chg                5175 non-null float64
y_EM_Spread_chg          5175 non-null float64
x_SPX_carry_rtn          5176 non-null float64
x_SPX_price_rtn          5175 non-null float64
x_SPX_total_rtn          5175 non-null float64
x_Oil_carry_rtn          5176 non-null float64
x_Oil_price_rtn          5175 non-null float64
x_Oil_total_rtn          5175 non-null float

In [11]:
# Record the pandas version this notebook was run with.
print(pd.__version__)

0.20.1
