# Basic OLS Linear Regression for USD/INR Volatility

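Pure baseline model: an ordinary least squares (OLS) regression of the log of the next day's 10-day rolling realized volatility of USD/INR on lagged realized volatilities of USD/INR itself and of the US Dollar Index, crude oil, the S&P 500, the Nifty 50 and the US 10-year Treasury yield.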


In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
from sklearn.metrics import r2_score, mean_squared_error


In [2]:
def get_close(ticker, start='2010-01-01'):
    """Download the daily adjusted closing prices for one Yahoo Finance ticker.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol, e.g. ``'USDINR=X'``.
    start : str, optional
        First date to request, formatted as ``'YYYY-MM-DD'``.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        The ``'Close'`` entry of the downloaded frame, indexed by date.
        NOTE(review): on yfinance versions that return MultiIndex columns
        this is presumably a one-column DataFrame keyed by the ticker --
        confirm against the installed version.

    Raises
    ------
    ValueError
        If the download comes back with no rows.
    """
    # Pass auto_adjust explicitly instead of relying on the library default:
    # the default differs between yfinance releases and leaving it implicit
    # emits a warning on every call. With auto_adjust=True the 'Close' column
    # already holds the adjusted close, so no 'Adj Close' fallback is needed.
    data = yf.download(ticker, start=start, progress=False, auto_adjust=True)

    # Fail loudly here; an empty frame would otherwise silently wipe out every
    # row once the per-ticker series are joined and NaNs are dropped.
    if data.empty:
        raise ValueError(f"No price data returned for ticker {ticker!r}")

    return data['Close']

# Column name -> Yahoo Finance ticker, in the order the columns should appear.
ticker_by_column = {
    'usdinr': 'USDINR=X',
    'dxy': 'DX-Y.NYB',
    'crude': 'CL=F',
    'sp500': '^GSPC',
    'nifty': '^NSEI',
    'tnx': '^TNX',
}

# Download each series and line them up side by side on their date index.
close_frames = [get_close(symbol) for symbol in ticker_by_column.values()]
prices = pd.concat(close_frames, axis=1)
prices.columns = list(ticker_by_column)

# Keep only the dates on which every one of the six series has a value.
prices = prices.dropna()
prices.head()

  data = yf.download(ticker, start=start, progress=False)
  data = yf.download(ticker, start=start, progress=False)
  data = yf.download(ticker, start=start, progress=False)
  data = yf.download(ticker, start=start, progress=False)
  data = yf.download(ticker, start=start, progress=False)
  data = yf.download(ticker, start=start, progress=False)


Unnamed: 0_level_0,usdinr,dxy,crude,sp500,nifty,tnx
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,46.287998,77.529999,81.510002,1132.98999,5232.200195,3.841
2010-01-05,46.119999,77.620003,81.769997,1136.52002,5277.899902,3.755
2010-01-06,45.720001,77.489998,83.18,1137.140015,5281.799805,3.808
2010-01-07,45.688,77.910004,82.660004,1141.689941,5263.100098,3.822
2010-01-08,45.518002,77.470001,82.75,1144.97998,5244.75,3.808


In [3]:
# Daily log returns: log(P_t / P_{t-1}) for every column of `prices`.
#
# A log return is only defined for strictly positive prices. Non-positive
# quotes are masked to NaN *before* taking the log, so the affected rows are
# removed by dropna() instead of triggering NumPy RuntimeWarnings or leaving
# -inf values (log of 0) behind, which dropna() would not remove.
positive_prices = prices.where(prices > 0)
returns = np.log(positive_prices / positive_prices.shift(1)).dropna()
returns.head()

  result = func(self.values, **kwargs)


Unnamed: 0_level_0,usdinr,dxy,crude,sp500,nifty,tnx
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-05,-0.003636,0.00116,0.003185,0.003111,0.008696,-0.022644
2010-01-06,-0.008711,-0.001676,0.017097,0.000545,0.000739,0.014016
2010-01-07,-0.0007,0.005405,-0.006271,0.003993,-0.003547,0.00367
2010-01-08,-0.003728,-0.005664,0.001088,0.002878,-0.003493,-0.00367
2010-01-11,-0.003521,-0.006085,-0.002783,0.001745,0.000886,0.002623


In [4]:
# Realized volatility per series: square root of the sum of squared log
# returns over a rolling window of 10 rows. Each output column is the source
# column name with '_rv' appended.
squared_returns = returns ** 2
rv = np.sqrt(squared_returns.rolling(10).sum()).add_suffix('_rv')

# The first 9 rows have an incomplete window and are therefore NaN.
rv = rv.dropna()
rv.head()

Unnamed: 0_level_0,usdinr_rv,dxy_rv,crude_rv,sp500_rv,nifty_rv,tnx_rv
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-19,0.013711,0.013081,0.03864,0.021759,0.015165,0.04701
2010-01-20,0.014465,0.016911,0.042455,0.024027,0.012447,0.043209
2010-01-21,0.01155,0.01683,0.043723,0.030705,0.027676,0.042953
2010-01-22,0.011795,0.015946,0.04786,0.037791,0.029752,0.042948
2010-01-25,0.011623,0.014951,0.048804,0.03796,0.030071,0.043697


In [5]:
# Own-history features: USD/INR realized volatility 1 and 5 rows back.
usdinr_vol = rv['usdinr_rv']
rv['rv_lag1'] = usdinr_vol.shift(1)
rv['rv_lag5'] = usdinr_vol.shift(5)

# Cross-asset features: replace each column by its value from the previous row.
# NOTE: this overwrites the columns in place, so running this cell a second
# time shifts them by one more row.
exog_cols = ['dxy_rv', 'crude_rv', 'sp500_rv', 'nifty_rv', 'tnx_rv']
rv[exog_cols] = rv[exog_cols].shift(1)


In [6]:
# Regression target: natural log of the USD/INR realized volatility found one
# row ahead of the current row.
next_row_vol = rv['usdinr_rv'].shift(-1)
rv['target'] = np.log(next_row_vol)

# Drop the rows left incomplete by the lagged features (top of the frame) and
# by the forward-shifted target (last row).
rv = rv.dropna()
rv.head()

Unnamed: 0_level_0,usdinr_rv,dxy_rv,crude_rv,sp500_rv,nifty_rv,tnx_rv,rv_lag1,rv_lag5,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-27,0.012235,0.014951,0.048804,0.03796,0.030071,0.043697,0.011623,0.013711,-4.524822
2010-01-28,0.010837,0.015018,0.053198,0.037925,0.043467,0.043788,0.012235,0.014465,-4.442172
2010-01-29,0.01177,0.015261,0.048799,0.038611,0.042922,0.03524,0.010837,0.01155,-4.39337
2010-02-01,0.012359,0.01677,0.047793,0.038982,0.042792,0.033654,0.01177,0.011795,-4.430873
2010-02-02,0.011904,0.016926,0.052063,0.041405,0.042658,0.033409,0.012359,0.011623,-4.371275


In [7]:
# Regressors used by the baseline model, and the design matrix / response.
features = [
    'rv_lag1', 'rv_lag5',
    'dxy_rv', 'crude_rv', 'sp500_rv', 'nifty_rv', 'tnx_rv',
]
X, y = rv[features], rv['target']

# Chronological split without shuffling: the first 80% of the rows are used
# for fitting, the remaining rows are held out for evaluation.
split = int(0.8 * len(rv))
train_rows = slice(None, split)
test_rows = slice(split, None)

y_train = y.iloc[train_rows]
y_test = y.iloc[test_rows]

# statsmodels' OLS does not add an intercept on its own, so a constant
# column is added to both design matrices.
X_train = sm.add_constant(X.iloc[train_rows])
X_test = sm.add_constant(X.iloc[test_rows])


In [8]:
# Fit ordinary least squares on the training window and show the full report.
# NOTE(review): this uses the default (nonrobust) covariance estimator; if the
# residuals turn out to be serially correlated, the reported standard errors
# and t-statistics should be treated with caution.
ols_spec = sm.OLS(endog=y_train, exog=X_train)
model = ols_spec.fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                 target   R-squared:                       0.689
Model:                            OLS   Adj. R-squared:                  0.689
Method:                 Least Squares   F-statistic:                     957.0
Date:                Thu, 18 Dec 2025   Prob (F-statistic):               0.00
Time:                        18:17:29   Log-Likelihood:                -305.60
No. Observations:                3027   AIC:                             627.2
Df Residuals:                    3019   BIC:                             675.3
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -5.0450      0.016   -323.608      0.0

In [9]:
# Out-of-sample evaluation on the held-out rows (target is in log units).
pred = model.predict(X_test)

test_r2 = r2_score(y_test, pred)
test_rmse = np.sqrt(mean_squared_error(y_test, pred))

print('R2:', test_r2)
print('RMSE:', test_rmse)

R2: -0.10780940093749103
RMSE: 0.7064605722243291
