In [18]:
# Initial imports
import numpy as np
import pandas as pd
import hvplot.pandas
import yfinance as yf
from yahoo_fin import options as op
from datetime import datetime, timedelta


import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [14]:
# Energy_Sector_ETF, Crude_Oil, Natural_Gas, Coal and Currencies of Energy-importing countries to USD.
# Define the list of tickers and currency pairs.
currencies = ['CAD', 'MXN', 'BRL', 'SAR', 'IQD', 'COP']
tickers = ['XLE', 'CL=F', 'NG=F', 'MTF=F'] + [f'USD{currency}=X' for currency in currencies]

# Fetch close prices for tickers and currency pairs for the last 365 days and drop na
energy = yf.download(tickers, period="1y")['Close'].dropna()

energy

[*********************100%%**********************]  10 of 10 completed


Ticker,CL=F,MTF=F,NG=F,USDBRL=X,USDCAD=X,USDCOP=X,USDIQD=X,USDMXN=X,USDSAR=X,XLE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-04-17,80.830002,136.149994,2.275,4.9087,1.33650,4412.529785,1277.770264,17.992880,3.750300,86.199997
2023-04-18,80.860001,136.649994,2.366,4.9398,1.33920,4426.750000,1459.000000,18.032200,3.746317,86.540001
2023-04-19,79.160004,137.350006,2.222,4.9843,1.33909,4487.379883,1458.000000,18.043489,3.745706,86.239998
2023-04-20,77.290001,138.500000,2.249,5.0743,1.34680,4532.250000,1291.671265,18.040501,3.745492,85.480003
2023-04-21,77.870003,138.399994,2.233,5.0477,1.34808,4525.149902,1458.000000,17.997910,3.745881,84.980003
...,...,...,...,...,...,...,...,...,...,...
2024-04-08,86.430000,118.000000,1.844,5.0549,1.36041,3773.340088,1281.642090,16.452000,3.750500,97.459999
2024-04-09,85.230003,117.900002,1.872,5.0239,1.35708,3771.350098,1278.822998,16.314301,3.746185,97.489998
2024-04-10,86.209999,118.250000,1.885,5.0076,1.35729,3765.250000,1308.000000,16.383310,3.746029,97.790001
2024-04-11,85.019997,120.750000,1.764,5.0662,1.36870,3802.250000,1308.000000,16.452499,3.746358,97.720001


In [15]:
energy= energy.reset_index()
energy['Date'] = pd.to_datetime(energy['Date'])
energy.set_index('Date', inplace=True)
energy

Ticker,CL=F,MTF=F,NG=F,USDBRL=X,USDCAD=X,USDCOP=X,USDIQD=X,USDMXN=X,USDSAR=X,XLE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-04-17,80.830002,136.149994,2.275,4.9087,1.33650,4412.529785,1277.770264,17.992880,3.750300,86.199997
2023-04-18,80.860001,136.649994,2.366,4.9398,1.33920,4426.750000,1459.000000,18.032200,3.746317,86.540001
2023-04-19,79.160004,137.350006,2.222,4.9843,1.33909,4487.379883,1458.000000,18.043489,3.745706,86.239998
2023-04-20,77.290001,138.500000,2.249,5.0743,1.34680,4532.250000,1291.671265,18.040501,3.745492,85.480003
2023-04-21,77.870003,138.399994,2.233,5.0477,1.34808,4525.149902,1458.000000,17.997910,3.745881,84.980003
...,...,...,...,...,...,...,...,...,...,...
2024-04-08,86.430000,118.000000,1.844,5.0549,1.36041,3773.340088,1281.642090,16.452000,3.750500,97.459999
2024-04-09,85.230003,117.900002,1.872,5.0239,1.35708,3771.350098,1278.822998,16.314301,3.746185,97.489998
2024-04-10,86.209999,118.250000,1.885,5.0076,1.35729,3765.250000,1308.000000,16.383310,3.746029,97.790001
2024-04-11,85.019997,120.750000,1.764,5.0662,1.36870,3802.250000,1308.000000,16.452499,3.746358,97.720001


In [44]:
# Import necessary libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Greate Features and Target 
X = energy.drop(columns='XLE').shift(1).dropna()
y = energy['XLE'][1:]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training data
scaler.fit(X_train)

# Transform the training data
X_train_scaled = scaler.transform(X_train)

# Transform the testing data
X_test_scaled = scaler.transform(X_test)

# Create a model with scikit-learn
LR_model = LinearRegression()

# Fit the data into the model
LR_model.fit(X_train_scaled, y_train)

LR_predictions = LR_model.predict(X_test_scaled)

In [45]:
LR_predictions

array([84.6658019 , 97.3420066 , 86.86875602, 82.60549324, 87.75265422,
       88.69710053, 82.52135571, 92.28550933, 83.65700124, 85.59152995,
       96.69082484, 82.61424826, 92.64099179, 84.15543989, 89.71247456,
       87.82540957, 83.06494155, 80.3581775 , 79.90551467, 85.36420437,
       88.79818643, 80.75190371, 89.55346704, 82.05499751, 85.40271947,
       86.64348787, 97.87556555, 92.34112837, 80.97075702, 88.28467672,
       88.19964742, 80.00688476, 82.79010624, 85.70191364, 84.19377511,
       90.10169085, 79.54641055, 77.08540977, 79.6502479 , 96.88675207,
       85.22658945, 78.6331488 , 88.7326358 , 90.89322434, 79.56109165,
       92.60039143, 80.02718051, 81.26802214, 80.96553935, 84.74213635,
       89.0371744 , 87.61456345, 92.17774582, 84.28373463, 83.98577299,
       90.73938205, 88.03783194, 79.59161578, 81.66665397, 89.43234814,
       83.62670093, 83.83245936, 84.34807326])