In [4]:
# Initial imports
import numpy as np
import pandas as pd
import hvplot.pandas
import yfinance as yf
from datetime import datetime, timedelta


import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [35]:
# Import necessary libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

# Target: Energy_Sector_ETF 
# Features: Crude_Oil, Natural_Gas, Coal and Currencies of Energy-importing countries to USD.
# Define the list of tickers and currency pairs.
currencies = ['CAD', 'MXN', 'BRL', 'SAR', 'IQD', 'COP']
tickers = ['XLE', 'CL=F', 'NG=F', 'MTF=F'] + [f'USD{currency}=X' for currency in currencies]

# Fetch close prices for tickers and currency pairs for the last 365 days and drop NaN
energy = yf.download(tickers, period="1y")['Close'].dropna()

# Check if data was fetched successfully
if energy is not None:
    
    # List to store predicted prices
    predicted_prices_LR = []
    predicted_prices_RF = []
    
    # Create DataFrame to store predicted prices
    predicted_prices_df = pd.DataFrame(columns=['Date', 'Predicted Price (LR)', 'Predicted Price (RF)'])

    # Extract last date from original DataFrame
    last_date = energy.index[-1]

    for i in range(1, 91):  # Loop for 90 days
        
        # Features
        X = energy.drop(columns='XLE').shift(i).dropna()
        # Target
        y = energy['XLE'][i:]

        # Initialize the StandardScaler
        scaler = StandardScaler()

        #  Fit the scaler on the training data
        scaler.fit(X)

        # Transform the training data
        X_scaled = scaler.transform(X)

        # Create a model
        LR_model = LinearRegression()
        RF_model = RandomForestRegressor()

        # Fit the model
        LR_model.fit(X_scaled, y)
        RF_model.fit(X_scaled, y)

        # Get today's features (all except XLE)
        today_features = energy.drop(columns='XLE').iloc[-1]

        # Scale today's features
        today_features_scaled = scaler.transform([today_features])

        # Predict
        predicted_price_LR = LR_model.predict(today_features_scaled)[0]
        predicted_price_RF = RF_model.predict(today_features_scaled)[0]
        

        # Append predicted prices to the lists
        predicted_prices_LR.append(predicted_price_LR)
        predicted_prices_RF.append(predicted_price_RF)
    
    # Generate dates for the predicted prices
    new_dates = [last_date + timedelta(days=i) for i in range(1, 91)]

    # Assign the predicted prices and new dates to the DataFrame
    predicted_prices_df['Date'] = new_dates
    predicted_prices_df['Predicted Price (LR)'] = predicted_prices_LR
    predicted_prices_df['Predicted Price (RF)'] = predicted_prices_RF
#Plot predictions
predicted_prices_df.hvplot(x='Date', 
                           y=['Predicted Price (LR)', 'Predicted Price (RF)'], 
                           title='Predicted Prices', xlabel='Date', ylabel='Price',
                           width=800,        
                           height=300 
                          ).opts(yformatter='%.0f', active_tools=[])

[*********************100%%**********************]  10 of 10 completed


In [36]:
predicted_prices_df['Average Predicted Price'] = predicted_prices_df[['Predicted Price (LR)', 
                                                                      'Predicted Price (RF)']
                                                                    ].mean(axis=1)
predicted_prices_df.hvplot(x='Date',
                           y='Average Predicted Price',
                           title='Average Predicted Price',
                           xlabel='Date',ylabel='Price',
                           width=800,        
                           height=300 ).opts(yformatter='%.0f', active_tools=[])


In [55]:
# Calculate the daily price change
average_predictions_df = pd.DataFrame(predicted_prices_df[['Date', 'Average Predicted Price']])
average_predictions_df['Average Price Change'] = average_predictions_df['Average Predicted Price'].diff()

# Define a function to calculate profit/loss
def calculate_profit_loss(price_change):
    if price_change > 0:
        return 1  # Buy
    elif price_change < 0:
        return -1  # Sell
    else:
        return 0 

# Apply the function to calculate profit/loss
average_predictions_df['Profit/Loss'] = average_predictions_df['Average Price Change'].apply(calculate_profit_loss)
average_predictions_df = average_predictions_df.dropna()
average_predictions_df

Unnamed: 0,Date,Average Predicted Price,Average Price Change,Profit/Loss
1,2024-04-18,93.862137,0.375267,1
2,2024-04-19,94.187853,0.325716,1
3,2024-04-20,94.035635,-0.152219,-1
4,2024-04-21,94.716751,0.681116,1
5,2024-04-22,95.410225,0.693474,1
...,...,...,...,...
85,2024-07-11,85.101391,0.161104,1
86,2024-07-12,85.086917,-0.014474,-1
87,2024-07-13,85.249760,0.162843,1
88,2024-07-14,85.345728,0.095968,1
