In [45]:
import pandas as pd
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.api import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [46]:
from nsepy import get_history as gh
import datetime as dt

In [47]:
# Download price history for Axis Bank (ticker AXISBANK.NS) through yfinance.
symbol = 'AXISBANK.NS'
start, end = dt.datetime(2021, 7, 1), dt.datetime(2022, 1, 27)
stk_data = yf.download(symbol, start=start, end=end)

[*********************100%%**********************]  1 of 1 completed


In [48]:
# Keep only the four price columns used by the rest of the analysis.
price_columns = ["Open", "High", "Low", "Close"]
stk_data = stk_data[price_columns]

In [49]:
# Display the selected Open/High/Low/Close frame as this cell's output.
stk_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-07-01,750.000000,754.400024,744.500000,746.450012
2021-07-02,747.599976,752.849976,744.000000,750.650024
2021-07-05,757.099976,762.000000,753.400024,760.349976
2021-07-06,757.000000,767.000000,755.200012,758.349976
2021-07-07,755.900024,764.000000,752.250000,762.900024
...,...,...,...,...
2022-01-19,725.200012,731.950012,718.700012,729.599976
2022-01-20,731.049988,737.150024,722.150024,728.900024
2022-01-21,722.000000,725.250000,708.049988,712.500000
2022-01-24,715.000000,716.400024,688.200012,704.599976


In [50]:
# Rescale the price columns to the range [0, 1].
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that are later held out for testing - confirm this is intended.
Ms = MinMaxScaler()
Ms.fit(stk_data)
data1 = Ms.transform(stk_data)
print("Len:", data1.shape)

Len: (143, 4)


In [51]:
# Wrap the scaled array in a DataFrame so columns can be addressed by name.
ohlc_names = ["Open", "High", "Low", "Close"]
data1 = pd.DataFrame(data=data1, columns=ohlc_names)

In [52]:
# Chronological 80/20 split: the first 80% of rows train, the remainder tests.
training_size = round(0.80 * len(data1))
print(training_size)

# X_* and y_* are the same row slices of the same frame (all four columns).
X_train, X_test = data1.iloc[:training_size], data1.iloc[training_size:]
print("X_train length:", X_train.shape)
print("X_test length:", X_test.shape)

y_train, y_test = data1.iloc[:training_size], data1.iloc[training_size:]
print("y_train length:", y_train.shape)
print("y_test length:", y_test.shape)

114
X_train length: (114, 4)
X_test length: (29, 4)
y_train length: (114, 4)
y_test length: (29, 4)


In [53]:
import warnings
warnings.filterwarnings("ignore")

In [54]:
# Accumulator for model metrics: one independent, initially empty list per column.
performance = {
    metric: [] for metric in ("Model", "RMSE", "MaPe", "Lag", "Test")
}

## SES (Simple Exponential Smoothing): a simple model that gives the most weight to recent observations, suitable for stationary data without trend or seasonality.

In [55]:
def cominbation_SES(dataset, listt, test_obs=28, verbose=True):
    """Fit a Simple Exponential Smoothing model per column and score its forecast.

    For every column name in ``listt`` the last ``test_obs`` rows of
    ``dataset`` are held out, an ``ExponentialSmoothing`` model with no trend
    and no seasonal component is fitted on the remaining rows, and a
    ``test_obs``-step forecast is compared with the held-out rows.

    Side effect: each column's forecast is written to
    ``ses_forecasted_<col>_<test_obs>.csv`` in the current working directory.

    NOTE(review): MAPE is computed on the values exactly as passed in. If the
    input has been min-max scaled, actual values close to zero will inflate
    it - interpret the ``MaPe`` column with care.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing every column named in ``listt``, ordered in time.
    listt : list of str
        Column names to model, one independent model per column.
    test_obs : int, default 28
        Number of trailing rows held out for evaluation (also the forecast
        horizon). Must satisfy ``0 < test_obs < len(dataset)``.
    verbose : bool, default True
        When True, print the column list, a progress line per column and the
        length of every metric list (the original diagnostic output).

    Returns
    -------
    perf : pandas.DataFrame
        One row per column with ``Model`` (the column name), ``RMSE``
        (rounded to 2 decimals), ``MaPe``, ``Lag`` (always ``None``) and
        ``Test`` (the hold-out size).
    predictions : dict
        Maps each column name to a single-column DataFrame of its forecasts.

    Raises
    ------
    ValueError
        If ``test_obs`` would leave the training or the test window empty.
    """
    if verbose:
        print(listt)

    # Guard the split: test_obs == 0 would make `[:-0]` an empty training set,
    # and test_obs >= len(dataset) would leave nothing to train on either.
    if not 0 < test_obs < len(dataset):
        raise ValueError(
            f"test_obs must be between 1 and {len(dataset) - 1} "
            f"(dataset has {len(dataset)} rows), got {test_obs}"
        )

    performance = {"Model": [], "RMSE": [], "MaPe": [], "Lag": [], "Test": []}
    predictions = {}

    for col in listt:
        if verbose:
            print(f"Processing column: {col}")

        column_data = dataset[[col]]
        train = column_data.iloc[:-test_obs]
        test = column_data.iloc[-test_obs:]

        # Simple Exponential Smoothing: no trend and no seasonal component.
        model = ExponentialSmoothing(train, trend=None, seasonal=None)
        result = model.fit()

        pred = result.forecast(steps=test_obs)
        # Name the forecast explicitly so the column label is always `col`,
        # independent of whatever name the forecast object carries.
        preds = pd.Series(pred, name=col).to_frame()

        preds.to_csv(f"ses_forecasted_{col}_{test_obs}.csv", index=False)

        # RMSE as the square root of MSE: the `squared=False` keyword was
        # deprecated in scikit-learn 1.4 and later removed, so avoid it.
        rmse = round(mean_squared_error(test, pred) ** 0.5, 2)
        mape = mean_absolute_percentage_error(test, pred)

        performance["Model"].append(col)
        performance["RMSE"].append(rmse)
        performance["MaPe"].append(mape)
        performance["Lag"].append(None)  # SES does not use lag
        performance["Test"].append(test_obs)

        predictions[col] = preds

    if verbose:
        for key, value in performance.items():
            print(f"{key} length: {len(value)}")

    # Every list receives exactly one entry per column, so the frame can be
    # built directly; a genuine construction error should surface, not be hidden.
    perf = pd.DataFrame(performance)
    return perf, predictions

In [56]:
# Columns to model; each one gets its own SES fit.
listt = [
    "Close",
    "High",
    "Open",
    "Low",
]

In [57]:
# Run the SES evaluation for every column in `listt` on the scaled frame.
# `perf` receives the metrics table and `predictions` the per-column forecasts;
# the call also writes one forecast CSV per column to the working directory.
perf, predictions = cominbation_SES(data1, listt)

['Close', 'High', 'Open', 'Low']
Processing column: Close
Processing column: High
Processing column: Open
Processing column: Low
Model length: 4
RMSE length: 4
MaPe length: 4
Lag length: 4
Test length: 4


In [58]:
# Display the min-max scaled Open/High/Low/Close frame that was passed to the model.
data1

Unnamed: 0,Open,High,Low,Close
0,0.508982,0.441549,0.543815,0.491495
1,0.497006,0.433854,0.540997,0.513145
2,0.544411,0.479275,0.593970,0.563144
3,0.543912,0.504095,0.604114,0.552835
4,0.538423,0.489203,0.587489,0.576289
...,...,...,...,...
138,0.385230,0.330107,0.398422,0.404639
139,0.414421,0.355920,0.417864,0.401031
140,0.369261,0.296848,0.338405,0.316495
141,0.334331,0.252916,0.226543,0.275773


In [59]:
# Display the per-column metrics table returned by cominbation_SES.
perf

Unnamed: 0,Model,RMSE,MaPe,Lag,Test
0,Close,0.15,0.841872,,28
1,High,0.14,1.296879,,28
2,Open,0.15,0.781288,,28
3,Low,0.16,1.131016,,28
