# PREPROCESSING

In [1]:
# Core data libraries
import pandas as pd
import numpy as np

# Time series modeling
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error

# re
import re

from itertools import product

# Deep learning for sequence modeling
import tensorflow as tf
from keras.api.models import Sequential
from keras.api.layers import LSTM, Dense, Dropout, Bidirectional, Input


In [2]:
# preprocess data

test_start_date = "2023-01-01"
test_data = pd.read_csv("android_data_set.csv")

num_of_students = test_data["student_id"].nunique()

# convert student_id into str col
test_data["student_id"] = test_data["student_id"].astype(str)

# Identify columns with test scores (e.g., "fa1:30", "fa2:20")
test_columns = [col for col in test_data.columns if col.lower().startswith("fa")]

# Extract test numbers and max scores from column names
test_info = []
for col in test_columns:
    match = re.match(r"fa(\d+):(\d+)", col)
    if match:
        test_number, max_score = match.groups()
        test_info.append((col, int(test_number), int(max_score)))

# Define test dates (assuming weekly tests)
num_tests = test_data.shape[1] - 4  # Exclude student_id, name, section
test_dates = pd.date_range(
    start=test_start_date, periods=num_tests, freq="7D")

# Reshape from wide to long format
test_data_long = test_data.melt(id_vars=["student_id", "first_name", "last_name", "section"],
                                var_name="test",
                                value_vars=[col for col,
                                            _, _ in test_info],
                                value_name="score")
                            

# Extract test number & assign correct dates
# Extract test number and assign corresponding max score
# Convert test number using the pre-extracted data from test_info
test_data_long["test_number"] = test_data_long["test"].map(
    {col: test_number for col, test_number, _ in test_info}
)
test_data_long["max_score"] = test_data_long["test"].apply(
    lambda x: next(max_score for col, _,
                    max_score in test_info if col == x)
)
test_data_long["date"] = test_data_long["test_number"].apply(
    lambda x: test_dates[x - 1])

# Drop old test column
test_data_long.drop(columns=["test"], inplace=True)

# Handling missing values
test_data_long["score"].fillna(
    test_data_long["score"].mean())

# normalize test scores
test_data_long["normalized_scores"] = test_data_long["score"] / \
    test_data_long["max_score"]

# normalize passing threshold
test_data_long["normalized_passing_threshold"] = test_data_long["max_score"] * 0.75 / test_data_long["max_score"]

# make test number into int
test_data_long["test_number"] = test_data_long["test_number"].astype(int)

test_data_long


Unnamed: 0,student_id,first_name,last_name,section,score,test_number,max_score,date,normalized_scores,normalized_passing_threshold
0,109478130361,KIMBERT,ABRAHAM,ANDROID,13,1,20,2023-01-01,0.65,0.75
1,136584130063,JOVERT III.,ANDES,ANDROID,14,1,20,2023-01-01,0.70,0.75
2,109478120108,JAMES,BALOLOY,ANDROID,12,1,20,2023-01-01,0.60,0.75
3,136540141587,STEVEN,BOLIMA,ANDROID,14,1,20,2023-01-01,0.70,0.75
4,109471100020,MARK ANTHONY,CABANOG,ANDROID,14,1,20,2023-01-01,0.70,0.75
...,...,...,...,...,...,...,...,...,...,...
850,403072150096,HANNALY,VIPINOSA,ANDROID,2,15,20,2023-04-09,0.10,0.75
851,109469121797,JOHN LLOYD,JIMENEZ,ANDROID,7,15,20,2023-04-09,0.35,0.75
852,109323130439,CRILVEN G.,OTAO,ANDROID,19,15,20,2023-04-09,0.95,0.75
853,109478130104,LANCE DANIELL,PELAGIO,ANDROID,3,15,20,2023-04-09,0.15,0.75


In [3]:

def make_stationary(student_data):
    student_data = student_data.sort_values("date").copy()
    student_data["score_diff"] = student_data["score"].diff()
    student_data["normalized_score_diff"] = student_data["normalized_scores"].diff()
    student_data.dropna(inplace=True)
    return student_data


In [4]:

def grid_search_arima(train_series):
    p_values = range(0, 2)
    d_values = [1]  # Differencing is manually applied, so d=1
    q_values = range(0, 2)

    best_aic = float("inf")
    best_order = None
    best_model = None

    for p, d, q in product(p_values, d_values, q_values):
        try:
            model = ARIMA(train_series, order=(p, d, q), freq="7D")
            fitted_model = model.fit()
            if fitted_model.aic < best_aic:
                best_aic = fitted_model.aic
                best_order = (p, d, q)
                best_model = fitted_model
        except:
            continue

    return best_order, best_model


# ARIMA Prediction


In [5]:

def arima_prediction(arima_model, student_scores, last_normalized_score, last_max_score):
    """ Generates an ARIMA prediction for a given student's time series. """

    arima_pred = arima_model.forecast(steps=1)[0]

    # reverse differencing
    predicted_normalized_score = arima_pred + last_normalized_score

    # reverse normalization
    predicted_score = predicted_normalized_score * last_max_score

    return predicted_score

# LSTM

In [6]:
window_size = 5
lstm_model = None

def prepare_lstm_data(data, window_size):

    """ Converts the dataset into sequences for LSTM training. """
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size])
    return np.array(X), np.array(y)


def build_lstm_model(window_size):
    """ Builds and compiles an LSTM model. """
    model = Sequential([
        Input(shape=(window_size, 1)),
        Bidirectional(LSTM(64, activation="tanh", return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(32, activation="tanh")),
        Dense(16, activation="relu"),
        Dense(1)  # Predicts one score
    ])
    model.compile(optimizer="adam", loss="mse")
    return model


def train_lstm_model(processed_data):
    """ Trains a single LSTM model across all students. """
    global lstm_model

    # Prepare data for LSTM training
    all_scores = []
    for _, student_data in processed_data.groupby("student_id"):
        normalized_diff_scores = make_stationary(student_data.copy())
        scores = normalized_diff_scores.sort_values(
            "date")["normalized_score_diff"].tolist()
        all_scores.extend(scores)  # Collect all scores

    # Convert data into sequences
    X_train, y_train = prepare_lstm_data(all_scores, window_size)
    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[1], 1))  # Reshape for LSTM

    # Build and train the LSTM model
    lstm_model = build_lstm_model(window_size)
    lstm_model.fit(X_train, y_train, epochs=32, batch_size=16)


def hybrid_prediction(student_scores, arima_model, last_normalized_score, last_max_score):
    """ Generates a hybrid prediction using both ARIMA and LSTM. """
    global lstm_model

    arima_pred = arima_prediction(arima_model=arima_model, student_scores=student_scores,
                                  last_normalized_score=last_normalized_score, last_max_score=last_max_score)

    # Use LSTM for refinement
    X_input = np.array(
        student_scores[-window_size:]).reshape(1, window_size, 1)
    lstm_pred = lstm_model.predict(X_input)[0][0]

    # reverse difference the lstm_pred
    lstm_pred_normalized = lstm_pred + last_normalized_score

    # reverse normalization
    lstm_pred = lstm_pred_normalized * last_max_score

    # Hybrid prediction: Combine both models
    hybrid_prediction = (arima_pred * 0.5) + (lstm_pred * 0.5)

    return hybrid_prediction

In [7]:
train_lstm_model(test_data_long)

Epoch 1/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.0051
Epoch 2/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0048
Epoch 3/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0050
Epoch 4/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0049
Epoch 5/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0048
Epoch 6/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0049
Epoch 7/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0049
Epoch 8/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0051
Epoch 9/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0050
Epoch 10/32
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0048
Epoch 11/

# TRAINING


In [8]:

def train_model(processed_data):
    """ Trains ARIMA for each student and applies the hybrid approach. """
    mae_df = pd.DataFrame(columns=["student_id", "arima_mae", "hybrid_mae", "best_prediction"])
    for student_id, student_data in processed_data.groupby("student_id"):

        differenced_student_data = make_stationary(student_data.copy())
        num_tests = differenced_student_data.shape[0]

        train = differenced_student_data.iloc[:num_tests-1].copy()
        test = differenced_student_data.iloc[num_tests-1:].copy()


        train.set_index("date", inplace=True)
        test.set_index("date", inplace=True)

        best_order, best_model = grid_search_arima(
            train["normalized_score_diff"])

        if best_order:
            last_max_score = train["max_score"].iloc[-1]
            # Base ARIMA Prediction
            arima_predictions = [arima_prediction(
                best_model, train["normalized_score_diff"], train["normalized_scores"].iloc[-1], last_max_score)]

            # Hybrid prediction
            hybrid_predictions = [hybrid_prediction(
                train["normalized_score_diff"], best_model, train["normalized_scores"].iloc[-1], last_max_score)]

            mae_arima = mean_absolute_error(test["score"], arima_predictions)
            mae_hybrid = mean_absolute_error(test["score"], hybrid_predictions)

            # determine whether the mae_arima is better than mae_hybrid and then use that as the predicted score
            if mae_arima < mae_hybrid:
                best_prediction = arima_predictions
            else:
                best_prediction = hybrid_predictions

            

            # Store the results
            new_row = [
                student_id,
                mae_arima,
                mae_hybrid,
                best_prediction
            ]
            mae_df.loc[len(mae_df)] = new_row

    return mae_df







In [15]:

def train_model_without_test(processed_data):
    """ Trains ARIMA for each student and applies the hybrid approach. """
    mae_df = pd.DataFrame(columns=["student_id", "arima_mae", "hybrid_mae", "best_prediction"])
    for student_id, student_data in processed_data.groupby("student_id"):

        differenced_student_data = make_stationary(student_data.copy())

        train = differenced_student_data


        train.set_index("date", inplace=True)

        best_order, best_model = grid_search_arima(
            train["normalized_score_diff"])

        if best_order:
            last_max_score = train["max_score"].iloc[-1]
            # Base ARIMA Prediction
            arima_predictions = [arima_prediction(
                best_model, train["normalized_score_diff"], train["normalized_scores"].iloc[-1], last_max_score)]

            # Hybrid prediction
            hybrid_predictions = [hybrid_prediction(
                train["normalized_score_diff"], best_model, train["normalized_scores"].iloc[-1], last_max_score)]

            mae_arima = mean_absolute_error([train["score"].iloc[-1]], arima_predictions)
            mae_hybrid = mean_absolute_error([train["score"].iloc[-1]], hybrid_predictions)

            # determine whether the mae_arima is better than mae_hybrid and then use that as the predicted score
            if mae_arima < mae_hybrid:
                best_prediction = arima_predictions
            else:
                best_prediction = hybrid_predictions

            

            # Store the results
            new_row = [
                student_id,
                mae_arima,
                mae_hybrid,
                best_prediction
            ]
            mae_df.loc[len(mae_df)] = new_row

    return mae_df







In [10]:
# train the arima model and make predictions
mae_df = train_model(test_data_long)
mae_df

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


Unnamed: 0,student_id,arima_mae,hybrid_mae,best_prediction
0,105522130030,0.30735,0.610487,[5.307349597861562]
1,108934110011,0.349716,0.718046,[14.650283534644565]
2,109323130439,1.0,1.543188,[18.0]
3,109461140179,1.000388,1.043382,[14.999612422303885]
4,109466130459,2.0,1.956812,[13.956812071613967]
5,109466130504,0.538224,0.725924,[5.538223569298584]
6,109466140189,0.692584,0.803104,[8.692583651633024]
7,109469121797,1.746635,1.83013,[8.746635314421132]
8,109469140122,0.154004,0.12019,[12.879809885030845]
9,109471100020,0.461538,0.187581,[20.18758131373069]


In [11]:
print(mae_df["arima_mae"].mean())
print(mae_df["hybrid_mae"].mean())

1.357728234561667
1.1797707435281464


In [16]:
mae_wo_test = train_model_without_test(test_data_long)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step

  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  self._init_dates(dates, freq)
  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


  arima_pred = arima_model.forecast(steps=1)[0]
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


  arima_pred = arima_model.forecast(steps=1)[0]


In [17]:
mae_wo_test


Unnamed: 0,student_id,arima_mae,hybrid_mae,best_prediction
0,105522130030,0.714286,0.400331,[4.599669204088434]
1,108934110011,0.340415,0.12702,[15.12701956318773]
2,109323130439,2.0,0.956812,[19.956812071613967]
3,109461140179,0.071354,0.007511,[15.992489071918762]
4,109466130459,2.0,1.043188,[10.956812071613967]
5,109466130504,0.500759,0.293567,[4.706432772506176]
6,109466140189,0.357104,0.22174,[7.778259937410066]
7,109469121797,0.21763,0.152003,[6.847997019654992]
8,109469140122,0.142859,0.114618,[12.885382428527286]
9,109471100020,0.428551,0.171088,[20.1710876777122]


In [18]:
print(mae_wo_test["arima_mae"].mean())
print(mae_wo_test["hybrid_mae"].mean())

0.6174077844803088
0.3175543044836461
