In [2]:
import pandas as pd
import numpy as np
import joblib
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the preprocessed dataset (one row per observation, all columns numeric
# enough to be passed to MinMaxScaler below).
X = pd.read_csv('../Data Preprocessing/Final_Out.csv')

# Hold out every row whose 'Day' value is 29 or later as the test set.
X_train = X[X['Day'] < 29]
X_test = X[X['Day'] >= 29]

# Fit the scaler on the training rows only, then reuse it for the test rows,
# so no statistics from the held-out rows influence the scaling.
scaler = MinMaxScaler()

X_train_scaled = scaler.fit_transform(X_train.to_numpy())
X_test_scaled = scaler.transform(X_test.to_numpy())

# Re-wrap the scaled arrays with the original column labels. Note that these
# frames (and the *_scaled arrays) still contain the target columns.
X_train = pd.DataFrame(X_train_scaled, columns=X.columns)
X_test = pd.DataFrame(X_test_scaled, columns=X.columns)

# Columns to predict. Declared once so the feature/target split below cannot
# drift out of sync between the train and test frames.
TARGET_COLUMNS = [
    'Next Time to Fine Arts Annex',
    'Next Time to Loyola House of Studies',
    'Next Time to Grade School',
    'Next Time to Gate 2.5',
    'Next Time to Leong Hall',
    'Next Time to Xavier Hall',
]

# Predictor-only frames (targets removed).
X_train_copy = X_train.drop(columns=TARGET_COLUMNS)
X_test_copy = X_test.drop(columns=TARGET_COLUMNS)

# Target-only frames.
Y_train_copy = X_train[TARGET_COLUMNS]
Y_test_copy = X_test[TARGET_COLUMNS]

autoe_data = []
r2_data = []

# The autoencoder must only see predictor columns. The full scaled matrices
# (X_train_scaled / X_test_scaled) still contain the 'Next Time to ...' target
# columns, so encoding them would leak the targets into the regression inputs
# and make the reported R^2 meaningless.
ae_train_input = X_train_copy.to_numpy()
ae_test_input = X_test_copy.to_numpy()
input_dim = ae_train_input.shape[1]

# Sweep the bottleneck width from 1 up to the number of predictor columns.
for i in range(1, input_dim + 1):
    # Define the autoencoder architecture: input -> Dense(i) -> Dense(input_dim)
    encoding_dim_small = i

    input_layer_small = Input(shape=(input_dim,))
    encoded_small = Dense(encoding_dim_small, activation="relu")(input_layer_small)
    decoded_small = Dense(input_dim, activation="sigmoid")(encoded_small)

    autoencoder_small = Model(input_layer_small, decoded_small)
    autoencoder_small.compile(loss="mse", optimizer="adam")

    # Train the autoencoder to reconstruct its own (features-only) input
    autoencoder_small.fit(ae_train_input, ae_train_input, epochs=10, batch_size=32)

    layer_names = [layer.name for layer in autoencoder_small.layers]

    # Index 1 is expected to be the encoding Dense layer (index 0 being the
    # Input layer); calling it directly yields the bottleneck activations.
    bottleneck_layer_small = autoencoder_small.get_layer(layer_names[1])
    X_train_bottleneck = np.asarray(bottleneck_layer_small(ae_train_input))

    # Use savetxt's default float format: the activations are real-valued, and
    # an integer format such as '%d' would truncate them on disk.
    np.savetxt(f'Autoencoder Files/ae-train-{i}.txt', X_train_bottleneck)

    # Score the encoding by how well a linear model predicts the targets from it
    model = LinearRegression()
    model.fit(X_train_bottleneck, Y_train_copy)

    X_test_bottleneck = np.asarray(bottleneck_layer_small(ae_test_input))
    np.savetxt(f'Autoencoder Files/ae-test-{i}.txt', X_test_bottleneck)
    y_pred = model.predict(X_test_bottleneck)

    r2 = r2_score(Y_test_copy, y_pred)
    r2_data.append({
        'Component Count': i,
        'R^2': r2
    })

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

In [None]:
# Retrieve NumPy data
def fetch_ae_file(component_num: int, is_train: bool = True) -> np.ndarray:
    """Load saved autoencoder bottleneck features from a text file.

    Args:
        component_num: Bottleneck width that appears in the file name.
        is_train: If True, read ``ae-train-<component_num>.txt``; otherwise
            read ``ae-test-<component_num>.txt``.

    Returns:
        The file contents as parsed by ``np.loadtxt`` with ``dtype=float``.
    """
    # Pick the split label outside the f-string: reusing the enclosing quote
    # character inside a replacement field is a SyntaxError before Python 3.12.
    split = 'train' if is_train else 'test'
    return np.loadtxt(f'Autoencoder Files/ae-{split}-{component_num}.txt', dtype=float)