In [None]:
import csv
import os
import pickle
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Ftrl

In [None]:
# Path of the Linux dataset file. It is handed to generate_train_test_samples,
# which first tries to read it as HDF5 and then as CSV.
# NOTE(review): left empty here — it has to be filled in before the cells below can run.
data_linux = ""

In [None]:
def generate_train_test_samples(hdf_path: str, target_columns: list, columns_to_drop: list, test_size: float,
                                val_size: float = 0.1, random_state: int = 42):
    """Load a dataset from disk and split it into train, test and validation arrays.

    The file is read as HDF5 first; if that fails for any ordinary reason
    (wrong format, missing optional dependency, ...) it is read as CSV instead.

    Args:
        hdf_path: Path to the dataset file (HDF5 or CSV).
        target_columns: Column names used as the regression target ``y``.
        columns_to_drop: Column names removed from the frame to build ``X``.
        test_size: Fraction of all rows held out as the test split.
        val_size: Fraction of the remaining training rows held out as the
            validation split.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X_val, y_val)`` of NumPy arrays.

    Raises:
        FileNotFoundError: If ``hdf_path`` does not exist.
        Exception: Whatever ``pd.read_csv`` raises when the CSV fallback also fails.
    """
    try:
        df = pd.read_hdf(hdf_path)
    except FileNotFoundError:
        # A missing file cannot be rescued by trying another parser.
        raise
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit must
        # still propagate instead of silently triggering the CSV fallback.
        df = pd.read_csv(hdf_path)

    y = df[target_columns].to_numpy()
    X = df.drop(columns=columns_to_drop).to_numpy()

    # First carve out the test split, then take the validation split from what is left.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=val_size, random_state=random_state
    )

    return (X_train, y_train, X_test, y_test, X_val, y_val)

In [None]:
class SaveStatsCallback(tf.keras.callbacks.Callback):
    """Keras callback that appends training statistics to a text file under ``Logs/``.

    After every epoch the epoch number and each entry of ``logs`` are appended;
    when training ends the total wall-clock training time is appended.
    """

    def __init__(self, file_name):
        """Store the log path and make sure its directory exists.

        Args:
            file_name: Name of the log file, created inside ``Logs/``.
        """
        super().__init__()
        self.file_name = "Logs/"+file_name
        self.start_time = None
        # open(..., 'a') creates the file but not missing parent directories,
        # so without this the first write fails on a fresh checkout.
        os.makedirs(os.path.dirname(self.file_name), exist_ok=True)

    def on_train_begin(self, logs=None):
        """Record the wall-clock time at which training starts."""
        self.start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        """Append the 1-based epoch number and every logged metric to the file."""
        logs = logs or {}
        with open(self.file_name, 'a') as f:
            f.write(f"Epoch {epoch + 1}:\n")
            for key, value in logs.items():
                f.write(f"{key}: {value}\n")

    def on_train_end(self, logs=None):
        """Append the total training time, if a start time was recorded."""
        if self.start_time is None:
            # on_train_begin never ran, so there is no duration to report.
            return
        total_time = time.time() - self.start_time
        with open(self.file_name, 'a') as f:
            f.write(f"Total training time: {total_time} seconds\n")

In [None]:
# Load the Linux dataset and unpack the six arrays returned by
# generate_train_test_samples: "perf" is the target, and both "perf" and
# "active_options" are removed from the feature matrix; 20% of rows go to test.
(X_train, y_train, X_test, y_test, X_validation, y_validation) = generate_train_test_samples(
    hdf_path=data_linux,
    target_columns=["perf"],
    columns_to_drop=["perf", "active_options"],
    test_size=0.2,
)

In [None]:
# Show the shape of the training feature matrix; its second dimension is the
# feature count used to size the network below.
X_train.shape

In [None]:
# Two hidden ELU layers, each as wide as the number of input features, with L2
# weight regularisation, followed by a single linear output unit.
# The Dropout layers use rate 0.0, i.e. they currently drop nothing.
n_features = X_train.shape[1]
l2_strength = 0.01

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(
        n_features,
        activation=tf.keras.layers.ELU(),
        input_shape=(n_features,),
        kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
    )
)
model.add(tf.keras.layers.Dropout(0.0))
model.add(
    tf.keras.layers.Dense(
        n_features,
        activation=tf.keras.layers.ELU(),
        kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
    )
)
model.add(tf.keras.layers.Dropout(0.0))
model.add(tf.keras.layers.Dense(1))

In [None]:
# AdamW optimizer from TensorFlow Addons.
# NOTE(review): the compile cell that follows passes optimizer="adam", so this
# object is not used by the model as written — confirm which one is intended.
adamw = tfa.optimizers.AdamW(
    weight_decay=0.01,
    learning_rate=0.001,
)

In [None]:
# Train with Adam on the Huber loss; additionally report Huber, MSE and MAPE.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.Huber(),
    metrics=[
        tf.keras.losses.Huber(),
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsolutePercentageError(),
    ],
)

In [None]:
# Stop once val_loss has not improved for 15 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
)

In [None]:
# Time the full training run.
# Early stopping (with restore_best_weights) picks the final weights based on
# val_loss, so it must monitor the dedicated validation split. Monitoring the
# test split would leak it into model selection and bias the later evaluation
# on X_test / y_test.
start = time.time()
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    validation_data=(X_validation, y_validation),
    callbacks=[early_stopping, SaveStatsCallback("result_smooth")],
)
end = time.time()

In [None]:
# Wall-clock duration of the fit call, in seconds.
elapsed_seconds = end - start
print(elapsed_seconds)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Overlay predictions (solid blue) and true targets (dashed red) for the first
# test samples and save the figure to disk.
n_shown = 100
pred = model.predict(X_test)
predicted_head = pred[:n_shown]
actual_head = y_test[:n_shown]
plt.plot(predicted_head, color="b")
plt.plot(actual_head, color="r", linestyle="dashed")
plt.savefig("tensorflow_prediction_best_model.png", dpi=500)

## Feature Selection test

In [None]:
import pandas as pd
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
from tensorflow.keras.optimizers import Ftrl
from tensorflow.keras.callbacks import TensorBoard
import time
# Path to one feature-importance ranking file
# (presumably "DT" stands for decision tree — confirm).
rank = "..//results//feature_importance_DT.csv"
# Read the file only to display it as the cell output; the frame is not stored.
pd.read_csv(rank)

In [None]:
def generate_train_test_samples2(df: pd.DataFrame, target_columns: list, features_list: list, device: str = 'cuda',
                                 test_size: float = 0.2):
    """Split an in-memory frame into train, test and validation arrays.

    Args:
        df: Frame holding both the feature columns and the target columns.
        target_columns: Column names used as the regression target ``y``.
        features_list: Column names selected as the feature matrix ``X``.
        device: Unused; kept only so existing callers keep working.
        test_size: Fraction of all rows held out as the test split.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X_val, y_val)`` of NumPy
        arrays. The validation split is 10% of the rows left after removing
        the test split; both splits use ``random_state=42``.
    """
    y = df[target_columns].to_numpy()
    X = df[features_list].to_numpy()

    # The test split is computed once (the original ran this identical call twice).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

    return (X_train, y_train, X_test, y_test, X_val, y_val)

In [None]:
# Quantile cut-offs applied to each ranking's "importance" column; e.g. 0.9
# keeps only the features at or above the 90th percentile of importance.
percentages =[0.9,0.8,0.7,0.5,0.3,0.1]
# Names of all entries in the results directory.
fiels = os.listdir("..//results//")
# Every entry except the last one is used as a ranking file.
# NOTE(review): os.listdir returns entries in arbitrary order, so which entry
# [:-1] drops is not guaranteed — confirm which file is meant to be excluded.
files = fiels[:-1]

In [None]:
# Replaces the earlier callback: stop after 2 epochs without val_loss improvement
# and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

In [None]:
# Same architecture as the first model, but with the width fixed at 9468:
# inputs passed to this model must have exactly that many columns.
layer_width = 9468
l2_strength = 0.01

model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(
        layer_width,
        activation=tf.keras.layers.ELU(),
        input_shape=(layer_width,),
        kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
    )
)
model.add(tf.keras.layers.Dropout(0.0))
model.add(
    tf.keras.layers.Dense(
        layer_width,
        activation=tf.keras.layers.ELU(),
        kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
    )
)
model.add(tf.keras.layers.Dropout(0.0))
model.add(tf.keras.layers.Dense(1))

In [None]:
# Train with Adam on the Huber loss; additionally report Huber, MSE and MAPE.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.Huber(),
    metrics=[
        tf.keras.losses.Huber(),
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsolutePercentageError(),
    ],
)

In [None]:
def _build_feature_selection_model(n_features: int):
    """Return a freshly initialised, compiled regressor for ``n_features`` inputs."""
    net = tf.keras.Sequential([
        tf.keras.layers.Dense(n_features, activation=tf.keras.layers.ELU(), input_shape=(n_features,),
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dropout(0.0),
        tf.keras.layers.Dense(n_features, activation=tf.keras.layers.ELU(),
                              kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        tf.keras.layers.Dropout(0.0),
        tf.keras.layers.Dense(1),
    ])
    net.compile(
        optimizer="adam",
        loss=tf.keras.losses.Huber(),
        metrics=[
            tf.keras.losses.Huber(),
            tf.keras.metrics.MeanSquaredError(),
            tf.keras.metrics.MeanAbsolutePercentageError(),
        ],
    )
    return net


def train_model_for_all_features(files: list[str], percentages: list[float],
                                 data_path: str = "data/dados.h5",
                                 results_dir: str = "..//results//",
                                 output_path: str = "feature_selection_experimente_tensorflow.csv"):
    """Train one model per (ranking file, quantile cut-off) pair and log the results as CSV.

    For every ranking file and every cut-off, the features whose importance is
    at or above that quantile are selected, a new model sized for exactly that
    many features is trained, and one CSV row is written.

    A fresh model is built for each run because the number of selected features
    changes from run to run (a single fixed-width model cannot accept them) and
    because reusing one model would carry trained weights over between runs.

    Args:
        files: Ranking file names inside ``results_dir``; each file must have
            ``importance`` and ``features`` columns.
        percentages: Quantiles in ``[0, 1]`` used as importance cut-offs.
        data_path: HDF5 file holding the full dataset, including the ``perf`` target.
        results_dir: Directory prefix prepended verbatim to each file name.
        output_path: CSV file the results are written to (overwritten).

    Returns:
        None. Relies on the module-level ``early_stopping`` callback.
    """
    # Read the data before opening the output so a failed load does not leave
    # an empty results file behind.
    df = pd.read_hdf(data_path)

    with open(output_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["feature_selection", "percentage", "number_of_features",
                         "time_to_conclusion", "final_loss"])
        for file in files:
            rank = pd.read_csv(f"{results_dir}{file}")
            for percentage in percentages:
                cut = rank['importance'].quantile(percentage)
                top_percent = rank[rank['importance'] >= cut]["features"].tolist()
                number_of_features = len(top_percent)
                X_train, y_train, _X_test, _y_test, X_val, y_val = generate_train_test_samples2(
                    df, ["perf"], top_percent
                )

                # Release the previous run's graph/state before building the next model.
                tf.keras.backend.clear_session()
                run_model = _build_feature_selection_model(number_of_features)

                start = time.time()
                # Early stopping monitors the validation split, not the test split.
                history = run_model.fit(X_train, y_train, epochs=500,
                                        validation_data=(X_val, y_val),
                                        callbacks=[early_stopping])
                end = time.time()

                # round() removes float noise such as 1 - 0.9 == 0.09999999999999998.
                writer.writerow([file, round(1 - percentage, 10), number_of_features,
                                 end - start, history.history['loss'][-1]])
                # Keep finished rows on disk even if a later run crashes.
                f.flush()

In [None]:
# Run the feature-selection experiment over every ranking file and cut-off.
train_model_for_all_features(
    files=files,
    percentages=percentages,
)

In [None]:
# Compile the model again with the same settings: Adam, Huber loss, and
# Huber / MSE / MAPE as reported metrics.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.Huber(),
    metrics=[
        tf.keras.losses.Huber(),
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsolutePercentageError(),
    ],
)