In [None]:
# Download the page and fail loudly on an HTTP error instead of silently
# parsing an error page; the timeout keeps the cell from hanging forever.
r = requests.get(url, timeout = 30)
r.raise_for_status()

soup = bs(r.text, "lxml")

# The data lives in the element with id "GridView1".
table = soup.find(id = "GridView1")
if table is None:
    raise ValueError('No element with id "GridView1" was found in the downloaded page')
rows = table.find_all("tr")

column_names = ["var_name", "var_descr", "file_name", "file_descr", "start_year", "end_year", "component", "constraint"]

# One list of cell texts per table row. The first <tr> is skipped, exactly as
# before (presumably it is the header row, which holds no <td> cells).
records = [[cell.text for cell in row.find_all("td")] for row in rows[1:]]

# Every data row must provide exactly one cell per column; a mismatch raises
# ValueError (as the tuple unpacking used to), but now says which row is wrong.
for position, record in enumerate(records, start = 1):
    if len(record) != len(column_names):
        raise ValueError(f"Row {position} has {len(record)} cells, expected {len(column_names)}")

# Per-column lists are still exposed under their original names in case later
# cells refer to them.
var_names, var_descrs, file_names, file_descrs, start_years, end_years, components, constraints = (
    [record[index] for record in records] for index in range(len(column_names))
)

df = pd.DataFrame(records, columns = column_names)

df.head(2)

In [None]:
def var_descr_detector(var_name, vars_df):
    """Return the description stored in ``vars_df`` for ``var_name``.

    Parameters
    ----------
    var_name
        Value to look up in the ``"var_name"`` column.
    vars_df
        DataFrame with at least the columns ``"var_name"`` and ``"var_descr"``.

    Returns
    -------
    The ``"var_descr"`` value of the first row whose ``"var_name"`` matches.

    Raises
    ------
    KeyError
        If ``vars_df`` lacks one of the two columns.
    IndexError
        If no row matches ``var_name``.
    """
    # The column is spelled "var_name"; the previous "vAr_nAmE" key never
    # matched it, so every lookup failed with KeyError.
    matches = vars_df.loc[vars_df["var_name"] == var_name, "var_descr"]
    return matches.values[0]

def n_rows(df, n_columns):
    """Return how many grid rows are needed to give every column of ``df``
    its own cell when the grid is ``n_columns`` cells wide.

    This is the number of columns of ``df`` divided by ``n_columns``,
    rounded up.
    """
    full_rows, leftover = divmod(len(df.columns), n_columns)

    # A partially filled last row still counts as a row.
    return full_rows + 1 if leftover else full_rows

def multi_axes_plotter(df, n_columns, kind, figsize, var_names = None):
    """Plot every column of ``df`` on its own axes in a grid of subplots.

    Parameters
    ----------
    df
        DataFrame whose columns are plotted one per axes, in column order,
        filling the grid row by row.
    n_columns
        Number of subplot columns; the number of rows comes from ``n_rows``.
    kind
        ``"strip"``, ``"dist"`` or ``"box"`` select the matching seaborn plot;
        any other value draws a 30-bin histogram.
    figsize
        Figure size forwarded to ``plt.subplots``.
    var_names
        Optional DataFrame handed to ``var_descr_detector`` to look up an
        x-axis label for each column. When omitted, or when a column has no
        usable entry, the label chosen by seaborn is kept.

    Returns
    -------
    The created matplotlib figure.
    """
    n_rows_ = n_rows(df, n_columns)

    # squeeze = False keeps `axes` two-dimensional even when the grid has a
    # single row or a single column; otherwise matplotlib returns a 1-D array
    # (or a bare Axes) and indexing it as a grid fails.
    fig, axes = plt.subplots(n_rows_, n_columns, figsize = figsize, squeeze = False)

    # axes.flat walks the grid row by row; zip stops after the last column of
    # df, so any surplus axes in the final row are simply left empty.
    for count, ax in zip(range(df.shape[1]), axes.flat):
        series = df.iloc[:, count]

        if kind == "strip":
            sns.stripplot(y = series, ax = ax)
        elif kind == "dist":
            # NOTE(review): distplot is deprecated in seaborn; kept so the
            # rendered output does not change.
            sns.distplot(series, ax = ax)
        elif kind == "box":
            # Passed as x explicitly: newer seaborn versions interpret the
            # first positional argument as `data` rather than `x`.
            sns.boxplot(x = series, ax = ax)
        else:
            sns.histplot(series, ax = ax, bins = 30)

        if var_names is not None:
            try:
                ax.set(xlabel = var_descr_detector(series.name, var_names))
            except (KeyError, IndexError):
                # No description available for this column: keep the default
                # label instead of aborting the whole figure.
                pass

    return fig

In [None]:
##### Progress callback, passed to model.fit through its `callbacks` argument
class my_callback(Callback):
    """Print a short progress message after every 100th epoch."""

    def on_epoch_end(self, epoch, logs = None):
        # `epoch` is zero-based, so the number of finished epochs is epoch + 1.
        finished = epoch + 1
        if epoch > 0 and finished % 100 == 0:
            print(f"Epoch number {epoch + 1} done")

########################### DATA MANIPULATION ###########################
##### Total time points
def data(N):
    """Generate a noisy synthetic series of ``N`` points.

    Returns the tuple ``(t, x)`` where ``t`` is ``0 .. N-1`` and ``x`` is the
    product of two sine waves of ``t`` (scaled by 2) plus Gaussian noise
    scaled by 0.5.
    """
    t = np.arange(0, N)

    clean_signal = 2 * np.sin(0.02 * t) * np.sin(0.003 * t)
    noise = 0.5 * np.random.normal(size=N)

    return t, clean_signal + noise

##### Batches
def batch_calculator(t, x, batch_size):
    """Return the first ``batch_size`` elements of ``t`` and of ``x``."""
    leading = slice(None, batch_size)

    return t[leading], x[leading]

# Train_test splitter
def train_test(t_batch, x_batch, split):
    """Split ``t_batch`` and ``x_batch`` into a leading train part and a
    trailing test part.

    The cut position is ``round(split * len(x_batch))`` and is applied to
    both sequences. Returns ``(t_train, t_test, x_train, x_test)``.
    """
    cut = round(split * len(x_batch))
    train_part, test_part = slice(None, cut), slice(cut, None)

    return t_batch[train_part], t_batch[test_part], x_batch[train_part], x_batch[test_part]

########################### MODELING ###########################
##### Neural network
def model_creator():
    """Build and compile the network used for the forecasts.

    The model is a 128-unit LSTM over inputs of shape ``(1, 4)``, followed by
    a 32-unit ReLU dense layer and a single linear output unit. It is compiled
    with mean-squared-error loss (also reported as a metric) and an RMSprop
    optimizer with learning rate 0.001.

    Returns
    -------
    The compiled model.
    """
    model = Sequential([
        layers.LSTM(units = 128, input_shape = (1, 4), activation = "relu"),
        layers.Dense(32, activation = "relu"),
        layers.Dense(1)
    ])

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and is rejected by current Keras releases.
    model.compile(loss = "mean_squared_error", optimizer = RMSprop(learning_rate = .001), metrics = ["mse"])

    return model

##### Looper
def model_trainer(model, train, test):
    """Fit ``model`` on ``train`` and predict on both ``train`` and ``test``.

    Relies on the module-level names ``step`` and ``convert_to_matrix``.

    Returns the tuple ``(history, train_prediction, test_prediction)`` where
    ``history`` is the ``history`` attribute of the object returned by
    ``model.fit``.
    """
    def _as_model_input(series):
        # Pad the series by repeating its last value `step` times, then let
        # convert_to_matrix turn it into (inputs, targets).
        padded = np.append(series, np.repeat(series[-1,], step))
        inputs, targets = convert_to_matrix(padded, step)

        # NOTE(review): the width 4 matches the model's input_shape (1, 4);
        # presumably this requires step == 4 - confirm against convert_to_matrix.
        return inputs.reshape(len(inputs), 1, 4), targets

    X_train, y_train = _as_model_input(train)
    X_test, _ = _as_model_input(test)

    # Only the training split is used for fitting.
    fit_result = model.fit(X_train, y_train, epochs = 100, batch_size = 16, callbacks = [my_callback()], verbose = 0)

    train_prediction = model.predict(X_train)
    test_prediction = model.predict(X_test)

    return fit_result.history, train_prediction, test_prediction

########################### DATA VISUALIZATION ###########################
# To plot everything together
def plotter(model_history, t_train, t_test, x_train, x_test, train_prediction, test_prediction):
    """Draw the data, the predictions and the training loss in one figure.

    The top row holds two panels (actual data on the left, predictions on the
    right), each showing the train and test parts with a red vertical line at
    the last training time point. The bottom row spans the full width and
    shows the square root of ``model_history['loss']`` per epoch.

    Returns the created figure.
    """
    def _train_test_panel(ax, title, train_values, test_values):
        # Shared layout of the two top panels.
        ax.set_title(title)
        ax.plot(t_train, train_values, c = 'blue')
        ax.plot(t_test, test_values, c = 'orange', alpha = 0.7)
        ax.legend(['Train','Test'])
        ax.axvline(t_train[-1], c="r")
        ax.grid(True)

    fig = plt.figure(figsize = (14, 8), constrained_layout=True)
    grid = fig.add_gridspec(2, 2)

    # Top-left: actual data
    _train_test_panel(fig.add_subplot(grid[0, :-1]), 'Actual data', x_train, x_test)

    # Top-right: predictions
    _train_test_panel(fig.add_subplot(grid[0, -1]), 'Predictions', train_prediction, test_prediction)

    # Bottom: loss curve across the whole width
    loss_ax = fig.add_subplot(grid[1, :])
    loss_ax.set_title('RMSE loss over epochs')
    loss_ax.plot(np.sqrt(model_history['loss']),c='k',lw=2)
    loss_ax.grid(True)
    loss_ax.set_xlabel("Epochs",fontsize=14)
    loss_ax.set_ylabel("Root-mean-squared error",fontsize=14)

    return fig