In [None]:
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, root_mean_squared_error
from csv import writer
def evaluate_linear(x_test, y_test, model):
    """Score a fitted model on held-out data and report three error metrics.

    Predictions come from ``model.predict(x_test)`` and are compared against
    ``y_test``. Each metric is printed on its own line and then returned.

    Args:
        x_test: Feature values passed to ``model.predict``.
        y_test: Reference target values for ``x_test``.
        model: Object exposing a ``predict`` method.

    Returns:
        A NumPy array holding, in order, the mean absolute percentage error,
        the mean absolute error and the root mean squared error.
    """
    predictions = model.predict(x_test)

    # All metrics are computed before anything is printed.
    mape = mean_absolute_percentage_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    rmse = root_mean_squared_error(y_test, predictions)

    report = (
        ("Mean absolute percentage error", mape),
        ("Mean absolute error", mae),
        ("Mean root mean squared error", rmse),
    )
    for label, value in report:
        print(f"{label}: {value}")

    return np.array([mape, mae, rmse])

# Train and evaluate a linear-regression baseline for every data set.
#
# For each name in ``files`` this reads "<cwd>/data sets/<name>.csv", fits a
# LinearRegression on a 70/30 train/test split, prints the test-set metrics
# and writes one CSV row per repetition to
# "<cwd>/data analysis/<name> linear test_set analysis.csv".
path = str(Path.cwd())
base_dir = Path(path)
files = ["AUFNIRA_z3.637557.smt2", "artificl.tar", "Johnny_1280x720_60_short.y4m", "ambivert.wav.tar", "smallbank-1", "jpeg-large", "corona", "beethoven.wav", "ambivert"]

# Create the output directory up front so the write at the end of each
# iteration cannot fail merely because the directory is missing.
analysis_dir = base_dir / "data analysis"
analysis_dir.mkdir(parents=True, exist_ok=True)

for file in files:

    # Data-set names contain dots (e.g. "artificl.tar"), so ".csv" is appended
    # to the full name instead of being set with ``Path.with_suffix``.
    data = pd.read_csv(base_dir / "data sets" / f"{file}.csv")
    print(f"Training for {file}")
    data_array = data.to_numpy()

    # Every column but the last is a feature; the last column is the target
    # (sliced with -1: so it stays two-dimensional).
    x = data_array[:, :-1]
    y = data_array[:, -1:]

    # NOTE(review): features/hidden/batch are not used by the linear model in
    # this cell; kept in case another cell reads them - confirm and remove.
    features = x.shape[1]
    hidden = int(features * 7.5)
    batch = int(x.shape[0] / 50)

    # Re-seeded per data set so every file sees the same sequence of splits.
    rng = np.random.RandomState(seed=111111)
    repetitions = 1
    metrics = np.zeros((repetitions, 3))
    for i in range(repetitions):
        # Passing the seeded generator makes the split reproducible, while
        # successive repetitions still draw different splits from it.
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.3, random_state=rng
        )
        model = LinearRegression().fit(x_train, y_train)
        print("Results on test set:")
        metrics[i] = evaluate_linear(x_test, y_test, model)

    output_file = analysis_dir / f"{file} linear test_set analysis.csv"
    with open(output_file, 'w', newline='') as file_object:
        writer_object = writer(file_object)
        # One row per repetition: MAPE, MAE, RMSE.
        writer_object.writerows(metrics)

Training for AUFNIRA_z3.637557.smt2
Results on test set:
Mean absolute percentage error: 7.212125467824746
Mean absolute error: 87.70598427492789
Mean root mean squared error: 99.6500852127757
Results on test set:
Mean absolute percentage error: 7.1712248290242036
Mean absolute error: 88.98507107534036
Mean root mean squared error: 100.31255997543532
Results on test set:
Mean absolute percentage error: 7.26240871353975
Mean absolute error: 91.23552534790922
Mean root mean squared error: 103.79579412876299
Training for artificl.tar
Results on test set:
Mean absolute percentage error: 0.2771888404422336
Mean absolute error: 0.006210428810119629
Mean root mean squared error: 0.007362177654639514
Results on test set:
Mean absolute percentage error: 0.28370430416531034
Mean absolute error: 0.006387519327799479
Mean root mean squared error: 0.007539402721822576
Results on test set:
Mean absolute percentage error: 0.29276163313123915
Mean absolute error: 0.006351664733886718
Mean root mean sq