In [3]:
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, root_mean_squared_error
from csv import writer
def evaluate_linear(x_test, y_test, model):
    """Score a fitted model on held-out data and print the results.

    The predictions from ``model.predict(x_test)`` are compared against
    ``y_test`` with three scikit-learn metrics: mean absolute percentage
    error, mean absolute error and root mean squared error. Each score is
    printed on its own line.

    Args:
        x_test: Feature matrix passed to ``model.predict``.
        y_test: True target values matching ``x_test``.
        model: Fitted estimator exposing a ``predict`` method.

    Returns:
        A NumPy array ``[MAPE, MAE, RMSE]`` in that order. The values are
        exactly what the scikit-learn functions return (scikit-learn reports
        MAPE as a fraction, not multiplied by 100).
    """
    predictions = model.predict(x_test)

    # (printed label, metric function) pairs, in the order they are returned.
    scorers = (
        ("Mean absolute percentage error", mean_absolute_percentage_error),
        ("Mean absolute error", mean_absolute_error),
        ("Mean root mean squared error", root_mean_squared_error),
    )

    # Compute every score before printing anything, so a failing metric does
    # not leave a partially printed report.
    scores = [scorer(y_test, predictions) for _, scorer in scorers]

    for (label, _), score in zip(scorers, scores):
        print(f"{label}: {score}")

    return np.array(scores)

# Train a linear-regression baseline per data set and save its test metrics.
#
# For every name in `files`, "<cwd>/data sets/<name>.csv" is loaded, split
# 70/30 into train/test, fitted with LinearRegression and scored with
# evaluate_linear. One CSV row of [MAPE, MAE, RMSE] per repetition is written
# to "<cwd>/data analysis/<name> linear test_set analysis.csv".

# Kept as a plain string so any other cell that concatenates onto `path`
# keeps working.
path = str(Path.cwd())
files = ["AUFNIRA_z3.637557.smt2", "artificl.tar", "Johnny_1280x720_60_short.y4m", "ambivert.wav.tar", "smallbank-1", "jpeg-large", "corona", "beethoven.wav", "ambivert"]

input_dir = Path(path) / "data sets"
output_dir = Path(path) / "data analysis"
# Create the output folder up front so a missing directory cannot throw away
# the results of a finished training run.
output_dir.mkdir(parents=True, exist_ok=True)

for file in files:

    # Data set names contain dots, so the extension is appended textually
    # rather than with Path.with_suffix (which would replace ".smt2" etc.).
    data = pd.read_csv(input_dir / f"{file}.csv")
    print(f"Training for {file}")
    data_array = data.to_numpy()

    # Every column but the last is a feature; the last column is the target.
    # The target is sliced with -1: so it stays two-dimensional.
    x = data_array[:, :-1]
    y = data_array[:, -1:]

    # NOTE(review): features/hidden/batch are not used by this linear
    # baseline. They are kept only because they are module-level names that
    # other notebook cells may read -- confirm and delete if nothing does.
    features = x.shape[1]
    hidden = int(features * 7.5)
    batch = int(x.shape[0] / 50)

    # Fixed seed, re-created for every data set, so each data set gets the
    # same sequence of splits on every run.
    rng = np.random.RandomState(seed=111111)
    repetitions = 1
    metrics = np.zeros((repetitions, 3))
    for i in range(repetitions):
        # Pass the seeded generator to the split: previously `rng` was
        # created but never used, so results changed from run to run.
        # Sharing one RandomState instance still yields a different
        # (but reproducible) split on each repetition.
        x_train, x_test, y_train, y_test = train_test_split(
            x, y, test_size=0.3, random_state=rng
        )
        model = LinearRegression().fit(x_train, y_train)
        print("Results on test set:")
        metrics[i] = evaluate_linear(x_test, y_test, model)

    output_path = output_dir / f"{file} linear test_set analysis.csv"
    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='') as file_object:
        writer_object = writer(file_object)
        # One row per repetition: MAPE, MAE, RMSE.
        writer_object.writerows(metrics)

Training for AUFNIRA_z3.637557.smt2
Results on test set:
Mean absolute percentage error: 8.084588767009006
Mean absolute error: 88.49760693995725
Mean root mean squared error: 99.86683399849277
Training for artificl.tar
Results on test set:
Mean absolute percentage error: 0.2967736138237847
Mean absolute error: 0.006454711914062499
Mean root mean squared error: 0.007732359536871906
Training for Johnny_1280x720_60_short.y4m
Results on test set:
Mean absolute percentage error: 0.8720714565097186
Mean absolute error: 2.4569789559410116
Mean root mean squared error: 3.2365008856079474
Training for ambivert.wav.tar
Results on test set:
Mean absolute percentage error: 1.7492795222477235
Mean absolute error: 13.965689655172413
Mean root mean squared error: 21.591494853298137
Training for smallbank-1
Results on test set:
Mean absolute percentage error: 0.05728584517213347
Mean absolute error: 1130.6165873913187
Mean root mean squared error: 1356.2391955525623
Training for jpeg-large
Results on