<a href="https://colab.research.google.com/github/PreyPython123/Master-V24-Semiveiledet-Regresjon/blob/Collagen-Pradeep/Bioco_Collagen_Semiveiledet_Regresjonsmetoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Lasting av nødvendige bibliotek og pakker

In [1]:
!pip install LAMDA-SSL
!pip install optuna

Collecting LAMDA-SSL
  Downloading LAMDA_SSL-1.0.2-py3-none-any.whl (240 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.8/240.8 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting torch-geometric (from LAMDA-SSL)
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric, LAMDA-SSL
Successfully installed LAMDA-SSL-1.0.2 torch-geometric-2.5.0
Collecting optuna
  Downloading optuna-3.5.0-py3-none-any.whl (413 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.4/413.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  

Importering av nødvendige bibliotek og pakker

In [2]:
import pandas as pd
import numpy as np
import copy

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from LAMDA_SSL.Algorithm.Regression.CoReg import CoReg

import optuna
from optuna.visualization import plot_optimization_history

Evalueringsmetrikker

In [3]:
from sklearn.metrics import (mean_absolute_error,
                             mean_squared_error,
                             mean_absolute_percentage_error,
                             r2_score)

# Lager en funksjon for RMSE
def rmse(y_faktisk, y_predikert):
  return np.sqrt(mean_squared_error(y_faktisk, y_predikert))

Importering av relevant data

In [4]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
# Velger første kolonne med dato og tid som index
collagen_data = pd.read_csv('/content/drive/MyDrive/MasterV24/Bioco_data/collagen_data.csv',
                            header=0,
                            sep=',',
                            index_col=0)

# Importerer rådata
rå_data = pd.read_csv('/content/drive/MyDrive/MasterV24/BiocoData.csv',
                      header=0,
                      sep=';',
                      index_col=0)

# Formatterer index til riktig format og datatype
collagen_data.index = pd.to_datetime(collagen_data.index,
                                     format='%Y-%m-%d %H:%M:%S')

rå_data.index = pd.to_datetime(rå_data.index,
                               format='%d-%m-%Y %H:%M:%S.%f')

Tilfeldighetsfrø

In [6]:
#Tilfeldighetsfrø
random_seed = 123

# Oppdeling av datasett

**Alternativ 1: Fordelt på enzymtyper**

In [7]:
  def trening_testsett_oppdeling_enzym(original_df,
                                      filtert_df,
                                      test_andel=0.2,
                                      random_seed=123):




    # Responsvariabel
    responsvariabel = filtert_df.columns[-1]

    # Deler opp markert og umarkert del av datasettet
    data_markert = filtert_df.dropna(subset=[responsvariabel])
    data_umarkert = filtert_df[filtert_df[responsvariabel].isna()]

    # Enzymtyper
    enzym_data = original_df.merge(data_markert,
                                  left_index=True,
                                  right_index=True,
                                  how='inner',
                                  suffixes=('','_drop'))['EnzymeCode']

    # Splitter trening og testdata etter enzymtype
    trening_markert, test_markert, _, _ = train_test_split(data_markert,
                                                          enzym_data,
                                                          test_size=test_andel,
                                                          stratify=enzym_data,
                                                          random_state=random_seed)

    # Definerer trening og testsett for data

    felles_indeks = filtert_df.index.intersection(test_markert.index)
    treningsdata = filtert_df.drop(felles_indeks)
    trening_umarkert = treningsdata[treningsdata[responsvariabel].isna()]

    return trening_markert, test_markert, trening_umarkert, treningsdata

**Alternativ 2: Fordelt på dag og kontinuitet**

In [8]:
def trening_testsett_oppdeling_dag(original_df,
                                   filtert_df,
                                   test_andel=0.2):

    # Responsvariabel
    responsvariabel = filtert_df.columns[-1]

    # Lager nytt trening og testsett
    data_markert = filtert_df.dropna(subset=[responsvariabel])

    trening_markert = copy.deepcopy(data_markert)
    test_markert = pd.DataFrame(columns=list(data_markert.columns))

    # Råmateriale type
    råmateriale_data = original_df.merge(data_markert,
                                         left_index=True,
                                         right_index=True,
                                         how='inner',
                                         suffixes=('','_drop'))['RawMaterialMix']
    data_markert['RawMaterialMix'] = råmateriale_data


    # Går gjennom hver dag, hver uke og samler første andel av rader til testsett
    for uke in data_markert.index.isocalendar().week.unique():
        for dag in data_markert.index.isocalendar().day.unique():
            data = data_markert[(data_markert.index.isocalendar().week == uke) &
                      (data_markert.index.isocalendar().day == dag)]
            test_rader = round((data.shape[0] * test_andel))
            test_data_dag = data.head(test_rader)

            # Sjekker første instans av "Turkey" eller "Chicken" i foreløpig testsett
            krav = (test_data_dag["RawMaterialMix"] == "Turkey") | (test_data_dag["RawMaterialMix"] == "Chicken")

            # Hvis testsettet inneholder "Turkey" eller "Chicken", legges det
            # til en rad, da første rad med gjelende utelukkes
            if krav.any():
              test_rader += 1

              # Finner mengde testsett uten "Turkey" eller "Chicken"
              while krav.any():
                test_data_dag = data.loc[krav.idxmax():].head(test_rader).copy()
                test_data_dag = test_data_dag.drop(index = krav.idxmax())

                # Finner nest beste mengde testsett uten "Turkey" eller "Chicken"
                if (test_data_dag.shape[0] < test_rader) & (test_data_dag.index[-1] == data.index[-1]):
                  test_rader -= 1
                  test_data_dag = data.head(test_rader).copy()

                krav = (test_data_dag["RawMaterialMix"] == "Turkey") | (test_data_dag["RawMaterialMix"] == "Chicken")

            # Dropper "RawMaterialMix" som kolonne
            test_data_dag = test_data_dag.drop(columns = ['RawMaterialMix'])

            # Legger til i fullstendig testsett og fjerner fra treningssett
            test_markert = pd.concat([test_markert, test_data_dag])
            trening_markert = trening_markert.drop(test_data_dag.index.copy())

    felles_indeks = filtert_df.index.intersection(test_markert.index)
    treningsdata = filtert_df.drop(felles_indeks.copy())
    trening_umarkert = treningsdata[treningsdata[responsvariabel].isna()]

    return trening_markert, test_markert, trening_umarkert, treningsdata

# NIR målinger inkludert

## Alternativ 1: Fordelt på enzymtyper

### Alternativ 1: Et testsett

In [None]:
# Velger testandel
test_andel = 0.2

# Deler datasettet i trening og testdel, samt markert og umarkert del
collagen_trening_markert, collagen_test, collagen_trening_umarkert, collagen_trening = trening_testsett_oppdeling_enzym(rå_data,
                                                                                                                        collagen_data,
                                                                                                                        test_andel)
# Deler settene i forklarings- og responsvariabler
X_trening = collagen_trening.iloc[:, :-1]
X_trening_markert = collagen_trening_markert.iloc[:, :-1]
X_trening_umarkert = collagen_trening_umarkert.iloc[:, :-1]
X_test = collagen_test.iloc[:, :-1]

y_trening_markert = collagen_trening_markert.iloc[:, -1]
y_test = collagen_test.iloc[:, -1]

# Skalerer treningsdata
skalerer = StandardScaler()
skalerer.fit(X_trening)
X_trening_markert_transformert = skalerer.transform(X_trening_markert)
X_trening_umarkert_transformert = skalerer.transform(X_trening_umarkert)
X_test_transformert = skalerer.transform(X_test)

**CoReg**

Hyperparamter optimalisering med Optuna

In [None]:
def objective(trial):
  parametere = {
      'k1': trial.suggest_int('k1', 1, 10),
      'k2': trial.suggest_int('k2', 1, 10),
      'p1': trial.suggest_int('p1', 1, 10),
      'p2': trial.suggest_int('p2', 1, 10)
  }

  coreg = CoReg()

  coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
  y_test_prediksjon = coreg.predict(X_test_transformert)
  rmse_test = rmse(y_test, y_test_prediksjon)
  return rmse_test

if __name__ == "__main__":
  study = optuna.create_study(direction='minimize')
  study.optimize(objective, n_trials=10)

plot_optimization_history(study)

[I 2024-02-14 20:42:41,545] A new study created in memory with name: no-name-a4f82ac0-e634-42b0-800e-cd81d0932bfc
[I 2024-02-14 20:44:24,194] Trial 0 finished with value: 4.038427262301392 and parameters: {'k1': 10, 'k2': 7, 'p1': 10, 'p2': 10}. Best is trial 0 with value: 4.038427262301392.
[I 2024-02-14 20:46:05,552] Trial 1 finished with value: 3.7090680253126718 and parameters: {'k1': 7, 'k2': 8, 'p1': 3, 'p2': 8}. Best is trial 1 with value: 3.7090680253126718.
[I 2024-02-14 20:47:44,977] Trial 2 finished with value: 3.53469274282913 and parameters: {'k1': 7, 'k2': 9, 'p1': 10, 'p2': 1}. Best is trial 2 with value: 3.53469274282913.
[I 2024-02-14 20:49:20,477] Trial 3 finished with value: 3.8592655773132374 and parameters: {'k1': 4, 'k2': 1, 'p1': 9, 'p2': 10}. Best is trial 2 with value: 3.53469274282913.
[I 2024-02-14 20:50:55,098] Trial 4 finished with value: 3.7445986929648925 and parameters: {'k1': 6, 'k2': 1, 'p1': 8, 'p2': 5}. Best is trial 2 with value: 3.53469274282913.
[

Evauleringer av beste modell

In [None]:
def detailed_objective(trial):
  parametere = {
      'k1': trial.suggest_int('k1', 1, 10),
      'k2': trial.suggest_int('k2', 1, 10),
      'p1': trial.suggest_int('p1', 1, 10),
      'p2': trial.suggest_int('p2', 1, 10)
  }

  coreg = CoReg()

  coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
  y_test_prediksjon = coreg.predict(X_test_transformert)
  y_trening_prediksjon = coreg.predict(X_trening_markert_transformert)

  rmse_test = rmse(y_test, y_test_prediksjon)
  r2_test = r2_score(y_test, y_test_prediksjon)
  mae_test = mean_absolute_error(y_test, y_test_prediksjon)
  mape_test = mean_absolute_percentage_error(y_test, y_test_prediksjon)


  rmse_trening = rmse(y_trening_markert, y_trening_prediksjon)
  r2_trening = r2_score(y_trening_markert, y_trening_prediksjon)
  mae_trening = mean_absolute_error(y_trening_markert, y_trening_prediksjon)
  mape_trening = mean_absolute_percentage_error(y_trening_markert, y_trening_prediksjon)

  return rmse_test, r2_test, mae_test, mape_test, rmse_trening, r2_trening, mae_trening, mape_trening

rmse_test_resultat = detailed_objective(study.best_trial)[0]
r2_test_resultat = detailed_objective(study.best_trial)[1]
mae_test_resultat = detailed_objective(study.best_trial)[2]
mape_test_resultat = detailed_objective(study.best_trial)[3]

rmse_trening_resultat = detailed_objective(study.best_trial)[4]
r2_trening_resultat = detailed_objective(study.best_trial)[5]
mae_trening_resultat = detailed_objective(study.best_trial)[6]
mape_trening_resultat = detailed_objective(study.best_trial)[7]

collagen_coreg_resultater_enzym = pd.DataFrame(columns = ["Test andel",
                                                          "RMSE test",
                                                          "R2 test",
                                                          "MAE test",
                                                          "MAPE test",
                                                          "RMSE trening",
                                                          "R2 trening",
                                                          "MAE trening",
                                                          "MAPE trening",
                                                          "Beste parametere"])

beste_parametere = study.best_params

resultater = [test_andel,
              rmse_test_resultat,
              r2_test_resultat,
              mae_test_resultat,
              mape_test_resultat,
              rmse_trening_resultat,
              r2_trening_resultat,
              mae_trening_resultat,
              mape_trening_resultat,
              beste_parametere]

collagen_coreg_resultater_enzym.loc[len(collagen_coreg_resultater_enzym)] = resultater
print(collagen_coreg_resultater_enzym)

KeyboardInterrupt: 

### Alternativ 1: Flere testandeler

In [None]:
collagen_coreg_resultater_dag = pd.DataFrame(columns = ["Test andel",
                                                        "RMSE test",
                                                        "R2 test",
                                                        "MAE test",
                                                        "MAPE test",
                                                        "RMSE trening",
                                                        "R2 trening",
                                                        "MAE trening",
                                                        "MAPE trening",
                                                        "Beste parametere"])

test_andeler = [0.1, 0.2, 0.3, 0.4, 0.5]

for test_andel in test_andeler:
  collagen_trening_markert, collagen_test, collagen_trening_umarkert, collagen_trening = trening_testsett_oppdeling_enzym(rå_data,
                                                                                                                          collagen_data,
                                                                                                                          test_andel)

  X_trening = collagen_trening.iloc[:, :-1]
  X_trening_markert = collagen_trening_markert.iloc[:, :-1]
  X_trening_umarkert = collagen_trening_umarkert.iloc[:, :-1]
  X_test = collagen_test.iloc[:, :-1]

  y_trening_markert = collagen_trening_markert.iloc[:, -1]
  y_test = collagen_test.iloc[:, -1]

  skalerer = StandardScaler()
  skalerer.fit(X_trening)
  X_trening_markert_transformert = skalerer.transform(X_trening_markert)
  X_trening_umarkert_transformert = skalerer.transform(X_trening_umarkert)
  X_test_transformert = skalerer.transform(X_test)

  def objective(trial):
    parametere = {
        'k1': trial.suggest_int('k1', 1, 10),
        'k2': trial.suggest_int('k2', 1, 10),
        'p1': trial.suggest_int('p1', 1, 10),
        'p2': trial.suggest_int('p2', 1, 10)
    }

    coreg = CoReg()

    coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
    y_test_prediksjon = coreg.predict(X_test_transformert)
    rmse_test = rmse(y_test, y_test_prediksjon)
    return rmse_test

  if __name__ == "__main__":
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=10)


  def detailed_objective(trial):
    parametere = {
        'k1': trial.suggest_int('k1', 1, 10),
        'k2': trial.suggest_int('k2', 1, 10),
        'p1': trial.suggest_int('p1', 1, 10),
        'p2': trial.suggest_int('p2', 1, 10)
    }

    coreg = CoReg()

    coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
    y_test_prediksjon = coreg.predict(X_test_transformert)
    y_trening_prediksjon = coreg.predict(X_trening_markert_transformert)

    rmse_test = rmse(y_test, y_test_prediksjon)
    r2_test = r2_score(y_test, y_test_prediksjon)
    mae_test = mean_absolute_error(y_test, y_test_prediksjon)
    mape_test = mean_absolute_percentage_error(y_test, y_test_prediksjon)


    rmse_trening = rmse(y_trening_markert, y_trening_prediksjon)
    r2_trening = r2_score(y_trening_markert, y_trening_prediksjon)
    mae_trening = mean_absolute_error(y_trening_markert, y_trening_prediksjon)
    mape_trening = mean_absolute_percentage_error(y_trening_markert, y_trening_prediksjon)

    return rmse_test, r2_test, mae_test, mape_test, rmse_trening, r2_trening, mae_trening, mape_trening

  rmse_test_resultat = detailed_objective(study.best_trial)[0]
  r2_test_resultat = detailed_objective(study.best_trial)[1]
  mae_test_resultat = detailed_objective(study.best_trial)[2]
  mape_test_resultat = detailed_objective(study.best_trial)[3]

  rmse_trening_resultat = detailed_objective(study.best_trial)[4]
  r2_trening_resultat = detailed_objective(study.best_trial)[5]
  mae_trening_resultat = detailed_objective(study.best_trial)[6]
  mape_trening_resultat = detailed_objective(study.best_trial)[7]

  beste_parametere = study.best_params

  resultater = [test_andel,
                rmse_test_resultat,
                r2_test_resultat,
                mae_test_resultat,
                mape_test_resultat,
                rmse_trening_resultat,
                r2_trening_resultat,
                mae_trening_resultat,
                mape_trening_resultat,
                beste_parametere]

  collagen_coreg_resultater_enzym.loc[len(collagen_coreg_resultater_enzym)] = resultater

[I 2024-02-15 20:44:53,442] A new study created in memory with name: no-name-1497e4f9-fc24-4094-a8b5-2f1d009feec6
[I 2024-02-15 20:46:20,950] Trial 0 finished with value: 3.410728077431902 and parameters: {'k1': 10, 'k2': 6, 'p1': 3, 'p2': 4}. Best is trial 0 with value: 3.410728077431902.
[I 2024-02-15 20:47:44,165] Trial 1 finished with value: 3.406415484138876 and parameters: {'k1': 5, 'k2': 6, 'p1': 4, 'p2': 7}. Best is trial 1 with value: 3.406415484138876.
[I 2024-02-15 20:49:07,131] Trial 2 finished with value: 3.6327959055266694 and parameters: {'k1': 4, 'k2': 3, 'p1': 1, 'p2': 9}. Best is trial 1 with value: 3.406415484138876.
[I 2024-02-15 20:50:31,564] Trial 3 finished with value: 3.5885077886775374 and parameters: {'k1': 2, 'k2': 5, 'p1': 2, 'p2': 4}. Best is trial 1 with value: 3.406415484138876.
[I 2024-02-15 20:51:53,801] Trial 4 finished with value: 3.7192863856940046 and parameters: {'k1': 1, 'k2': 10, 'p1': 7, 'p2': 5}. Best is trial 1 with value: 3.406415484138876.
[

KeyboardInterrupt: 

In [None]:
collagen_coreg_resultater_enzym

Unnamed: 0,Test andel,RMSE test,R2 test,RMSE trening,R2 trening,Beste parametere
0,0.1,3.785645,0.5453,1.10664,0.943903,"{'k1': 2, 'k2': 10, 'p1': 2, 'p2': 2}"
1,0.2,3.782097,0.555504,1.214655,0.948109,"{'k1': 4, 'k2': 7, 'p1': 7, 'p2': 3}"
2,0.3,3.97184,0.594327,1.139087,0.949522,"{'k1': 2, 'k2': 5, 'p1': 3, 'p2': 2}"
3,0.4,3.923748,0.6027,1.219026,0.945218,"{'k1': 7, 'k2': 10, 'p1': 4, 'p2': 1}"
4,0.5,3.881073,0.6177,1.083216,0.94928,"{'k1': 9, 'k2': 10, 'p1': 9, 'p2': 10}"


## Alternativ 2: Fordelt på dag og kontinuitet

### Alternativ 2: Et testsett

In [None]:
# Valg av testandel
test_andel = 0.2

# Oppdeling av datasett til trenin og testsett, samt markert og umarkert del
collagen_trening_markert, collagen_test, collagen_trening_umarkert, collagen_trening = trening_testsett_oppdeling_dag(rå_data,
                                                                                                                      collagen_data,
                                                                                                                      test_andel)
# Forklarings- og responsvariabler
X_trening = collagen_trening.iloc[:, :-1]
X_trening_markert = collagen_trening_markert.iloc[:, :-1]
X_trening_umarkert = collagen_trening_umarkert.iloc[:, :-1]
X_test = collagen_test.iloc[:, :-1]

y_trening_markert = collagen_trening_markert.iloc[:, -1]
y_test = collagen_test.iloc[:, -1]

# Skalering
skalerer = StandardScaler()
skalerer.fit(X_trening)
X_trening_markert_transformert = skalerer.transform(X_trening_markert)
X_trening_umarkert_transformert = skalerer.transform(X_trening_umarkert)
X_test_transformert = skalerer.transform(X_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_markert['RawMaterialMix'] = råmateriale_data


**CoReg**

Hyperparamter optimalisering med Optuna

In [None]:
def objective(trial):
  parametere = {
      'k1': trial.suggest_int('k1', 1, 10),
      'k2': trial.suggest_int('k2', 1, 10),
      'p1': trial.suggest_int('p1', 1, 10),
      'p2': trial.suggest_int('p2', 1, 10)
  }

  coreg = CoReg()

  coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
  y_test_prediksjon = coreg.predict(X_test_transformert)
  rmse_test = rmse(y_test, y_test_prediksjon)
  return rmse_test

if __name__ == "__main__":
  study = optuna.create_study(direction='minimize')
  study.optimize(objective, n_trials=10)

plot_optimization_history(study)

[I 2024-02-15 14:28:53,433] A new study created in memory with name: no-name-4a0c3411-e2ea-4af7-95d2-e672050f4c97
[I 2024-02-15 14:30:28,930] Trial 0 finished with value: 3.946188071342318 and parameters: {'k1': 9, 'k2': 3, 'p1': 9, 'p2': 10}. Best is trial 0 with value: 3.946188071342318.
[I 2024-02-15 14:32:09,881] Trial 1 finished with value: 3.8542007899294615 and parameters: {'k1': 4, 'k2': 1, 'p1': 1, 'p2': 1}. Best is trial 1 with value: 3.8542007899294615.
[I 2024-02-15 14:33:46,219] Trial 2 finished with value: 3.7641835996585167 and parameters: {'k1': 8, 'k2': 3, 'p1': 3, 'p2': 10}. Best is trial 2 with value: 3.7641835996585167.
[I 2024-02-15 14:35:20,185] Trial 3 finished with value: 3.670252371335196 and parameters: {'k1': 9, 'k2': 8, 'p1': 6, 'p2': 2}. Best is trial 3 with value: 3.670252371335196.
[I 2024-02-15 14:36:56,081] Trial 4 finished with value: 3.6237016038207743 and parameters: {'k1': 6, 'k2': 2, 'p1': 2, 'p2': 9}. Best is trial 4 with value: 3.6237016038207743

Evauleringer av beste modell

In [None]:
def detailed_objective(trial):
  parametere = {
      'k1': trial.suggest_int('k1', 1, 10),
      'k2': trial.suggest_int('k2', 1, 10),
      'p1': trial.suggest_int('p1', 1, 10),
      'p2': trial.suggest_int('p2', 1, 10)
  }

  coreg = CoReg()

  coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
  y_test_prediksjon = coreg.predict(X_test_transformert)
  y_trening_prediksjon = coreg.predict(X_trening_markert_transformert)

  rmse_test = rmse(y_test, y_test_prediksjon)
  r2_test = r2_score(y_test, y_test_prediksjon)
  mae_test = mean_absolute_error(y_test, y_test_prediksjon)
  mape_test = mean_absolute_percentage_error(y_test, y_test_prediksjon)


  rmse_trening = rmse(y_trening_markert, y_trening_prediksjon)
  r2_trening = r2_score(y_trening_markert, y_trening_prediksjon)
  mae_trening = mean_absolute_error(y_trening_markert, y_trening_prediksjon)
  mape_trening = mean_absolute_percentage_error(y_trening_markert, y_trening_prediksjon)

  return rmse_test, r2_test, mae_test, mape_test, rmse_trening, r2_trening, mae_trening, mape_trening

rmse_test_resultat = detailed_objective(study.best_trial)[0]
r2_test_resultat = detailed_objective(study.best_trial)[1]
mae_test_resultat = detailed_objective(study.best_trial)[2]
mape_test_resultat = detailed_objective(study.best_trial)[3]

rmse_trening_resultat = detailed_objective(study.best_trial)[4]
r2_trening_resultat = detailed_objective(study.best_trial)[5]
mae_trening_resultat = detailed_objective(study.best_trial)[6]
mape_trening_resultat = detailed_objective(study.best_trial)[7]

collagen_coreg_resultater_enzym = pd.DataFrame(columns = ["Test andel",
                                                          "RMSE test",
                                                          "R2 test",
                                                          "MAE test",
                                                          "MAPE test",
                                                          "RMSE trening",
                                                          "R2 trening",
                                                          "MAE trening",
                                                          "MAPE trening",
                                                          "Beste parametere"])

beste_parametere = study.best_params

resultater = [test_andel,
              rmse_test_resultat,
              r2_test_resultat,
              mae_test_resultat,
              mape_test_resultat,
              rmse_trening_resultat,
              r2_trening_resultat,
              mae_trening_resultat,
              mape_trening_resultat,
              beste_parametere]

collagen_coreg_resultater_enzym.loc[len(collagen_coreg_resultater_enzym)] = resultater
print(collagen_coreg_resultater_enzym)

   Test andel  RMSE test   R2 test  MAE test  MAPE test  RMSE trening  \
0         0.2   3.654918  0.453441  3.058378   0.146384      1.390003   

   R2 trening  MAE trening  MAPE trening                      Beste parametere  
0    0.925645     0.997047      0.044034  {'k1': 8, 'k2': 2, 'p1': 3, 'p2': 7}  


### Alternativ 2: Flere testandeler

In [10]:
collagen_coreg_resultater_dag = pd.DataFrame(columns = ["Test andel",
                                                        "RMSE test",
                                                        "R2 test",
                                                        "MAE test",
                                                        "MAPE test",
                                                        "RMSE trening",
                                                        "R2 trening",
                                                        "MAE trening",
                                                        "MAPE trening",
                                                        "Beste parametere"])

test_andeler = [0.1, 0.2, 0.3, 0.4, 0.5]

for test_andel in test_andeler:
  collagen_trening_markert, collagen_test, collagen_trening_umarkert, collagen_trening = trening_testsett_oppdeling_dag(rå_data,
                                                                                                                      collagen_data,
                                                                                                                      test_andel)


  X_trening = collagen_trening.iloc[:, :-1]
  X_trening_markert = collagen_trening_markert.iloc[:, :-1]
  X_trening_umarkert = collagen_trening_umarkert.iloc[:, :-1]
  X_test = collagen_test.iloc[:, :-1]

  y_trening_markert = collagen_trening_markert.iloc[:, -1]
  y_test = collagen_test.iloc[:, -1]

  skalerer = StandardScaler()
  skalerer.fit(X_trening)
  X_trening_markert_transformert = skalerer.transform(X_trening_markert)
  X_trening_umarkert_transformert = skalerer.transform(X_trening_umarkert)
  X_test_transformert = skalerer.transform(X_test)

  def objective(trial):
    parametere = {
        'k1': trial.suggest_int('k1', 1, 10),
        'k2': trial.suggest_int('k2', 1, 10),
        'p1': trial.suggest_int('p1', 1, 10),
        'p2': trial.suggest_int('p2', 1, 10)
    }

    coreg = CoReg()

    coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
    y_test_prediksjon = coreg.predict(X_test_transformert)
    rmse_test = rmse(y_test, y_test_prediksjon)
    return rmse_test

  if __name__ == "__main__":
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=10)


  def detailed_objective(trial):
    parametere = {
        'k1': trial.suggest_int('k1', 1, 10),
        'k2': trial.suggest_int('k2', 1, 10),
        'p1': trial.suggest_int('p1', 1, 10),
        'p2': trial.suggest_int('p2', 1, 10)
    }

    coreg = CoReg()

    coreg.fit(X_trening_markert_transformert, y_trening_markert.values, X_trening_umarkert_transformert)
    y_test_prediksjon = coreg.predict(X_test_transformert)
    y_trening_prediksjon = coreg.predict(X_trening_markert_transformert)

    rmse_test = rmse(y_test, y_test_prediksjon)
    r2_test = r2_score(y_test, y_test_prediksjon)
    mae_test = mean_absolute_error(y_test, y_test_prediksjon)
    mape_test = mean_absolute_percentage_error(y_test, y_test_prediksjon)


    rmse_trening = rmse(y_trening_markert, y_trening_prediksjon)
    r2_trening = r2_score(y_trening_markert, y_trening_prediksjon)
    mae_trening = mean_absolute_error(y_trening_markert, y_trening_prediksjon)
    mape_trening = mean_absolute_percentage_error(y_trening_markert, y_trening_prediksjon)

    return rmse_test, r2_test, mae_test, mape_test, rmse_trening, r2_trening, mae_trening, mape_trening

  rmse_test_resultat = detailed_objective(study.best_trial)[0]
  r2_test_resultat = detailed_objective(study.best_trial)[1]
  mae_test_resultat = detailed_objective(study.best_trial)[2]
  mape_test_resultat = detailed_objective(study.best_trial)[3]

  rmse_trening_resultat = detailed_objective(study.best_trial)[4]
  r2_trening_resultat = detailed_objective(study.best_trial)[5]
  mae_trening_resultat = detailed_objective(study.best_trial)[6]
  mape_trening_resultat = detailed_objective(study.best_trial)[7]

  beste_parametere = study.best_params

  resultater = [test_andel,
                rmse_test_resultat,
                r2_test_resultat,
                mae_test_resultat,
                mape_test_resultat,
                rmse_trening_resultat,
                r2_trening_resultat,
                mae_trening_resultat,
                mape_trening_resultat,
                beste_parametere]

  collagen_coreg_resultater_dag.loc[len(collagen_coreg_resultater_dag)] = resultater

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_markert['RawMaterialMix'] = råmateriale_data
[I 2024-02-16 12:02:10,534] A new study created in memory with name: no-name-d74b9daf-55c3-44f0-b2a5-598063eac103
[I 2024-02-16 12:03:36,415] Trial 0 finished with value: 3.8877537424688855 and parameters: {'k1': 3, 'k2': 8, 'p1': 1, 'p2': 9}. Best is trial 0 with value: 3.8877537424688855.
[I 2024-02-16 12:05:00,324] Trial 1 finished with value: 4.3391025260368075 and parameters: {'k1': 8, 'k2': 10, 'p1': 2, 'p2': 8}. Best is trial 0 with value: 3.8877537424688855.
[I 2024-02-16 12:06:24,139] Trial 2 finished with value: 3.287827978347458 and parameters: {'k1': 7, 'k2': 9, 'p1': 9, 'p2': 3}. Best is trial 2 with value: 3.287827978347458.
[I 2024-02-16 12:07:55,110] Trial 3 fi

In [11]:
collagen_coreg_resultater_dag

Unnamed: 0,Test andel,RMSE test,R2 test,MAE test,MAPE test,RMSE trening,R2 trening,MAE trening,MAPE trening,Beste parametere
0,0.1,3.732778,0.105147,3.265388,0.162898,1.462619,0.927568,1.056689,0.046517,"{'k1': 7, 'k2': 9, 'p1': 9, 'p2': 3}"
1,0.2,3.516569,0.4178,3.157804,0.147428,1.35168,0.926205,1.045626,0.046103,"{'k1': 6, 'k2': 5, 'p1': 6, 'p2': 1}"
2,0.3,4.132328,0.045528,3.398061,0.168199,1.339703,0.93292,1.020832,0.044069,"{'k1': 9, 'k2': 6, 'p1': 6, 'p2': 1}"
3,0.4,3.49024,0.540573,2.805247,0.124713,1.446574,0.931633,1.177557,0.053503,"{'k1': 7, 'k2': 8, 'p1': 2, 'p2': 8}"
4,0.5,3.917921,0.47511,2.899328,0.148522,1.398529,0.908545,1.139016,0.045939,"{'k1': 3, 'k2': 9, 'p1': 5, 'p2': 10}"
