In [2]:
import json
import pickle

import pandas as pd
import plotly.express as px
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments

from util import compute_metrics, foresee

# Load the trainer state stored with checkpoint-540. A context manager is used
# so the file handle is closed deterministically instead of being left to the
# garbage collector.
with open('../models/auto_model/days_name_20/checkpoints/checkpoint-540/trainer_state.json',
          encoding='utf-8') as state_file:
    data = json.load(state_file)

In [14]:
# Collect the evaluation metrics from the trainer state, one value per
# evaluation entry, in log order.
# Entries that carry no evaluation metrics (no 'eval_mse' key, e.g. training
# loss logs) are skipped so they cannot raise a KeyError.
eval_logs = [entry for entry in data['log_history'] if 'eval_mse' in entry]

# "scale this" is the original author's TODO; presumably the values are still
# in the scaler's units and should be converted back - confirm.
mse = [entry['eval_mse'] for entry in eval_logs]  # scale this
mae = [entry['eval_mae'] for entry in eval_logs]  # scale this
rmse = [entry['eval_rmse'] for entry in eval_logs]  # scale this
r2 = [entry['eval_r2'] for entry in eval_logs]

In [15]:
# Display the raw log entries from the trainer state (one dict per logged
# evaluation, with the eval_* metrics, epoch and step).
data['log_history']

[{'epoch': 1.0,
  'eval_loss': 0.4376451075077057,
  'eval_mae': 0.4481404423713684,
  'eval_mse': 0.4376451075077057,
  'eval_r2': 0.49659382469155045,
  'eval_rmse': 0.6615474820137024,
  'eval_runtime': 27.8928,
  'eval_samples_per_second': 15.344,
  'eval_steps_per_second': 0.789,
  'step': 27},
 {'epoch': 2.0,
  'eval_loss': 0.41371819376945496,
  'eval_mae': 0.4387667775154114,
  'eval_mse': 0.41371819376945496,
  'eval_r2': 0.5241160503153055,
  'eval_rmse': 0.6432092785835266,
  'eval_runtime': 27.0564,
  'eval_samples_per_second': 15.819,
  'eval_steps_per_second': 0.813,
  'step': 54},
 {'epoch': 3.0,
  'eval_loss': 0.4001479148864746,
  'eval_mae': 0.4424608051776886,
  'eval_mse': 0.4001479148864746,
  'eval_r2': 0.5397253847925412,
  'eval_rmse': 0.6325724720954895,
  'eval_runtime': 27.7111,
  'eval_samples_per_second': 15.445,
  'eval_steps_per_second': 0.794,
  'step': 81},
 {'epoch': 4.0,
  'eval_loss': 0.4052346348762512,
  'eval_mae': 0.4596848487854004,
  'eval_mse'

In [17]:
# Assemble the per-evaluation metric table in a single constructor call;
# dict insertion order fixes the column order MSE, MAE, RMSE, R2.
dataframe = pd.DataFrame({
    'MSE': mse,
    'MAE': mae,
    'RMSE': rmse,
    'R2': r2,
})

In [18]:
# Display the metric table (columns MSE, MAE, RMSE, R2; one row per evaluation).
dataframe

Unnamed: 0,MSE,MAE,RMSE,R2
0,0.437645,0.44814,0.661547,0.496594
1,0.413718,0.438767,0.643209,0.524116
2,0.400148,0.442461,0.632572,0.539725
3,0.405235,0.459685,0.63658,0.533874
4,0.396339,0.438548,0.629554,0.544107
5,0.434782,0.453532,0.65938,0.499887
6,0.400408,0.43728,0.632778,0.539427
7,0.485202,0.48181,0.696564,0.441891
8,0.412565,0.437512,0.642312,0.525443
9,0.475928,0.480684,0.689875,0.452558


In [19]:
# Plot every metric column against the row index of the metric table.
# The frame is passed in wide form, so Plotly Express names the axes/legend
# 'index', 'value' and 'variable'; `labels` replaces those defaults with
# readable text so the figure stands on its own.
px.line(
    dataframe,
    title='Evaluation metrics per evaluation run',
    labels={
        'index': 'Evaluation run (0-based)',
        'value': 'Metric value',
        'variable': 'Metric',
    },
)

In [4]:
# Directory holding the fine-tuned model and the pickled scaler.
PATH_TO_MODEL = '../models/auto_model/days_name_20'

model = AutoModelForSequenceClassification.from_pretrained(PATH_TO_MODEL)
# NOTE(review): the tokenizer is loaded by hub id rather than from PATH_TO_MODEL;
# presumably this is the base model the checkpoint was fine-tuned from - confirm.
tokenizer = AutoTokenizer.from_pretrained('dkleczek/bert-base-polish-uncased-v1')

# Close the scaler file deterministically instead of leaking the handle.
# SECURITY: pickle.load can execute arbitrary code; only load scaler files that
# were produced by this project.
with open(PATH_TO_MODEL + '/scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

training_args = TrainingArguments(
    output_dir=PATH_TO_MODEL,
    num_train_epochs=5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=20,
    weight_decay=0.01,
    learning_rate=2e-5,
    logging_dir='../logs',
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model='rmse',
    # RMSE is an error metric, so lower is better. Without this flag a
    # non-loss metric_for_best_model defaults to greater_is_better=True, which
    # would select the checkpoint with the highest RMSE.
    greater_is_better=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

# Call the Trainer
# No datasets are passed here; in this notebook the trainer is only handed to
# foresee() together with the scaler and tokenizer.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
)

In [5]:
# Run foresee() once per storage variant, in the same order as the names on the
# left-hand side; every call receives the same trainer, scaler, tokenizer and
# the (1, 90) argument.
iphone_11_64, iphone_11_128, iphone_11_256 = [
    foresee(trainer, scaler, tokenizer, query, (1, 90))
    for query in ('iphone 11 64gb', 'iphone 11 128gb', 'iphone 11 256gb')
]

***** Running Prediction *****
  Num examples = 89
  Batch size = 20


***** Running Prediction *****
  Num examples = 89
  Batch size = 20


***** Running Prediction *****
  Num examples = 89
  Batch size = 20


In [6]:
# Put the three forecasts side by side, one column per storage variant.
# The first column goes through the DataFrame constructor and the others are
# attached with assign(), which performs the same column assignment on a copy.
prediction = pd.DataFrame(iphone_11_64, columns=['iphone_11_64']).assign(
    iphone_11_128=iphone_11_128,
    iphone_11_256=iphone_11_256,
)

In [13]:
# Scatter every forecast column against the frame's row index and overlay a
# LOWESS trendline per column.
fig = px.scatter(
    prediction,
    trendline='lowess',
    title='Changes in price in relation to days passed since 01.01.2021',
)
# update_layout modifies the figure in place, so the returned value can be dropped.
fig.update_layout(xaxis_title='Days passed', yaxis_title='Price')
# NOTE(review): 55 is an unexplained marker position on the x axis, and the x
# axis is the 0-based row index, not necessarily the day number - confirm.
# Kept as the last expression because add_vline returns the figure, which the
# notebook then renders.
fig.add_vline(x=55)