In [1]:
!pip install transformers==3.3.0

Successfully installed sacremoses-0.0.45 sentencepiece-0.1.96 tokenizers-0.8.1rc2 transformers-3.3.0


## Required Packages

In [2]:
import sys
import gc
import os
import json

import torch
import warnings
import transformers
import torch.nn as nn
import time
import warnings
import nltk, string

import re
import requests
from sklearn.preprocessing import MinMaxScaler

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from google.colab import drive
from transformers import BertTokenizer, BertModel
from sklearn import model_selection
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import mean_squared_error,mean_squared_log_error,r2_score


tqdm.pandas()
from sklearn.feature_extraction.text import TfidfVectorizer
nltk.download('punkt') # if necessary...
pd.set_option('display.max_colwidth', 255)
warnings.filterwarnings("ignore")
nltk.download('stopwords')
drive.mount('/content/drive')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
Mounted at /content/drive


In [3]:
def label_scaling(val):
    val = np.log(val + 1)
    scaler = MinMaxScaler()
    scaler.fit(val)
    val = scaler.transform(val)
    return scaler, val


def label_inverse_scaling(scaler, val):
    val = scaler.inverse_transform(val)
    val = np.exp(val) - 1
    return val


def seed_everything(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)

class AverageMeter:
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [4]:
path = '/content/drive/MyDrive/twitter-popularity-prediction/'
df = pd.read_csv(f"{path}data.csv")
#df = df[['user_name','description','text','follower_count','friends_count','num_of_likes','retweet_count']]
#df[df.isnull().any(axis=1)]
#df.head()
df.isna().sum()

user_id              0
user_name            0
location          3422
description       1600
follower_count       0
friends_count        0
verified             0
tweet_id             0
created_at           0
num_of_likes         0
retweet_count        0
text                 0
user_location     3422
dtype: int64

In [5]:
def clean_text(text):
    '''Make text lowercase, remove text in square brackets,remove links,remove punctuation
    and remove words containing numbers.'''
    text = str(text).lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text
df['text'] = df['text'].apply(lambda x:clean_text(x))
df['text_len'] = df['text'].astype(str).apply(len)
df['text_word_count'] = df['text'].apply(lambda x: len(str(x).split()))
df.describe()

Unnamed: 0,user_id,follower_count,friends_count,tweet_id,num_of_likes,retweet_count,text_len,text_word_count
count,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0,9822.0
mean,65004610.0,57065420.0,30599.115251,1.190929e+18,125646.6,24770.27,92.760538,16.442883
std,84685560.0,22673600.0,104764.219819,2.259275e+17,190589.7,52489.06,75.701578,13.658569
min,428333.0,34134050.0,0.0,1405978000.0,0.0,17.0,0.0,0.0
25%,15846410.0,41758030.0,119.0,1.087677e+18,20539.5,2540.25,30.0,5.0
50%,26257170.0,49941270.0,495.0,1.263572e+18,55247.0,6917.0,72.0,13.0
75%,100220900.0,64940000.0,1800.0,1.36132e+18,168610.0,21894.75,137.0,25.0
max,471741700.0,129803000.0,590251.0,1.418822e+18,4232344.0,1515265.0,293.0,68.0


### config 

In [6]:
# Best Config
pretrained_model_name = 'bert-large-uncased'
do_lower_case = True
max_len = 64
bert_hidden = 1024
dropout = 0.3
train_batch_size=16
valid_batch_size=32
epochs = 5
learning_rate = 5e-5
adam_epsilon=1e-8
n_meta_features = 3
warmup_steps=0
LOSS = 'MSE'
seed_everything()

## DataLoader

In [7]:
class TweetDataset:
    def __init__(self, dataframe, tweet, targets,meta_features):
      self.df = dataframe
      self.tweet = tweet
      self.tokenizer = transformers.BertTokenizer.from_pretrained(pretrained_model_name,do_lower_case = do_lower_case)
      self.max_length = max_len
      self.targets = targets
      self.meta_features = meta_features

    def __len__(self):
        return len(self.tweet)

    def __getitem__(self, item):
      tweet = str(self.tweet[item])
      tweet = " ".join(tweet.split())

      inputs = self.tokenizer.encode_plus(
          tweet,
          None,
          add_special_tokens=True,
          max_length=self.max_length,
          truncation_strategy="longest_first",
          pad_to_max_length=True,
          truncation=True
      )
      
      ids = inputs["input_ids"]
      mask = inputs["attention_mask"]
      token_type_ids = inputs["token_type_ids"]

      meta = np.array(self.df.iloc[item][self.meta_features].values, dtype=np.float32)

      return {
          'ids': torch.tensor(ids, dtype=torch.long),
          'mask': torch.tensor(mask, dtype=torch.long),
          'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
          'targets': torch.tensor(self.targets[item], dtype=torch.float),
          'meta': torch.tensor(meta, dtype=torch.float)
      }


dfx = pd.read_csv(f"{path}data.csv").dropna().reset_index(drop = True)
meta = ['follower_count_scaler','friends_count_scaler', 'retweet_count_scaler']


follower_count = dfx.follower_count.values
friends_count = dfx.friends_count.values
retweet_count = dfx.retweet_count.values
num_of_likes = dfx.num_of_likes.values
follower_scaler, follower_count = label_scaling(follower_count.reshape(-1, 1))
friends_scaler, friends_count = label_scaling(friends_count.reshape(-1, 1))
retweet_scaler, retweet_count = label_scaling(retweet_count.reshape(-1, 1))
num_of_likes_scaler, num_of_likes = label_scaling(num_of_likes.reshape(-1, 1))
dfx[["follower_count_scaler"]] = follower_count
dfx[["friends_count_scaler"]] = friends_count
dfx[["retweet_count_scaler"]] = retweet_count
dfx[["num_of_likes_scaler"]] = num_of_likes


dset = TweetDataset(
        dataframe=dfx,
        tweet=df.text.values,
        targets=dfx.num_of_likes_scaler.values,
        meta_features = meta
        )
print(dfx.iloc[0])
print(dset[0])


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

user_id                                                                                                                                                813286
user_name                                                                                                                                        Barack Obama
location                                                                                                                                       Washington, DC
description                                                                                                                 Dad, husband, President, citizen.
follower_count                                                                                                                                      129803017
friends_count                                                                                                                                          590251
verified                                            

### Model

In [8]:
class BertBaseUncasedNext(nn.Module) :
    def __init__(self) : 
      super(BertBaseUncasedNext,self).__init__() 
      self.bert = transformers.BertModel.from_pretrained(pretrained_model_name, output_hidden_states=True) 
      self.drop_out = nn.Dropout(0.3) 
      self.meta_features = nn.Sequential(nn.Linear(n_meta_features, 700),
                          nn.BatchNorm1d(700),
                          nn.ReLU(),
                          nn.Dropout(p=0.3),
                          nn.Linear(700, 250),
                          nn.BatchNorm1d(250),
                          nn.ReLU(),
                          nn.Dropout(p=0.3))

      self.l0 =  nn.Linear(bert_hidden * 2+250, 1)
      torch.nn.init.normal_(self.l0.weight, std=0.02)
        
    def _get_cls_vec(self, vec):
      return vec[:,0,:].view(-1, bert_hidden)
    def forward(self,ids,attention_mask,token_type_ids, meta):
      _, _, hidden_states = self.bert(
          ids,
          attention_mask=attention_mask,
          token_type_ids=token_type_ids
      )
      vec1 = self._get_cls_vec(hidden_states[-1])
      vec2 = self._get_cls_vec(hidden_states[-2])
      meta_features = self.meta_features(meta)
      #print("meta_features shape = ", meta_features.shape, vec1.shape, vec2.shape)
      out = torch.cat([vec1, vec2, meta_features], dim=1)
      #print("out shape = ", out.shape)
      out = self.drop_out(out)
      logits = self.l0(out)
      return logits


In [9]:
def loss_fn(y_pred, y_true, loss_name=LOSS):
  if type(loss_name) == str:
      if loss_name == 'MSE':
          loss_func = nn.MSELoss(reduction='mean')
      elif loss_name == 'MAE':
          loss_func = nn.L1Loss(reduction='mean')
      elif loss_name == 'Huber':
          loss_func = nn.SmoothL1Loss(reduction='mean')
      elif loss_name == 'LogCosh':
          loss_func = LogCoshLoss()
  return loss_func(y_pred, y_true.view(-1,1))


def train_fn(data_loader, model, optimizer, device, scheduler, n_examples):
  model.train()
  losses = AverageMeter()
  tk0 = tqdm(data_loader, total=len(data_loader))
  start = time.time()
  train_losses = []
  fin_targets = []
  fin_outputs = []
  for bi, d in enumerate(tk0):
    ids = d["ids"]
    mask = d["mask"]
    token_type_ids = d["token_type_ids"]
    targets = d["targets"]
    meta_features = d["meta"]
    #print("meta_features here = ", meta_features.shape)
    ids = ids.to(device, dtype=torch.long)
    mask = mask.to(device, dtype=torch.long)
    token_type_ids = token_type_ids.to(device, dtype=torch.long)
    targets = targets.to(device, dtype=torch.float)
    meta_features = meta_features.to(device, dtype=torch.float)
    # Reset gradients
    model.zero_grad()

    outputs = model(
        ids=ids,
        attention_mask=mask,
        token_type_ids = token_type_ids,
        meta = meta_features
    )

    loss = loss_fn(outputs, targets)
    train_losses.append(loss.item())

    #outputs = torch.round(nn.ReLU()(outputs)).squeeze()
    outputs = nn.Sigmoid()(outputs).squeeze()

    targets = targets.squeeze()

    end = time.time()
    if (bi % 50 == 0 and bi != 0) or (bi == len(data_loader) - 1):
      print(f'bi={bi},Train loss={loss.item()}, time={end-start}')
    
    loss.backward() # Calculate gradients based on loss
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step() # Adjust weights based on calculated gradients
    scheduler.step() # Update scheduler
    losses.update(loss.item(), ids.size(0))
    tk0.set_postfix(loss = losses.avg)

    if isinstance(targets.cpu().detach().numpy().tolist(), list) == False:
        fin_targets.append(targets.cpu().detach().numpy().tolist())
        fin_outputs.append(outputs.cpu().detach().numpy().tolist())
    else:
        fin_targets.extend(targets.cpu().detach().numpy().tolist())
        fin_outputs.extend(outputs.cpu().detach().numpy().tolist())
  mean_squared_log = mean_squared_log_error(fin_targets, fin_outputs)
  r2_s = r2_score(fin_targets, fin_outputs)
  return mean_squared_log, r2_s, np.mean(train_losses)

def eval_fn(data_loader, model, device, n_examples):
  model.eval()
  start = time.time()
  losses = AverageMeter()
  val_losses = []
  fin_targets = []
  fin_outputs = []
  with torch.no_grad():
      #tk0 = tqdm(data_loader, total=len(data_loader))
    for bi, d in enumerate(data_loader):
      ids = d["ids"]
      mask = d["mask"]
      token_type_ids = d["token_type_ids"]
      targets = d["targets"]
      meta_features = d["meta"]
      ids = ids.to(device, dtype=torch.long)
      mask = mask.to(device, dtype=torch.long)
      token_type_ids = token_type_ids.to(device, dtype=torch.long)
      targets = targets.to(device, dtype=torch.float)
      meta_features = meta_features.to(device, dtype=torch.float)

      outputs = model(
          ids=ids,
          attention_mask=mask,
          token_type_ids = token_type_ids,
          meta = meta_features
      )
      loss = loss_fn(outputs, targets)
      val_losses.append(loss.item())

      targets = targets.squeeze()
      #outputs = torch.round(nn.ReLU()(outputs)).squeeze()
      outputs = nn.Sigmoid()(outputs).squeeze()

      if isinstance(targets.cpu().detach().numpy().tolist(), list) == False:
          fin_targets.append(targets.cpu().detach().numpy().tolist())
          fin_outputs.append(outputs.cpu().detach().numpy().tolist())
      else:
          fin_targets.extend(targets.cpu().detach().numpy().tolist())
          fin_outputs.extend(outputs.cpu().detach().numpy().tolist())
  
  return fin_outputs,fin_targets, np.mean(val_losses)

In [10]:
def min_max_dataframe(dfx):
  follower_count = dfx.follower_count.values
  friends_count = dfx.friends_count.values
  retweet_count = dfx.retweet_count.values
  num_of_likes = dfx.num_of_likes.values
  follower_scaler, follower_count = label_scaling(follower_count.reshape(-1, 1))
  friends_scaler, friends_count = label_scaling(friends_count.reshape(-1, 1))
  retweet_scaler, retweet_count = label_scaling(retweet_count.reshape(-1, 1))
  num_of_likes_scaler, num_of_likes = label_scaling(num_of_likes.reshape(-1, 1))
  dfx[["follower_count_scaler"]] = follower_count
  dfx[["friends_count_scaler"]] = friends_count
  dfx[["retweet_count_scaler"]] = retweet_count
  dfx[["num_of_likes_scaler"]] = num_of_likes
  return dfx,num_of_likes_scaler

In [11]:
def run():
  dfx = pd.read_csv(f"{path}data.csv").dropna().reset_index(drop=True)
  df['text'] = df['text'].apply(lambda x:clean_text(x))

  meta = ['follower_count_scaler','friends_count_scaler', 'retweet_count_scaler']
  #meta = ['follower_count','friends_count', 'retweet_count']

  df_train, df_valid = model_selection.train_test_split(
      dfx, 
      test_size=0.1, 
      random_state=46, 
  )
  df_train = df_train.reset_index(drop=True)
  df_valid = df_valid.reset_index(drop=True)

  df_train, train_num_of_likes_scaler = min_max_dataframe(df_train)
  df_valid, valid_num_of_likes_scaler = min_max_dataframe(df_valid)

  print("train len - {} valid len - {}".format(len(df_train), len(df_valid)))
  df_train = df_train.sample(frac=1).reset_index(drop=True)
  meta = ['follower_count_scaler','friends_count_scaler', 'retweet_count_scaler']

  train_dataset = TweetDataset(
      dataframe = df_train,
      tweet=df_train.text.values,
      targets=df_train.num_of_likes_scaler.values,
      meta_features = meta
  )

  train_data_loader = torch.utils.data.DataLoader(
      train_dataset,
      batch_size=train_batch_size,
      shuffle=True,
      num_workers=4
  )

  valid_dataset = TweetDataset(
    dataframe = df_valid,
    tweet=df_valid.text.values,
    targets=df_valid.num_of_likes_scaler.values,
    meta_features = meta
  )

  valid_data_loader = torch.utils.data.DataLoader(
      valid_dataset,
      batch_size=valid_batch_size,
      num_workers=2
  )

  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model = BertBaseUncasedNext()#BertBaseUncased()
  model.to(device)
  

  param_optimizer = list(model.named_parameters())
  no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
  # Define two sets of parameters: those with weight decay, and those without
  optimizer_parameters = [
      {
          "params": [
              p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
          ],
          "weight_decay": 0.001,
      },
      {
          "params": [
              p for n, p in param_optimizer if any(nd in n for nd in no_decay)
          ],
          "weight_decay": 0.0,
      },
  ]

  num_train_steps = int(len(df_train) / train_batch_size * epochs)

  optimizer = AdamW(optimizer_parameters, lr=learning_rate, eps=adam_epsilon)
  '''
  Create a scheduler to set the learning rate at each training step
  "Create a schedule with a learning rate that decreases linearly after linearly increasing during a warmup period." (https://pytorch.org/docs/stable/optim.html)
  Since num_warmup_steps = 0, the learning rate starts at 3e-5, and then linearly decreases at each training step
  '''
  scheduler = get_linear_schedule_with_warmup(
      optimizer,
      num_warmup_steps=warmup_steps,
      num_training_steps=num_train_steps
  )
  #es = utils.EarlyStopping(patience=15, mode="max")
  print("STARTING TRAINING for ...\n")
  #history = defaultdict(list)
  loss_history_epoch = []
  metric_history_epoch = []

  best_accuracy = 999.9

  for epoch in range(epochs):
      print(f'Epoch {epoch + 1}/{epochs}')
      print('-' * 10)
      

      train_mse_log, train_r2_score, train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler, len(df_train))
      
      print(f'Train MSE log {train_mse_log} Train R2 Score {train_r2_score} Train MSE Loss {train_loss}')

      fin_pred, fin_val, val_loss = eval_fn(valid_data_loader, model, device, len(df_valid))

      print("scaler_fin_pred = ",fin_pred)
      print("scaler_fin_val = ", fin_val)

      val_mse_log = mean_squared_log_error(fin_val, fin_pred)
      fin_pred = np.array(fin_pred).reshape(-1, 1)
      fin_val = np.array(fin_val).reshape(-1, 1)
      fin_pred = label_inverse_scaling(valid_num_of_likes_scaler, fin_pred)
      fin_val = label_inverse_scaling(valid_num_of_likes_scaler, fin_val)

      val_r2_score = r2_score(fin_val, fin_pred)

      print(f'Val MSE log {val_mse_log} Val R2 Score {val_r2_score} Val MSE Loss {val_loss}')
      
      if val_mse_log < best_accuracy:
        best_accuracy = val_mse_log
        print("actual_fin_pred = ",list(map(int, fin_pred.squeeze()[:15])))
        print("actual_fin_val = ", list(map(int, fin_val.squeeze()[:15])))
        #torch.save(model.state_dict(), f"{args.model_path}{args.model_specification}.bin")

  
  del model, optimizer, scheduler, train_data_loader, valid_data_loader, train_dataset, valid_dataset
  torch.cuda.empty_cache()
  torch.cuda.synchronize()
  print("##################################### Task End ############################################")
  print(gc.collect())

run()

train len - 5220 valid len - 580


Downloading:   0%|          | 0.00/434 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

STARTING TRAINING for ...

Epoch 1/5
----------


  0%|          | 0/327 [00:00<?, ?it/s]

bi=50,Train loss=0.23908494412899017, time=65.0029456615448
bi=100,Train loss=0.20615644752979279, time=130.4039797782898
bi=150,Train loss=0.08075447380542755, time=196.08496761322021
bi=200,Train loss=0.07382125407457352, time=261.50302743911743
bi=250,Train loss=0.11365649849176407, time=326.9258306026459
bi=300,Train loss=0.18015800416469574, time=392.24396276474
bi=326,Train loss=0.21348293125629425, time=425.7994341850281
Train MSE log 0.02557934607473719 Train R2 Score -0.6046447120182288 Train MSE Loss 0.20460921210387795
scaler_fin_pred =  [0.6361764669418335, 0.6505962014198303, 0.6654788255691528, 0.6246626377105713, 0.6823655962944031, 0.6531709432601929, 0.7265320420265198, 0.669009804725647, 0.5554510354995728, 0.7036230564117432, 0.6781764626502991, 0.6396639943122864, 0.6479557752609253, 0.611338198184967, 0.7175982594490051, 0.6473580598831177, 0.6374599933624268, 0.6125649809837341, 0.6389669179916382, 0.7166216373443604, 0.70102459192276, 0.6117655634880066, 0.651188

  0%|          | 0/327 [00:00<?, ?it/s]

bi=50,Train loss=0.043466225266456604, time=65.97493934631348
bi=100,Train loss=0.058754194527864456, time=131.31973338127136
bi=150,Train loss=0.06592664122581482, time=196.61799430847168
bi=200,Train loss=0.09456001967191696, time=261.9010155200958
bi=250,Train loss=0.06142783910036087, time=327.109432220459
bi=300,Train loss=0.06822415441274643, time=392.4107644557953
bi=326,Train loss=0.152204230427742, time=426.0226299762726
Train MSE log 0.024215539981474084 Train R2 Score -0.48191491305993295 Train MSE Loss 0.07335738766516932
scaler_fin_pred =  [0.615663468837738, 0.6201604008674622, 0.6589311957359314, 0.5961766839027405, 0.6862502694129944, 0.6320425271987915, 0.6749016642570496, 0.6414681673049927, 0.5205132961273193, 0.6524966359138489, 0.642731249332428, 0.6159545183181763, 0.6131099462509155, 0.5708497762680054, 0.6704599261283875, 0.624786913394928, 0.625557005405426, 0.5782385468482971, 0.6169664859771729, 0.6683956980705261, 0.694983959197998, 0.6018317341804504, 0.631

  0%|          | 0/327 [00:00<?, ?it/s]

bi=50,Train loss=0.06629252433776855, time=66.09664559364319
bi=100,Train loss=0.02635226584970951, time=131.3730251789093
bi=150,Train loss=0.054432213306427, time=196.6701157093048
bi=200,Train loss=0.023911278694868088, time=262.0219931602478
bi=250,Train loss=0.05867787450551987, time=327.2570559978485
bi=300,Train loss=0.01659034751355648, time=392.61895179748535
bi=326,Train loss=0.013364356011152267, time=426.2379660606384
Train MSE log 0.02372313199363049 Train R2 Score -0.44109023970002426 Train MSE Loss 0.04865315443742166
scaler_fin_pred =  [0.6399248838424683, 0.6434890627861023, 0.6814473867416382, 0.6189308762550354, 0.6963306069374084, 0.6559510231018066, 0.6965950727462769, 0.6645825505256653, 0.5441972017288208, 0.7019507884979248, 0.6690928936004639, 0.636244535446167, 0.6414859294891357, 0.5791921019554138, 0.6919480562210083, 0.6427803635597229, 0.6458058953285217, 0.5991620421409607, 0.6304765939712524, 0.7072147727012634, 0.7032067179679871, 0.6194930076599121, 0.

  0%|          | 0/327 [00:00<?, ?it/s]

bi=50,Train loss=0.03656695410609245, time=66.0284366607666
bi=100,Train loss=0.03434588387608528, time=131.3239996433258
bi=150,Train loss=0.026389367878437042, time=196.7140417098999
bi=200,Train loss=0.012570155784487724, time=261.97946190834045
bi=250,Train loss=0.04823390766978264, time=327.25404715538025
bi=300,Train loss=0.033311132341623306, time=392.63171648979187
bi=326,Train loss=0.020234771072864532, time=426.28857707977295
Train MSE log 0.02362690925140221 Train R2 Score -0.4304102280140454 Train MSE Loss 0.038634674788797306
scaler_fin_pred =  [0.6212857961654663, 0.6074573993682861, 0.6501105427742004, 0.6019283533096313, 0.663584291934967, 0.6168912649154663, 0.6698299646377563, 0.6403513550758362, 0.5254549980163574, 0.6747559905052185, 0.643090546131134, 0.6102002263069153, 0.6214457750320435, 0.5626075267791748, 0.6652311086654663, 0.6175280809402466, 0.6071569323539734, 0.5811305046081543, 0.6044628024101257, 0.6801130771636963, 0.6710959076881409, 0.586458384990692

  0%|          | 0/327 [00:00<?, ?it/s]

bi=50,Train loss=0.018875639885663986, time=66.20984411239624
bi=100,Train loss=0.030584950000047684, time=131.5569634437561
bi=150,Train loss=0.019559049978852272, time=197.01649117469788
bi=200,Train loss=0.03663696348667145, time=262.30348944664
bi=250,Train loss=0.03436163067817688, time=327.5966143608093
bi=300,Train loss=0.016288259997963905, time=392.8662917613983
bi=326,Train loss=0.24884551763534546, time=426.52211332321167
Train MSE log 0.02329272456332632 Train R2 Score -0.40834429290756513 Train MSE Loss 0.0366644722292775
scaler_fin_pred =  [0.6223194003105164, 0.6213260889053345, 0.6668490171432495, 0.6064401268959045, 0.6830374002456665, 0.6365767121315002, 0.6784420013427734, 0.6419785618782043, 0.5360091924667358, 0.6730961799621582, 0.6467674970626831, 0.6167533993721008, 0.6233878135681152, 0.5718642473220825, 0.6730840802192688, 0.6228489875793457, 0.6270645260810852, 0.5921007990837097, 0.6155146360397339, 0.6817303895950317, 0.6914934515953064, 0.6027823686599731,