In [1]:
import nltk
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /home/patryk/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/patryk/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/patryk/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [2]:
import nltk
import re

import pickle

import numpy as np
import os
from gensim.models import KeyedVectors
import fasttext
from matplotlib import pyplot as plt
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

from tqdm import tqdm
from typing import Union, Dict, Tuple, Optional

from datetime import datetime

import pandas as pd
import torch
import torch
import torchvision as tv
import torchvision.models as M
import torchvision.transforms as T
import cv2

from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter

In [2]:
DATA_PATH = f"data/"
SEED = 0

In [3]:
df = pd.read_csv(f"{DATA_PATH}merged_data_youtube.csv", index_col=0)
df_view_max = df["view_count"].max()
df_view_min = df["view_count"].min()

df["view_count"] = ((df["view_count"]-df_view_min)/(df_view_max-df_view_min)).astype(np.float32) # Normalize view count
df["likes"] = ((df["likes"]-df["likes"].min())/(df["likes"].max()-df["likes"].min())).astype(np.float32) # Normalize likes
df["comment_count"] = ((df["comment_count"]-df["comment_count"].min())/(df["comment_count"].max()-df["comment_count"].min())).astype(np.float32) # Normalize comment count
# df = df.dropna()
print(df_view_max)
print(df_view_min)

548866548.0
0.0


In [4]:
df["view_count"] = df["view_count"].fillna(0)
df["likes"] = df["likes"].fillna(0)
df["comment_count"] = df["comment_count"].fillna(0)

df["title"] = df["title"].fillna("")

In [5]:
en_stop = stopwords.words('english')

def preprocess_text(document, stemmer):
    # Remove all the special characters
    document = re.sub(r"\W", " ", str(document))

    # remove all single characters
    document = re.sub(r"\s+[a-zA-Z]\s+", " ", document)

    # Remove single characters from the start
    document = re.sub(r"\^[a-zA-Z]\s+", " ", document)

    # Substituting multiple spaces with single space
    document = re.sub(r"\s+", " ", document, flags=re.I)

    # Removing prefixed 'b'
    document = re.sub(r"^b\s+", "", document)

    # Converting to Lowercase
    document = document.lower()

    # Lemmatization
    tokens = document.split()
    tokens = [stemmer.lemmatize(word) for word in tokens]
    tokens = [word for word in tokens if word not in en_stop]
    tokens = [word for word in tokens if len(word) > 3]

    # preprocessed_text = ' '.join(tokens)
    preprocessed_text = tokens

    return preprocessed_text

def pad_collate(batch):
    (texts, images, labels) = zip(*batch)
    texts = [text if len(text) else torch.zeros(1, 300) for text in texts]
    text_lens = [len(text) for text in texts]
    texts_pad = pad_sequence(texts, batch_first=True, padding_value=0)
    texts_pad = pack_padded_sequence(
        texts_pad, text_lens, batch_first=True, enforce_sorted=False
    )
    return texts_pad, images, torch.tensor(labels)


def strip_n_collate(batch):
    strip_n: int = 40
    (texts, summaries, labels) = zip(*batch)
    text_lens = [len(x) for x in texts]
    summary_lens = [len(x) for x in summaries]

    texts_pad = pad_sequence(texts, batch_first=True, padding_value=0)
    summaries_pad = pad_sequence(summaries, batch_first=True, padding_value=0)

    texts_pad = [item[:40, :].unsqueeze(dim=0) for item in texts_pad]
    summaries_pad = [item[:40, :].unsqueeze(dim=0) for item in summaries_pad]

    texts_pad = torch.cat(texts_pad)
    summaries_pad = torch.cat(summaries_pad)

    # texts_pad = pack_padded_sequence(
    #     texts_pad, text_lens, batch_first=True, enforce_sorted=False
    # )
    # summaries_pad = pack_padded_sequence(
    #     summaries_pad, summary_lens, batch_first=True, enforce_sorted=False
    # )

    return texts_pad, summaries_pad, torch.tensor(labels)

In [6]:
class LSTM(nn.Module):
    def __init__(self, kwargs):
        super(LSTM, self).__init__()
    
        self.num_classes = kwargs.get("num_classes", 1)
        self.input_size = kwargs.get("input_size", 100)
        self.hidden_size = kwargs.get("hidden_size", 64)
        self.num_layers = kwargs.get("num_layers", 2)
        self.image_data_loader = kwargs.get("image_data_loader")

        self.resnet = M.resnet50(pretrained=False)
        self.resnet.fc = torch.nn.Identity()

        self.lstm_title = nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            batch_first=True,
        )  # lstm

        self.fc_1 = nn.Linear(2048 + self.hidden_size, self.hidden_size)  # fully connected 1
        self.fc = nn.Linear(self.hidden_size, self.num_classes)  # fully connected last layer

        self.relu = nn.ReLU()

    def forward(self, text, image):
        # Propagate input through LSTM
        output_title, (hn_title, cn_title) = self.lstm_title(
            text
        )  # lstm with input, hidden, and internal state

        hn_title = hn_title[-1].view(
            -1, self.hidden_size
        )  # reshaping the data for Dense layer next
        imgs = torch.cat([self.image_data_loader.dataset[img][0].unsqueeze(dim=0) for img in image]).cuda()

        print(imgs.shape)

        out_img = self.resnet(imgs)
        out = torch.cat((hn_title, out_img), axis=1)
        out = self.relu(out)
        out = self.fc_1(out)  # first Dense
        out = self.relu(out)  # relu
        out = self.fc(out)  # Final Output
        return out

In [7]:
from sklearn.metrics import f1_score, precision_score, r2_score


def _ensure_exists(path_out: str) -> None:
    if os.path.exists(path_out):
        return
    os.makedirs(path_out)


def count_correct(y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
    preds = torch.argmax(y_pred, dim=1)
    return (preds == y_true).float().sum()


def calc_fscore(y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
    preds = torch.argmax(y_pred, dim=1)
    return f1_score(y_true, preds, average="macro")

def calc_r2score(y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
    return r2_score(y_true.detach().numpy(), y_pred.detach().numpy(), multioutput='variance_weighted')

def calc_precission(y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
    preds = torch.argmax(y_pred, dim=1)
    return precision_score(y_true, preds, average="macro", labels=np.unique(preds))


def validate(
        model: nn.Module, loss_fn: torch.nn.CrossEntropyLoss, dataloader: DataLoader
) -> Tuple[torch.Tensor, torch.Tensor]:
    loss = 0
    r2_score_sum = 0
    _all = 0
    iters = 0
    for X_1_batch, X_2_batch, y_batch in dataloader:
        y_pred = model(X_1_batch.cuda(), X_2_batch)
        _all += len(y_pred)
        iters += 1
        loss += loss_fn(y_pred.flatten(), y_batch.cuda())
        r2_score_sum += r2_score(y_pred.cpu().flatten(), y_batch)
    return loss / _all, r2_score_sum / iters


def fit(
        model: nn.Module,
        optimizer: optim.Optimizer,
        loss_fn: torch.nn.CrossEntropyLoss,
        train_dl: DataLoader,
        val_dl: DataLoader,
        writer: SummaryWriter,
        test_dl: Union[None, DataLoader] = None,
        epochs: int = 50,
        print_metrics: bool = True,
        patience: int = 5,
        output_path: str = "data/checkpoints/best",
        run_prefix: str = "early_stopping",
) -> Dict[str, list]:
    losses = {"train": [], "val": [], "test": []}
    r_2s = {"train": [], "val": [], "test": []}

    min_val_loss = 1e10
    current_patience = 0
    for epoch in tqdm(range(epochs)):
        model.train()  # Przełączenie na tryb uczenia modelu - istotne dla takich warstw jak Dropuot czy BatchNorm
        for X_1_batch, X_2_batch, y_batch in train_dl:
            X_1_batch, X_2_batch, y_batch = (
                X_1_batch.cuda(),
                X_2_batch,
                y_batch.cuda(),
            )
            y_pred = model(
                X_1_batch, X_2_batch
            )  # Uzyskanie pseudoprawdopodobieństw dla próbek z minibatcha
            loss = loss_fn(y_pred.flatten(), y_batch)  # Policzenie funkcji straty
            
            loss.backward()  # Wsteczna propagacja z wyniku funkcji straty - policzenie gradientów i zapisanie ich w tensorach (parametrach)
            optimizer.step()  # Aktualizacja parametrów modelu przez optymalizator na podstawie gradientów zapisanych w tensorach (parametrach) oraz lr
            optimizer.zero_grad()  # Wyzerowanie gradientów w modelu, alternatywnie można wywołać model.zero_grad()

        model.eval()  # Przełączenie na tryb ewaluacji modelu - istotne dla takich warstw jak Dropuot czy BatchNorm
        with torch.no_grad():  # Wstrzymujemy przeliczanie i śledzenie gradientów dla tensorów - w procesie ewaluacji modelu nie chcemy zmian w gradientach
            train_loss, train_r2 = validate(
                model, loss_fn, train_dl
            )
            val_loss, val_r2 = validate(model, loss_fn, val_dl)

            if val_loss < min_val_loss:
                min_val_loss = val_loss
                current_patience = 0
                torch.save(
                    obj={
                        "epoch": epoch,
                        "model_state_dict": model.state_dict(),
                        "optimizer_state_dict": optimizer.state_dict(),
                    },
                    f=output_path + "_" + run_prefix,
                )
            else:
                current_patience += 1

            if test_dl:
                test_loss, test_r2 = validate(model, loss_fn, test_dl)
                losses["test"].append(test_loss)
                r_2s["test"].append(test_r2)

        losses["train"].append(train_loss)
        losses["val"].append(val_loss)

        r_2s["train"].append(train_r2)
        r_2s["val"].append(val_r2)

        writer.add_scalars(
            main_tag=f"{run_prefix} loss",
            tag_scalar_dict={"train": train_loss, "dev": val_loss},
            global_step=epoch + 1,
        )

        writer.add_scalars(
            main_tag=f"{run_prefix} r_2 score",
            tag_scalar_dict={"train": train_r2, "dev": val_r2},
            global_step=epoch + 1,
        )

        if print_metrics:
            print(
                f"Epoch {epoch}: "
                f"train loss = {train_loss:.3f} (r2: {train_r2:.3f})"
                f"validation loss = {val_loss:.3f} (r2: {val_r2:.3f}))"
            )

        if current_patience >= patience:
            break

    model.eval()  # Przełączenie na tryb ewaluacji modelu - istotne dla takich warstw jak Dropuot czy BatchNorm
    return losses, r_2s

In [8]:
class TitleDataset(torch.utils.data.Dataset):
    def __init__(
            self,
            all_data,
            model_name: str,
            train: bool,
            data_path: str,
            model = None,
            k: int = 10,
            n_start: int = 0,
            prefix: str = "train"
    ):
        """
        k - k is k letter in 'k-crossvalidation'
        """
        self.model_name = model_name
        self.model = model
        self.all_data_len = len(all_data)
        self.prefix = prefix
        self.all_data_len = len(all_data)
        self.all_data = all_data
        self.k = k
        self.train = train
        self.splits = []
        self.data = []
        self.prepare_splits(n_start)
        # self.data_path = os.path.join(data_path, model_name, self.prefix)
        self.data_path = os.path.join(data_path, model_name, self.prefix)
        self.stemmer = WordNetLemmatizer()

    def prepare_splits(self, n):
        self.splits = [
            list(range(len(self.all_data)))[
            i
            * len(self.all_data)
            // self.k : (i + 1)
                        * len(self.all_data)
                        // self.k
            ]
            if i != self.k - 1
            else list(range(len(self.all_data)))[i * len(self.all_data) // self.k :]
            for i in range(self.k)
        ]
        if self.train:
            self.data = [
                item for p, split in enumerate(self.splits) if p != n for item in split
            ]
        else:
            self.data = self.splits[n]


    def _prepare(self):
        _ensure_exists(self.data_path)
        for i, item in tqdm(enumerate(self.all_data), total=self.all_data_len,):
            with open(
                    os.path.join(self.data_path, f"{self.prefix}_{i}.pkl"), "wb"
            ) as file:
                pickle.dump(
                    (
                        torch.tensor(
                            [
                                self.model[word]
                                for word in preprocess_text(item[0], self.stemmer)
                            ]
                        ),
                        item[1],
                        item[2],
                    ),
                    file,
                )

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        idx = self.data[idx]
        with open(
                os.path.join(self.data_path, f"{self.prefix}_{idx}.pkl"), "rb"
        ) as file:
            return pickle.load(file)

In [9]:
models = {
    "fasttext_crawl":{"model": fasttext.load_model("models/cc.en.300.bin")}
}



In [9]:
dataset = tv.datasets.ImageFolder(f"{DATA_PATH}images")

data_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    T.Resize(size=(180, 240)),
])
dataset.transform = data_transform

image_data_loader = torch.utils.data.DataLoader(dataset)

In [12]:
# all_data = []

# for i, (image, label) in enumerate(tqdm(image_data_loader, 0)):
#     filename = image_data_loader.dataset.samples[i][0].split('/')[-1].replace('.jpg','')
#     all_data.append([
#         df[df["video_id"] == filename]["title"].iloc[0],
#         i,
#         df[df["video_id"] == filename]["view_count"].iloc[0]
#     ])
# with open("all_data.pkl", "wb") as file:
#     pickle.dump(all_data, file)

In [13]:
with open("all_data.pkl", "rb") as file:
    all_data = pickle.load(file)
    
# all_data = all_data[:500]

In [14]:
BATCH_SIZE = 32

for model_name in models:
    models[model_name]["train"] = TitleDataset(
        all_data, model_name, True, "data/embeddings", models[model_name]["model"], prefix="train"
    )
    models[model_name]["test"] = TitleDataset(
        all_data,
        model_name,
        False,
        "data/embeddings",
        models[model_name]["model"], prefix="train"
    )
    models[model_name]["train_dl_lstm"] = DataLoader(
        models[model_name]["train"], batch_size=BATCH_SIZE, collate_fn=pad_collate
    )
    models[model_name]["test_dl_lstm"] = DataLoader(
        models[model_name]["test"], batch_size=BATCH_SIZE, collate_fn=pad_collate
    )

In [15]:
# for model_name in models:
#     models[model_name]["train"]._prepare()

In [15]:
log_dir = "tensorboard_logs"
_ensure_exists(log_dir)

writer_tensorboard = SummaryWriter(log_dir)

%reload_ext tensorboard
%tensorboard --logdir $log_dir --port=6011

Reusing TensorBoard on port 6011 (pid 404262), started 0:07:43 ago. (Use '!kill 404262' to kill it.)

In [None]:
EPOCHS = 200

models_to_train = ["fasttext_crawl"]
# models_to_train = ["fasttext_crawl"]


for model_name in models_to_train:
    kwargs = {
        "num_classes": 1,
        "input_size": 300,
        "hidden_size": 128,
        "num_layers": 2,
        "image_data_loader": image_data_loader
    }
    learning_rate = 0.0001

    model_type_name = "model_lstm"
    dataloader_train_name = "train_dl_lstm"
    dataloader_test_name = "test_dl_lstm"

    time_stamp = datetime.now().strftime("%d_%m_%y_%H_%M_%S")
    models[model_name][model_type_name] = LSTM(kwargs).cuda()
    models[model_name]["loss_fn"] = torch.nn.MSELoss(reduction="sum")
    models[model_name]["optimizer"] = torch.optim.Adam(
        models[model_name][model_type_name].parameters(), lr=learning_rate
    )

    result = fit(
        model=models[model_name][model_type_name],
        optimizer=models[model_name]["optimizer"],
        loss_fn=models[model_name]["loss_fn"],
        train_dl=models[model_name][dataloader_train_name],
        val_dl=models[model_name][dataloader_test_name],
        writer=writer_tensorboard,
        epochs=EPOCHS,
        patience=30,
        run_prefix=model_name + "_" + model_type_name + "_" + time_stamp,
        print_metrics=True,
    )

    checkpoint = torch.load(
        f"data/checkpoints/best_{model_name}_{model_type_name}_{time_stamp}"
    )
    models[model_name][model_type_name].load_state_dict(checkpoint["model_state_dict"])
    models[model_name]["optimizer"].load_state_dict(checkpoint["optimizer_state_dict"])

  0%|          | 1/200 [09:32<31:39:39, 572.76s/it]

Epoch 0: train loss = 0.000 (r2: -2.559)validation loss = 0.001 (r2: -3.241))


In [18]:
models[model_name][model_type_name]

LSTM(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0):

In [19]:
model_name = "fasttext_crawl"
model_type_name = "model_lstm"
time_stamp = "18_01_22_23_47_41"

In [20]:
checkpoint = torch.load(
    f"data/checkpoints/best_{model_name}_{model_type_name}_{time_stamp}"
)
models[model_name][model_type_name].load_state_dict(checkpoint["model_state_dict"])
models[model_name]["optimizer"].load_state_dict(checkpoint["optimizer_state_dict"])

RuntimeError: Error(s) in loading state_dict for LSTM:
	Missing key(s) in state_dict: "resnet.layer1.0.conv3.weight", "resnet.layer1.0.bn3.weight", "resnet.layer1.0.bn3.bias", "resnet.layer1.0.bn3.running_mean", "resnet.layer1.0.bn3.running_var", "resnet.layer1.0.downsample.0.weight", "resnet.layer1.0.downsample.1.weight", "resnet.layer1.0.downsample.1.bias", "resnet.layer1.0.downsample.1.running_mean", "resnet.layer1.0.downsample.1.running_var", "resnet.layer1.1.conv3.weight", "resnet.layer1.1.bn3.weight", "resnet.layer1.1.bn3.bias", "resnet.layer1.1.bn3.running_mean", "resnet.layer1.1.bn3.running_var", "resnet.layer1.2.conv1.weight", "resnet.layer1.2.bn1.weight", "resnet.layer1.2.bn1.bias", "resnet.layer1.2.bn1.running_mean", "resnet.layer1.2.bn1.running_var", "resnet.layer1.2.conv2.weight", "resnet.layer1.2.bn2.weight", "resnet.layer1.2.bn2.bias", "resnet.layer1.2.bn2.running_mean", "resnet.layer1.2.bn2.running_var", "resnet.layer1.2.conv3.weight", "resnet.layer1.2.bn3.weight", "resnet.layer1.2.bn3.bias", "resnet.layer1.2.bn3.running_mean", "resnet.layer1.2.bn3.running_var", "resnet.layer2.0.conv3.weight", "resnet.layer2.0.bn3.weight", "resnet.layer2.0.bn3.bias", "resnet.layer2.0.bn3.running_mean", "resnet.layer2.0.bn3.running_var", "resnet.layer2.1.conv3.weight", "resnet.layer2.1.bn3.weight", "resnet.layer2.1.bn3.bias", "resnet.layer2.1.bn3.running_mean", "resnet.layer2.1.bn3.running_var", "resnet.layer2.2.conv1.weight", "resnet.layer2.2.bn1.weight", "resnet.layer2.2.bn1.bias", "resnet.layer2.2.bn1.running_mean", "resnet.layer2.2.bn1.running_var", "resnet.layer2.2.conv2.weight", "resnet.layer2.2.bn2.weight", "resnet.layer2.2.bn2.bias", "resnet.layer2.2.bn2.running_mean", "resnet.layer2.2.bn2.running_var", "resnet.layer2.2.conv3.weight", "resnet.layer2.2.bn3.weight", "resnet.layer2.2.bn3.bias", "resnet.layer2.2.bn3.running_mean", "resnet.layer2.2.bn3.running_var", "resnet.layer2.3.conv1.weight", "resnet.layer2.3.bn1.weight", "resnet.layer2.3.bn1.bias", "resnet.layer2.3.bn1.running_mean", "resnet.layer2.3.bn1.running_var", "resnet.layer2.3.conv2.weight", "resnet.layer2.3.bn2.weight", "resnet.layer2.3.bn2.bias", "resnet.layer2.3.bn2.running_mean", "resnet.layer2.3.bn2.running_var", "resnet.layer2.3.conv3.weight", "resnet.layer2.3.bn3.weight", "resnet.layer2.3.bn3.bias", "resnet.layer2.3.bn3.running_mean", "resnet.layer2.3.bn3.running_var", "resnet.layer3.0.conv3.weight", "resnet.layer3.0.bn3.weight", "resnet.layer3.0.bn3.bias", "resnet.layer3.0.bn3.running_mean", "resnet.layer3.0.bn3.running_var", "resnet.layer3.1.conv3.weight", "resnet.layer3.1.bn3.weight", "resnet.layer3.1.bn3.bias", "resnet.layer3.1.bn3.running_mean", "resnet.layer3.1.bn3.running_var", "resnet.layer3.2.conv1.weight", "resnet.layer3.2.bn1.weight", "resnet.layer3.2.bn1.bias", "resnet.layer3.2.bn1.running_mean", "resnet.layer3.2.bn1.running_var", "resnet.layer3.2.conv2.weight", "resnet.layer3.2.bn2.weight", "resnet.layer3.2.bn2.bias", "resnet.layer3.2.bn2.running_mean", "resnet.layer3.2.bn2.running_var", "resnet.layer3.2.conv3.weight", "resnet.layer3.2.bn3.weight", "resnet.layer3.2.bn3.bias", "resnet.layer3.2.bn3.running_mean", "resnet.layer3.2.bn3.running_var", "resnet.layer3.3.conv1.weight", "resnet.layer3.3.bn1.weight", "resnet.layer3.3.bn1.bias", "resnet.layer3.3.bn1.running_mean", "resnet.layer3.3.bn1.running_var", "resnet.layer3.3.conv2.weight", "resnet.layer3.3.bn2.weight", "resnet.layer3.3.bn2.bias", "resnet.layer3.3.bn2.running_mean", "resnet.layer3.3.bn2.running_var", "resnet.layer3.3.conv3.weight", "resnet.layer3.3.bn3.weight", "resnet.layer3.3.bn3.bias", "resnet.layer3.3.bn3.running_mean", "resnet.layer3.3.bn3.running_var", "resnet.layer3.4.conv1.weight", "resnet.layer3.4.bn1.weight", "resnet.layer3.4.bn1.bias", "resnet.layer3.4.bn1.running_mean", "resnet.layer3.4.bn1.running_var", "resnet.layer3.4.conv2.weight", "resnet.layer3.4.bn2.weight", "resnet.layer3.4.bn2.bias", "resnet.layer3.4.bn2.running_mean", "resnet.layer3.4.bn2.running_var", "resnet.layer3.4.conv3.weight", "resnet.layer3.4.bn3.weight", "resnet.layer3.4.bn3.bias", "resnet.layer3.4.bn3.running_mean", "resnet.layer3.4.bn3.running_var", "resnet.layer3.5.conv1.weight", "resnet.layer3.5.bn1.weight", "resnet.layer3.5.bn1.bias", "resnet.layer3.5.bn1.running_mean", "resnet.layer3.5.bn1.running_var", "resnet.layer3.5.conv2.weight", "resnet.layer3.5.bn2.weight", "resnet.layer3.5.bn2.bias", "resnet.layer3.5.bn2.running_mean", "resnet.layer3.5.bn2.running_var", "resnet.layer3.5.conv3.weight", "resnet.layer3.5.bn3.weight", "resnet.layer3.5.bn3.bias", "resnet.layer3.5.bn3.running_mean", "resnet.layer3.5.bn3.running_var", "resnet.layer4.0.conv3.weight", "resnet.layer4.0.bn3.weight", "resnet.layer4.0.bn3.bias", "resnet.layer4.0.bn3.running_mean", "resnet.layer4.0.bn3.running_var", "resnet.layer4.1.conv3.weight", "resnet.layer4.1.bn3.weight", "resnet.layer4.1.bn3.bias", "resnet.layer4.1.bn3.running_mean", "resnet.layer4.1.bn3.running_var", "resnet.layer4.2.conv1.weight", "resnet.layer4.2.bn1.weight", "resnet.layer4.2.bn1.bias", "resnet.layer4.2.bn1.running_mean", "resnet.layer4.2.bn1.running_var", "resnet.layer4.2.conv2.weight", "resnet.layer4.2.bn2.weight", "resnet.layer4.2.bn2.bias", "resnet.layer4.2.bn2.running_mean", "resnet.layer4.2.bn2.running_var", "resnet.layer4.2.conv3.weight", "resnet.layer4.2.bn3.weight", "resnet.layer4.2.bn3.bias", "resnet.layer4.2.bn3.running_mean", "resnet.layer4.2.bn3.running_var". 
	size mismatch for resnet.layer1.0.conv1.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 1, 1]).
	size mismatch for resnet.layer1.1.conv1.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]).
	size mismatch for resnet.layer2.0.conv1.weight: copying a param with shape torch.Size([128, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 256, 1, 1]).
	size mismatch for resnet.layer2.0.downsample.0.weight: copying a param with shape torch.Size([128, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]).
	size mismatch for resnet.layer2.0.downsample.1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for resnet.layer2.0.downsample.1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for resnet.layer2.0.downsample.1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for resnet.layer2.0.downsample.1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([512]).
	size mismatch for resnet.layer2.1.conv1.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]).
	size mismatch for resnet.layer3.0.conv1.weight: copying a param with shape torch.Size([256, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 512, 1, 1]).
	size mismatch for resnet.layer3.0.downsample.0.weight: copying a param with shape torch.Size([256, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([1024, 512, 1, 1]).
	size mismatch for resnet.layer3.0.downsample.1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for resnet.layer3.0.downsample.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for resnet.layer3.0.downsample.1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for resnet.layer3.0.downsample.1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([1024]).
	size mismatch for resnet.layer3.1.conv1.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([256, 1024, 1, 1]).
	size mismatch for resnet.layer4.0.conv1.weight: copying a param with shape torch.Size([512, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 1024, 1, 1]).
	size mismatch for resnet.layer4.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([2048, 1024, 1, 1]).
	size mismatch for resnet.layer4.0.downsample.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for resnet.layer4.0.downsample.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for resnet.layer4.0.downsample.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for resnet.layer4.0.downsample.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([2048]).
	size mismatch for resnet.layer4.1.conv1.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([512, 2048, 1, 1]).
	size mismatch for fc_1.weight: copying a param with shape torch.Size([128, 640]) from checkpoint, the shape in current model is torch.Size([128, 2176]).

In [None]:
text, image, y= next(iter(models["fasttext_crawl"][dataloader_test_name]))

In [None]:
text

In [None]:
checkpoint = torch.load(
    f"models/BestModel18"
)
models[model_name][model_type_name].load_state_dict(checkpoint["model_state_dict"])
models[model_name]["optimizer"].load_state_dict(checkpoint["optimizer_state_dict"])

# 18_01_22_23_47_41" resnet18, layers: 2