# Code from Documentations for importing model and datasets.

## Importing Model

In [4]:
pip install transformers==4.28.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import torch
import requests
from transformers import AutoProcessor, AutoModel, AutoTokenizer
from PIL import Image

In [6]:
processor = AutoProcessor.from_pretrained('microsoft/git-base')
model = AutoModel.from_pretrained('microsoft/git-base')
tokenizer = AutoTokenizer.from_pretrained('microsoft/git-base')

Some weights of the model checkpoint at microsoft/git-base were not used when initializing GitModel: ['output.bias', 'output.weight']
- This IS expected if you are initializing GitModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GitModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Importing Dataset

In [7]:
pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
from concurrent.futures import ThreadPoolExecutor
from functools import partial
import urllib
from PIL import Image
from datasets import load_dataset
from datasets.utils.file_utils import get_datasets_user_agent
import io

In [9]:
num_threads = 20
dataset = load_dataset('poloclub/diffusiondb', 'large_random_1k')



  0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
dataset['train'][0]

{'image': <PIL.WebPImagePlugin.WebPImageFile image mode=RGB size=512x512 at 0x7FC9A6DC78B0>,
 'prompt': 'photo of gorilla wearing glasses reading a book, sitting on bench, 3 5 mm, full - hd ',
 'seed': 1499252293,
 'step': 50,
 'cfg': 7.0,
 'sampler': 'k_lms',
 'width': 512,
 'height': 512,
 'user_name': '5d5e77fa54a7c8a915f8156033987ea43698aaf77b991ff026839c8a46cbe705',
 'timestamp': datetime.datetime(2022, 8, 16, 16, 40, tzinfo=<UTC>),
 'image_nsfw': 0.08135467022657394,
 'prompt_nsfw': 0.00179166195448488}

In [11]:
dataset = dataset.remove_columns(['seed', 'step', 'cfg', 'sampler', 'width', 'height', 'user_name', 'timestamp', 'image_nsfw', 'prompt_nsfw'])

In [12]:
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'prompt'],
        num_rows: 1000
    })
})

In [13]:
def transform(example_batch):
  images = [x for x in example_batch["image"]]
  captions = [x for x in example_batch["prompt"]]
  inputs = processor(images=images, text=captions, padding="max_length")
  return inputs

In [14]:
train = dataset["train"].train_test_split(test_size=0.2)["train"]
test = dataset["train"].train_test_split(test_size=0.2)["test"]

In [15]:
train

Dataset({
    features: ['image', 'prompt'],
    num_rows: 800
})

In [16]:
train.set_transform(transform)
test.set_transform(transform)

In [17]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="output_conceptual",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=100,
    save_steps=1000,
    evaluation_strategy="steps",
    learning_rate=5e-5,
    weight_decay=0.01,
    remove_unused_columns = False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train,
    eval_dataset=test
)

In [None]:
trainer.train()



In [None]:
trainer.evaluate()

In [None]:
trainer.save_model("your_model")

In [None]:
trainer.push_to_hub("your_model")