# Github Link: https://github.com/HilalKocak/Deep-Learning/tree/master

In [None]:
pip install transformers torch datasets


In [5]:
from datasets import Dataset, DatasetDict

data = {
    "train": Dataset.from_dict({
        "context": [
            "Mevlana, 13. yüzyıl Pers şairi, yargıç ve İslam alimi.",
            "Mevlana Celaleddin-i Rumi, Sufizmin en önemli temsilcilerinden biridir.",
            "Şems ile Mevlana'nın dostluğu, Mevlana'nın şiirlerinde derin bir iz bırakmıştır."
        ],
        "question": [
            "Mevlana hangi yüzyılda yaşamıştır?",
            "Mevlana'nın tam adı nedir?",
            "Mevlana'nın hayatında Şems'in rolü nedir?"
        ],
        "answers": [
            {"text": ["13. yüzyıl"], "answer_start": [9]},
            {"text": ["Mevlana Celaleddin-i Rumi"], "answer_start": [0]},
            {"text": ["dostluğu"], "answer_start": [15]}
        ]
    }),
    "test": Dataset.from_dict({
        "context": [
            "Mevlana, Mevlevilik tarikatının kurucusudur."
        ],
        "question": [
            "Mevlana hangi tarikatın kurucusudur?"
        ],
        "answers": [
            {"text": ["Mevlevilik"], "answer_start": [9]}
        ]
    })
}


dataset = DatasetDict(data)


In [None]:
from transformers import RobertaTokenizerFast, RobertaForQuestionAnswering

tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
model = RobertaForQuestionAnswering.from_pretrained('roberta-base')


In [7]:
def prepare_features(examples):

    tokenized_inputs = tokenizer(
        examples['question'], examples['context'],
        truncation=True, padding="max_length", max_length=512,
        return_offsets_mapping=True
    )
    offset_mapping = tokenized_inputs.pop("offset_mapping")

    start_positions = []
    end_positions = []

    for i, offsets in enumerate(offset_mapping):

        answer = examples['answers'][i]
        start_char = answer['answer_start'][0]
        end_char = start_char + len(answer['text'][0])


        start_position = next((j for j, offset in enumerate(offsets) if offset[0] <= start_char < offset[1]), None)
        end_position = next((j for j, offset in enumerate(offsets) if offset[0] < end_char <= offset[1]), None)


        if start_position is None or end_position is None:
            start_position = 0
            end_position = 0

        start_positions.append(start_position)
        end_positions.append(end_position)

    tokenized_inputs.update({
        'start_positions': start_positions,
        'end_positions': end_positions
    })
    return tokenized_inputs


dataset = dataset.map(prepare_features, batched=True)


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

In [None]:
pip install accelerate -U


In [None]:
pip install transformers[torch] -U


In [None]:
from transformers import RobertaTokenizerFast, RobertaForQuestionAnswering


tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
model = RobertaForQuestionAnswering.from_pretrained('roberta-base')


In [9]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test']
)

trainer.train()


Epoch,Training Loss,Validation Loss
1,No log,6.1719
2,No log,6.17041
3,No log,6.167393


TrainOutput(global_step=3, training_loss=6.240360260009766, metrics={'train_runtime': 78.0721, 'train_samples_per_second': 0.115, 'train_steps_per_second': 0.038, 'total_flos': 2351670810624.0, 'train_loss': 6.240360260009766, 'epoch': 3.0})

In [18]:
from transformers import pipeline

qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer)

def ask_question(context, question):
    result = qa_pipeline({
        'context': context,
        'question': question
    })
    return result['answer']


context = "Mevlana, on üçüncü yüzyıl Pers şairi, yargıç ve İslam alimi. Mevlana Celaleddin-i Rumi, Sufizmin en önemli temsilcilerinden biridir."
question = "Mevlana'nın tam adı nedir?"


answer = ask_question(context, question)
print(f"Soru: {question}")
print(f"Cevap: {answer}")


Soru: Mevlana'nın tam adı nedir?
Cevap: Mevlana Celaleddin-i Rumi,


In [26]:
context = "Mevlana Jalaluddin Rumi was born on September 30, 1207, in the city of Balkh, present-day Afghanistan. His father, Bahaeddin Walad, was a noted scholar of the time. In his early childhood, Mevlana and his family fled the Mongol invasions, moving first to Baghdad, then to Karaman, and finally settling in Konya, Turkey. Konya became the place where Mevlana deepened his studies in theology and mysticism and penned his most significant works."
question = "Where did Mevlana born?"


answer = ask_question(context, question)
print(f"Soru: {question}")
print(f"Cevap: {answer}")


Soru: Where did Mevlana born?
Cevap: moving first to Baghdad, then to
