**Data Loading**

In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split # train/test partitioning

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect every CSV under the Kaggle input mount. Other file types are ignored
# so that pd.read_csv is never pointed at something it cannot parse.
csv_paths = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
    if filename.lower().endswith('.csv')
]

# Fail here, with a clear message, rather than later with a NameError on `data`.
if not csv_paths:
    raise FileNotFoundError("No CSV files found under /kaggle/input")

# The previous loop re-assigned `data` for each file, so the last file visited
# was the one that survived; keep that choice but parse only that one file.
data = pd.read_csv(csv_paths[-1])

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Data Conversion**

In [None]:
# Convert the (Question, Answer) table into the nested records used for
# question answering: one context per row holding a single question/answer.
# Rows missing either field are skipped because they cannot form a valid record.
qa_rows = data.dropna(subset=["Question", "Answer"])

x_data = []
for index, row in qa_rows.iterrows():
    x_data.append({
        # The answer text doubles as the context passage ...
        "context": row["Answer"],
        "qas": [{
            "question": row["Question"],
            "id": str(index),
            "answers": [{
                "text": row["Answer"],
                # ... so the answer span always begins at character 0.
                "answer_start": 0
            }]
        }]
    })

# Fixed random_state keeps the 75/25 split reproducible across runs.
train_data, test_data = train_test_split(x_data, test_size=0.25, random_state=42)

**BERT**

In [1]:
import logging
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import wandb
import os
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

# Configure experiment tracking and logging, then fine-tune and evaluate
# bert-base-uncased on the question-answering records.

# NOTE(review): presumably a timeout in seconds for W&B HTTP calls; confirm.
os.environ["WANDB_HTTP_TIMEOUT"] = "180"
wandb.init(project="MedQuad", entity="hashim")

# INFO for everything, but only WARNING and above from the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Options handed to QuestionAnsweringModel through `args`.
model_args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'num_train_epochs': 3,
    'learning_rate': 3e-5,
    'n_best_size': 5,
    'max_seq_length': 384,
    'doc_stride': 128,
    'train_batch_size': 12,
    'gradient_accumulation_steps': 8,  # the missing comma here was a SyntaxError
    'wandb_project': 'MedQuad',
}

model = QuestionAnsweringModel(
    "bert", "bert-base-uncased", args=model_args, use_cuda=True
)

# Train the model
model.train_model(train_data)

# Evaluate the model. eval_model returns two values (the result dict and the
# categorised texts), so unpacking three names would raise ValueError.
results, texts = model.eval_model(test_data)

print(results)

# Function to calculate BLEU score
def calculate_bleu(actual_answers, predicted_answers):
    """Return the mean sentence-level BLEU score over paired answers.

    Each reference and prediction is tokenised on whitespace and scored with
    ``sentence_bleu`` against that single reference.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, aligned with
            ``actual_answers``. Pairing uses ``zip``, so extra items in the
            longer iterable are ignored.

    Returns:
        The arithmetic mean of the per-pair scores, or ``0.0`` when there are
        no pairs to score (previously this raised ZeroDivisionError).
    """
    scores = [
        sentence_bleu([actual.split()], predicted.split())
        for actual, predicted in zip(actual_answers, predicted_answers)
    ]
    if not scores:
        return 0.0
    return sum(scores) / len(scores)

# Build aligned reference / prediction lists for the text-overlap metrics;
# these names were previously used without ever being defined.
# The prediction input mirrors test_data with the gold "answers" stripped out.
to_predict = [
    {
        "context": example["context"],
        "qas": [
            {"question": qa["question"], "id": qa["id"]}
            for qa in example["qas"]
        ],
    }
    for example in test_data
]
answer_list, probability_list = model.predict(to_predict)

# Each prediction carries the question id and a ranked list of candidate
# answers; keep the top candidate, falling back to "" when none is returned.
best_answer_by_id = {
    item["id"]: (item["answer"][0] if item["answer"] else "")
    for item in answer_list
}

actual_answers = []
predicted_answers = []
for example in test_data:
    for qa in example["qas"]:
        actual_answers.append(qa["answers"][0]["text"])
        predicted_answers.append(best_answer_by_id.get(qa["id"], ""))

bleu_score = calculate_bleu(actual_answers, predicted_answers)
print("Average BLEU Score:", bleu_score)

# Function to calculate ROUGE scores
def calculate_rouge(actual_answers, predicted_answers):
    """Return the mean ROUGE-1, ROUGE-2 and ROUGE-L F-measures over paired answers.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, paired with
            ``actual_answers`` via ``zip``.

    Returns:
        Dict keyed by ``'rouge1'``, ``'rouge2'`` and ``'rougeL'`` holding the
        ``np.mean`` of the per-pair F-measures.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)

    per_metric = {name: [] for name in metric_names}
    for reference, hypothesis in zip(actual_answers, predicted_answers):
        pair_scores = scorer.score(reference, hypothesis)
        for name, collected in per_metric.items():
            # Only the F-measure of each ROUGE variant is kept.
            collected.append(pair_scores[name].fmeasure)

    return {name: np.mean(collected) for name, collected in per_metric.items()}

# Average ROUGE F-measures for the same reference / prediction pairs.
rouge_scores = calculate_rouge(
    actual_answers=actual_answers,
    predicted_answers=predicted_answers,
)
print("ROUGE Scores:", rouge_scores)

ModuleNotFoundError: No module named 'simpletransformers'

**Mobile BERT**

In [None]:
import logging
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import wandb
import os
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

# Configure experiment tracking and logging, then fine-tune and evaluate
# google/mobilebert-uncased on the question-answering records.

# NOTE(review): presumably a timeout in seconds for W&B HTTP calls; confirm.
os.environ["WANDB_HTTP_TIMEOUT"] = "180"
wandb.init(project="MedQuad", entity="hashim")

# INFO for everything, but only WARNING and above from the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Options handed to QuestionAnsweringModel through `args`.
model_args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'num_train_epochs': 3,
    'learning_rate': 3e-5,
    'n_best_size': 5,
    'max_seq_length': 384,
    'doc_stride': 128,
    'train_batch_size': 12,
    'gradient_accumulation_steps': 8,  # the missing comma here was a SyntaxError
    'wandb_project': 'MedQuad',
}

# The model type must match the checkpoint's architecture: a MobileBERT
# checkpoint needs the "mobilebert" type, not "bert".
model = QuestionAnsweringModel(
    "mobilebert", "google/mobilebert-uncased", args=model_args, use_cuda=True
)

# Train the model
model.train_model(train_data)

# Evaluate the model. eval_model returns two values (the result dict and the
# categorised texts), so unpacking three names would raise ValueError.
results, texts = model.eval_model(test_data)

print(results)

# Function to calculate BLEU score
def calculate_bleu(actual_answers, predicted_answers):
    """Return the mean sentence-level BLEU score over paired answers.

    Each reference and prediction is tokenised on whitespace and scored with
    ``sentence_bleu`` against that single reference.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, aligned with
            ``actual_answers``. Pairing uses ``zip``, so extra items in the
            longer iterable are ignored.

    Returns:
        The arithmetic mean of the per-pair scores, or ``0.0`` when there are
        no pairs to score (previously this raised ZeroDivisionError).
    """
    scores = [
        sentence_bleu([actual.split()], predicted.split())
        for actual, predicted in zip(actual_answers, predicted_answers)
    ]
    if not scores:
        return 0.0
    return sum(scores) / len(scores)

# Build aligned reference / prediction lists for the text-overlap metrics;
# these names were previously used without ever being defined.
# The prediction input mirrors test_data with the gold "answers" stripped out.
to_predict = [
    {
        "context": example["context"],
        "qas": [
            {"question": qa["question"], "id": qa["id"]}
            for qa in example["qas"]
        ],
    }
    for example in test_data
]
answer_list, probability_list = model.predict(to_predict)

# Each prediction carries the question id and a ranked list of candidate
# answers; keep the top candidate, falling back to "" when none is returned.
best_answer_by_id = {
    item["id"]: (item["answer"][0] if item["answer"] else "")
    for item in answer_list
}

actual_answers = []
predicted_answers = []
for example in test_data:
    for qa in example["qas"]:
        actual_answers.append(qa["answers"][0]["text"])
        predicted_answers.append(best_answer_by_id.get(qa["id"], ""))

bleu_score = calculate_bleu(actual_answers, predicted_answers)
print("Average BLEU Score:", bleu_score)

# Function to calculate ROUGE scores
def calculate_rouge(actual_answers, predicted_answers):
    """Return the mean ROUGE-1, ROUGE-2 and ROUGE-L F-measures over paired answers.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, paired with
            ``actual_answers`` via ``zip``.

    Returns:
        Dict keyed by ``'rouge1'``, ``'rouge2'`` and ``'rougeL'`` holding the
        ``np.mean`` of the per-pair F-measures.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)

    per_metric = {name: [] for name in metric_names}
    for reference, hypothesis in zip(actual_answers, predicted_answers):
        pair_scores = scorer.score(reference, hypothesis)
        for name, collected in per_metric.items():
            # Only the F-measure of each ROUGE variant is kept.
            collected.append(pair_scores[name].fmeasure)

    return {name: np.mean(collected) for name, collected in per_metric.items()}

# Average ROUGE F-measures for the same reference / prediction pairs.
rouge_scores = calculate_rouge(
    actual_answers=actual_answers,
    predicted_answers=predicted_answers,
)
print("ROUGE Scores:", rouge_scores)

**RoBERTa**

In [None]:
import logging
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import wandb
import os
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

# Configure experiment tracking and logging, then fine-tune and evaluate
# roberta-base on the question-answering records.

# NOTE(review): presumably a timeout in seconds for W&B HTTP calls; confirm.
os.environ["WANDB_HTTP_TIMEOUT"] = "180"
wandb.init(project="MedQuad", entity="hashim")

# INFO for everything, but only WARNING and above from the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Options handed to QuestionAnsweringModel through `args`.
model_args = {
    'reprocess_input_data': True,
    'overwrite_output_dir': True,
    'num_train_epochs': 3,
    'learning_rate': 3e-5,
    'n_best_size': 5,
    'max_seq_length': 384,
    'doc_stride': 128,
    'train_batch_size': 12,
    'gradient_accumulation_steps': 8,  # the missing comma here was a SyntaxError
    'wandb_project': 'MedQuad',
}

model = QuestionAnsweringModel(
    "roberta", "roberta-base", args=model_args, use_cuda=True
)

# Train the model
model.train_model(train_data)

# Evaluate the model. eval_model returns two values (the result dict and the
# categorised texts), so unpacking three names would raise ValueError.
results, texts = model.eval_model(test_data)

print(results)

# Function to calculate BLEU score
def calculate_bleu(actual_answers, predicted_answers):
    """Return the mean sentence-level BLEU score over paired answers.

    Each reference and prediction is tokenised on whitespace and scored with
    ``sentence_bleu`` against that single reference.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, aligned with
            ``actual_answers``. Pairing uses ``zip``, so extra items in the
            longer iterable are ignored.

    Returns:
        The arithmetic mean of the per-pair scores, or ``0.0`` when there are
        no pairs to score (previously this raised ZeroDivisionError).
    """
    scores = [
        sentence_bleu([actual.split()], predicted.split())
        for actual, predicted in zip(actual_answers, predicted_answers)
    ]
    if not scores:
        return 0.0
    return sum(scores) / len(scores)

# Build aligned reference / prediction lists for the text-overlap metrics;
# these names were previously used without ever being defined.
# The prediction input mirrors test_data with the gold "answers" stripped out.
to_predict = [
    {
        "context": example["context"],
        "qas": [
            {"question": qa["question"], "id": qa["id"]}
            for qa in example["qas"]
        ],
    }
    for example in test_data
]
answer_list, probability_list = model.predict(to_predict)

# Each prediction carries the question id and a ranked list of candidate
# answers; keep the top candidate, falling back to "" when none is returned.
best_answer_by_id = {
    item["id"]: (item["answer"][0] if item["answer"] else "")
    for item in answer_list
}

actual_answers = []
predicted_answers = []
for example in test_data:
    for qa in example["qas"]:
        actual_answers.append(qa["answers"][0]["text"])
        predicted_answers.append(best_answer_by_id.get(qa["id"], ""))

bleu_score = calculate_bleu(actual_answers, predicted_answers)
print("Average BLEU Score:", bleu_score)

# Function to calculate ROUGE scores
def calculate_rouge(actual_answers, predicted_answers):
    """Return the mean ROUGE-1, ROUGE-2 and ROUGE-L F-measures over paired answers.

    Args:
        actual_answers: Iterable of reference answer strings.
        predicted_answers: Iterable of predicted answer strings, paired with
            ``actual_answers`` via ``zip``.

    Returns:
        Dict keyed by ``'rouge1'``, ``'rouge2'`` and ``'rougeL'`` holding the
        ``np.mean`` of the per-pair F-measures.
    """
    metric_names = ('rouge1', 'rouge2', 'rougeL')
    scorer = rouge_scorer.RougeScorer(list(metric_names), use_stemmer=True)

    per_metric = {name: [] for name in metric_names}
    for reference, hypothesis in zip(actual_answers, predicted_answers):
        pair_scores = scorer.score(reference, hypothesis)
        for name, collected in per_metric.items():
            # Only the F-measure of each ROUGE variant is kept.
            collected.append(pair_scores[name].fmeasure)

    return {name: np.mean(collected) for name, collected in per_metric.items()}

# Average ROUGE F-measures for the same reference / prediction pairs.
rouge_scores = calculate_rouge(
    actual_answers=actual_answers,
    predicted_answers=predicted_answers,
)
print("ROUGE Scores:", rouge_scores)

In [3]:
!pip install torch torchvision torchaudio



In [2]:
!pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.70.0-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting streamlit (from simpletransformers)
  Downloading streamlit-1.34.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit->simpletransformers)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog>=2.1.5 (from streamlit->simpletransformers)
  Downloading watchdog-4.0.0-py3-none-manylinux2014_x86_64.whl.metadata (37 kB)
Downloading simpletransformers-0.70.0-py3-none-any.whl (315 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 