<a href="https://colab.research.google.com/github/AlinZohari/InformationExtraction/blob/main/GoogleColab_SimpleTrans_TuneQAmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install simpletransformers

In [None]:
import requests
import json

# Download the training split of the QA dataset and parse it as JSON.
url = "https://raw.githubusercontent.com/AlinZohari/InformationExtraction/main/data/QA_model/train.json"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()
train = response.json()

In [None]:
# Download the validation split of the QA dataset and parse it as JSON.
url = "https://raw.githubusercontent.com/AlinZohari/InformationExtraction/main/data/QA_model/validation.json"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()
validation = response.json()

In [None]:
import torch
# Report whether PyTorch can see a CUDA device in this runtime.
print(torch.cuda.is_available())


In [None]:
# Preview only the first records instead of dumping the whole training set
# into the cell output; fall back to the full object if it is not a list.
train[:2] if isinstance(train, list) else train

In [None]:
# Preview only the first records instead of dumping the whole validation set
# into the cell output; fall back to the full object if it is not a list.
validation[:2] if isinstance(validation, list) else validation

In [None]:
import logging

from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs


In [None]:

# Choose which pretrained architecture to fine-tune by editing `model_type`.
# It must be one of the aliases listed in MODEL_PRESETS below.
model_type = "bert"

# alias -> (model_type passed to simpletransformers, pretrained checkpoint name)
# Some aliases ("distilroberta", "electra-base", "electra-small") resolve to a
# different model_type than the alias itself.
MODEL_PRESETS = {
    "bert": ("bert", "bert-base-cased"),
    "roberta": ("roberta", "roberta-base"),
    "distilbert": ("distilbert", "distilbert-base-cased"),
    "distilroberta": ("roberta", "distilroberta-base"),
    "electra-base": ("electra", "google/electra-base-discriminator"),
    "electra-small": ("electra", "google/electra-small-discriminator"),
    "xlnet": ("xlnet", "xlnet-base-cased"),
}

# An unrecognised alias used to fall through the if/elif chain and silently
# keep "bert-base-cased" as the checkpoint, pairing it with the wrong
# model_type. Reject it explicitly instead.
if model_type not in MODEL_PRESETS:
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected one of {sorted(MODEL_PRESETS)}"
    )

model_type, model_name = MODEL_PRESETS[model_type]

In [None]:

### Advanced Methodology
# Options passed to QuestionAnsweringModel through its `args` parameter.
# Left disabled here (not set): use_early_stopping, early_stopping_metric
# ("mcc"), n_gpu (2), manual_seed (4), use_multiprocessing (False) and
# config={"output_hidden_states": True}.
train_args = dict(
    # --- data processing / caching ---
    reprocess_input_data=True,
    overwrite_output_dir=True,
    use_cached_eval_features=True,
    # --- output locations, one folder per model_type ---
    output_dir=f"outputs/{model_type}",
    best_model_dir=f"outputs/{model_type}/best_model",
    # --- training schedule and in-training evaluation ---
    evaluate_during_training=True,
    max_seq_length=128,
    num_train_epochs=5,
    evaluate_during_training_steps=1000,
    # --- Weights & Biases settings; the run is named after the checkpoint ---
    wandb_project="Question Answer Application",
    wandb_kwargs=dict(name=model_name),
    # --- checkpointing ---
    save_model_every_epoch=False,
    save_eval_checkpoints=False,
    # --- prediction / batching ---
    n_best_size=3,
    train_batch_size=128,
    eval_batch_size=64,
)

In [None]:
# Build the question-answering model from the selected architecture and
# checkpoint, configured with train_args.
# use_cuda is passed explicitly: the library defaults to use_cuda=True and
# raises when no CUDA device is available, so fall back to CPU in that case.
model = QuestionAnsweringModel(
    model_type,
    model_name,
    args=train_args,
    use_cuda=torch.cuda.is_available(),
)

In [None]:
# Train the model
# Fine-tunes on `train`; `validation` is supplied as eval_data, which is used
# because train_args enables "evaluate_during_training".
model.train_model(train, eval_data=validation)

In [None]:
# Evaluate the model
# eval_model returns two values for the validation set, unpacked here as
# `result` and `texts`.
result, texts = model.eval_model(validation)

In [None]:
# Download the authorization document that the model will be queried against.
url = "https://raw.githubusercontent.com/AlinZohari/InformationExtraction/main/data/authorize_doc/StarlinkGen2_FCC-22-91A1.txt"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Ensure the request was successful. Raise instead of only printing: a mere
# print would leave `context` undefined (or stale from an earlier run) and
# the prediction cells below would fail or silently use the wrong document.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to fetch data. HTTP Status Code: {response.status_code}"
    )
context = response.text


In [None]:
# Questions to ask about the document, built from (id, question text) pairs.
# Each entry becomes a dict with the keys "question" and "id", in that order.
questions = [
    {"question": question_text, "id": question_id}
    for question_id, question_text in (
        (
            "const_name",
            "What's the name of the satellite constellation the company seeks to deploy or operate?",
        ),
        (
            "date_release",
            "On which date was the document released?",
        ),
        (
            "date_50",
            "By which date must the company launch and operate half of its satellites?",
        ),
        (
            "date_100",
            "By which date is the company expected to have all its satellites operational?",
        ),
        (
            "total_sat_const",
            "How many satellites is the company authorized to deploy and operate for this constellation?",
        ),
        (
            "altitude",
            "At which authorized altitudes will the company deploy its satellites?",
        ),
        (
            "inclination",
            "What are the authorized satellite inclinations within the corresponding altitudes?",
        ),
        (
            "number_orb_plane",
            "How many orbital planes, corresponding to given altitudes and inclinations, has the company been authorized for?",
        ),
        (
            "total_sat_per_orb_plane",
            "How many satellites are allocated to each orbital plane?",
        ),
        (
            "total_sat_per_alt_incl",
            "How many satellites, for each altitude and inclination, are there across all matching orbital planes?",
        ),
        (
            "operational_lifetime",
            "What is the satellite's expected operational lifetime in years?",
        ),
    )
]

In [None]:
# Prediction payload: a single entry pairing the downloaded document text
# ("context") with the full list of questions ("qas").
to_predict = [dict(context=context, qas=questions)]

In [None]:

# Run the model on the prepared payload; predict returns two values,
# unpacked here as `answers` and `probabilities`.
answers, probabilities = model.predict(to_predict)

In [None]:
# Display the answers returned by model.predict for the questions above.
answers