# Using BERT for Question Answering
### Credit to Google's Colab for helping provide code to get started

In [None]:
import torch
import torchtext
from torchnlp import *
import transformers
from transformers import BertTokenizer, BertForQuestionAnswering, AdamW
from tqdm.notebook import tqdm

# Pick the compute device once: CUDA when it is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Checkpoint selection.
# Default: fetch the checkpoint by name from the online model repository
# (use this when running from your own copy of the notebook).
bert_model = 'bert-large-uncased-whole-word-masking-finetuned-squad'

# Microsoft Learn module: all required files have been prepared on disk,
# so point at that directory instead.
# bert_model = './bert'

# The tokenizer and the QA model are both loaded from the same checkpoint.
tokenizer, model = (
    BertTokenizer.from_pretrained(bert_model),
    BertForQuestionAnswering.from_pretrained(bert_model),
)

# NOTE(review): no cell visible here uses the optimizer — presumably it is kept
# for a fine-tuning step elsewhere; confirm before removing.
optimizer = AdamW(model.parameters(), lr=3e-5)

# Move the weights to the selected device. Left as the cell's final expression,
# exactly as before, so the notebook still echoes the returned model.
model.to(device)

In [None]:
# question = "When did the British held American colonies declare their independence?"
question = "What did the American people declare their independence from?"
answer_text = "Independence Day, known colloquially as the Fourth of July, is a Federal Holiday in the United States which commemorates the ratification of the Declaration of Independence by the Second Continental Congress on July 4, 1776, establishing the United States of America. The Founding Father delegates of the Second Continental Congress declared that the Thirteen Colonies were no longer subject (and subordinate) to the monarch of Britain, King George III, and were now united, free, and independent states. The Congress voted to approve independence by passing the Lee Resolution on July 2 and adopted the Declaration of Independence two days later, on July 4."

# Encode the question and the passage together as one sequence of token ids.
input_ids = tokenizer.encode(question, answer_text)

token_count = len(input_ids)
print(f'The input has a total of {token_count} tokens.')

In [None]:
# Map every id back to its token string so the encoded input can be inspected.
tokens = tokenizer.convert_ids_to_tokens(input_ids)

# Print a two-column table: token string, then its id.
# The loop variable is `token_id` rather than `id`: at notebook top level the
# name stays bound after the loop and would shadow the builtin `id()` in every
# later cell.
for token, token_id in zip(tokens, input_ids):
    # Evaluate the separator test once and reuse it for both blank lines.
    is_separator = token_id == tokenizer.sep_token_id

    # Blank line before the [SEP] token to make it stand out.
    if is_separator:
        print('')

    print(f'{token:<12} {token_id:>6,}')

    # ...and a blank line after it.
    if is_separator:
        print('')
    

In [None]:
# Build the segment (token-type) ids: 0 for segment A, 1 for segment B.

# Position of the first `[SEP]` token; it closes segment A.
sep_index = input_ids.index(tokenizer.sep_token_id)

# Segment A runs up to and including that [SEP] token itself;
# everything after it belongs to segment B.
num_seg_a = sep_index + 1
num_seg_b = len(input_ids) - num_seg_a

# One flag per position, decided by which side of the separator it is on.
segment_ids = [0 if position <= sep_index else 1 for position in range(len(input_ids))]

# Sanity check: exactly one segment id per input token.
assert len(segment_ids) == len(input_ids)

In [None]:
# Forward pass for inference only. Running it under no_grad() stops autograd
# from recording the graph, which saves memory and time; the logits are only
# read afterwards, never back-propagated.
with torch.no_grad():
    outputs = model(
        # The tokens representing our input text (batch of one), created
        # directly on the target device instead of on the CPU and then copied.
        torch.tensor([input_ids], device=device),
        # The segment ids that differentiate the question from answer_text.
        token_type_ids=torch.tensor([segment_ids], device=device),
        return_dict=True,
    )

# Per-token scores for being the first / last token of the answer span.
start_scores = outputs.start_logits
end_scores = outputs.end_logits


In [None]:
# The answer span is bounded by the highest-scoring start and end positions.
answer_start = torch.argmax(start_scores)
answer_end = torch.argmax(end_scores)

# argmax returns 0-dim tensors; use plain ints as the slice bounds.
start_idx, end_idx = int(answer_start), int(answer_end)

# Let the tokenizer rebuild the text from the selected tokens. A plain
# ' '.join() leaves sub-word pieces split apart with their '##' markers still
# visible in the printed answer.
# If the model predicts an end before the start, the slice is empty and the
# answer is the empty string.
answer = tokenizer.convert_tokens_to_string(tokens[start_idx:end_idx + 1])

print(f"Question: {question}")
print(f'Answer: "{answer}"')

# Trying other models

In [None]:
from transformers import LongformerTokenizer, LongformerForQuestionAnswering

# Use a checkpoint that was actually fine-tuned for extractive QA.
# 'allenai/longformer-base-4096' is only the pre-trained language model:
# loading it into LongformerForQuestionAnswering leaves the span-prediction
# head randomly initialised, so the predicted answers are meaningless.
# NOTE(review): this is the large model, so it is a bigger download and slower
# on CPU than the base checkpoint.
longformer_model = 'allenai/longformer-large-4096-finetuned-triviaqa'

# Initialize the Longformer tokenizer and model from the same checkpoint.
tokenizer = LongformerTokenizer.from_pretrained(longformer_model)
model = LongformerForQuestionAnswering.from_pretrained(longformer_model)

# Move the weights to the selected device (kept as the cell's last expression).
model.to(device)

In [None]:
# Own block because it's a huge chunk of text. Testing for scalability (it also doubles as my own built-in lore searcher for one of my favorite games)
question = "Where could I find police officer zombies in Project Zomboid?"
answer_text = "A zombie is the player's main antagonist in Project Zomboid. These once human citizens of Knox Country roam the landscape in the thousands. They have an insatiable hunger for human flesh and will not hesitate to kill. The default zombies in Project Zomboid are inspired by George A. Romero's shambler zombies. The zombies can be heavily modified with the use of custom sandbox. Sandbox has many options for modifying the zombies, such as increasing or reducing their speed, primary senses, strength etc. Spawning: By default, zombies generally spawn in higher numbers at urban areas than rural areas. However, most areas won't have the exact same population. Some settlements will usually have larger zombie populations than others, such as Louisville. Downtown areas tend to have higher populations than suburban or outskirt areas. The population will increase and hit its peak after 30 days have passed. Some areas can also become more populated as time passes due to hordes migrating from other areas. Though sometimes the opposite may happen as zombies populating an area may wander away from the area too, such as from gunshots and other sounds from the metagame. Zombies can also spawn in enclosed spaces such as bathrooms or closets and ambush unsuspecting survivors. Behavior: Zombies feasting on an unfortunate victim. Zombies rely on their eyesight and hearing, they're especially drawn to noises such as radios, running vehicles and gunfire. If they hear a noise behind them, they will first look behind and turn around afterward. Zombies prioritize their pathfinding: the sight of prey attracts them first and foremost. With no human flesh in sight, noise is the next priority, regardless of whether it is man-made or not (e.g., thunder). Some zombies can be seen sitting against walls. This gives survivors an easier opportunity of killing that zombie or safely getting past, due to the zombie having to stand up first. 
When the player dies next to zombies, they will kneel down and begin to eat their corpse. Zombies can also occasionally be found feasting on a corpse, also giving survivors an easier chance of slipping past or killing the zombie(s). Hordes: Horde movement. Zombies tend to roam in hordes, with one of those zombies being the designated horde leader. The zombies in that horde will follow that leader around. Zombie hordes are the most dangerous, and are best avoided at any stage of the game. At irregular intervals, events outside the player's control will cause zombies to migrate en-masse. For example a helicopter might fly overhead, a dog may bark, or gunshots may be fired in the distance by an unseen survivor. These events are part of the metagame, redistributing the zombie hordes over the map to balance gameplay over long periods of time. Needless to say, these mass movements pose great short-term risk to the player. Zombies take the most direct route available to them to reach the noise source, with no concern as to whether this happens to be through the player's shelter. Combat and damage: Zombies attack their prey by approaching and attempting to grab and bite. These attacks hinder the victim's movement. While a lucky survivor may be able to shake off a few uncoordinated attackers, zombies' tendency to move in groups can leave an individual surrounded, doomed to a grisly demise at the hands of the ravenous horde. Attacks by zombies cause four kinds of injury: bites, scratches, lacerations, and deep wounds. Bites are the gravest and inevitably lead to the death and zombification of the subject. It is however more difficult for a zombie to bite, requiring them to have (relatively) protracted close contact with their prey. In contrast, a zombie's attempts to grab its prey are harder to avoid and often cause scratches, or in more severe cases, lacerations. 
While neither of these spell certain death, they remain dangerous as there is still a chance of contracting the infection (7% for scratches, 25% for lacerations) and slowly succumbing to the disease. While injuring the living is usually the primary concern, zombies are also able to damage inanimate objects such as doors and (player-made) walls. Though a few knocks on a sturdy structure will make little impact, zombies have a tendency to persist. Left to their own devices for a long time, or in larger groups, they will eventually break through. Additionally, the noise one zombie makes as it bashes itself against a door attracts its compatriots, who will often be convinced to join in or simply mill around until something happens. There is a small chance the zombies will lose interest if players hiding inside a structure stay completely silent for a long period of time. However, survivors finding themselves in this predicament would be wise not to rely on such luck. This is especially true in buildings without barricaded windows as zombies can smash through and climb in with no resistance. On seeing prey, zombies' attack strategy is very straightforward: they simply take the most direct path to grab and consume their quarry. This does not always correspond to the most stealthy approach and can often be quite noisy though brushing through vegetation, pounding on doors or barricades, or smashing through windows. They also tend to groan and produce other guttural noises on seeing prey, which can sometimes betray their presence. Despite this, zombies' shuffling movements tend not to make much noise of their own. Survivors should therefore stay alert and be aware of their surroundings, as luck will not always be on their side and zombies will not always announce their arrival. If a zombie does manage to sneak up on a survivor, they have a higher chance of injuring them if the attack is from the rear. 
Larger groups of zombies gain the ability to grab and pull down their prey for feeding. Zombies can also vault over fences, falling to the ground and briefly granting a window for the player to attack or escape before they get back up. Should they choose to attack, survivors should be wary of staying too close to the fence, as zombies vaulting over them can also perform a lunge attack that is capable of knocking the player off balance (or even to the ground, injuring them). Such an attack leaves the player vulnerable and unable to move for a considerable period of time, often leading swiftly to their death if multiple zombies are present. When more than one zombie vaults over a fence simultaneously, the fence will take damage and eventually collapse if even more vault it. If a survivor or another zombie stands on a downed zombie or a vehicle is parked on-top, the zombie will be rendered unable to stand up, providing an easier opportunity to kill them. Crawlers: A crawler. Occasionally, zombies may be turned upside down on its stomach, and may approach a survivor or a source of noise by crawling. Sometimes, after a zombie is knocked down, instead of getting up, it can decide to crawl around. Other times, a crawler can spawn \"playing dead\" by lying down and may emerge when it hears a noise or some prey. This type of zombie moves extremely slowly, at about a fifth of the speed the standing zombie may move at. Therefore, when a crawler is in a horde, it will usually straggle behind and separate from the rest of the horde. However, because it moves slowly, it makes little noise. Although it may moan every now and then, its method of movement makes little noise and its slow speed makes it harder to hear- even if it were to make a lot of noise to reach its target. Furthermore, this type of zombie can be submerged by tall objects such as crops; survivors should be cautious when exploring unsecured/obscured territories. 
Because a crawler does NOT get knocked back when hit by a non-fatal blow, it is risky to fight even a single crawler head on with a melee weapon. In addition to this the crawler's main method of attack is to lunge at the player. This attack has a much higher chance to break the skin or result in a bite on the first attack when compared to walkers. However, a crawler is NOT capable of sieging a structure's door or window. Crawlers are unable to go over fences, and if obstructed by one during combat, will attempt to destroy the fence by attacking it. One weakness is that it has trouble turning around. Unlike the regular zombie, a crawler has to push its entire body 180 degrees to turn around. A survivor can manipulate this by going behind the rotating zombie and finishing it off quickly, if the survivor has time. However, this approach will only work on an one-on-one situation. If there are more zombies- regardless of whether they are crawlers or standard walkers, this approach is too risky since it takes a lot of time. Sprinters: Sprinters have several unique running animations. Sprinters are a zombie type that appears in custom sandbox settings and the Studio challenge map. Sprinters are geared towards experienced players who want more dangerous and challenging zombies. It's generally advised to have a character build with high fitness and/or strength. It's also recommended to avoid over-encumbrance, and wear clothing with little to no reduction to run-speed or combat-speed. Sprinters only walk when they're idle; once aggravated, they can easily catch up to a walking survivor. Sprinting zombies will occasionally trip and fall, creating an opportunity to escape or kill them while they're down. A survivor under attack from a sprinting zombie will suffer a slow-down effect in order to simulate being grabbed. Knox Infection: Survivors that get too close to zombies may be bitten, scratched, or otherwise injured. 
Bites guarantee contracting the infection, but there is only a 7% chance when scratched and a 25% chance when lacerated. If the character has the thick skinned trait, they have a 15% chance of being scratched. There is always a 25% chance of being bitten. When a character has the Knox Infection, the sick moodle appears to verify it, and the moodle's intensity will slowly increase until the character develops a full blown fever and succumbs to it. However, the sick moodle can also happen because the character ate raw or rotten foods, although sometimes the moodle appears out of the blue. When this happens, there will be no zombification, and the moodle will usually go away by itself with rest and by being well fed and won't be of much harm to the character except decreased abilities for a small amount of time. Once their health reaches zero, the survivor will remain dead for a short period of time, before reanimating and standing back up. After being zombified and creating a new character in the same world, the original zombified character can be found, allowing the player to retrieve all their items. However, like all zombies, this zombie will wander, often making it difficult to track them down. Zombified player characters retain all their stats from when they were still alive; this could prove trouble if your own \"zombie\" lived for a long time and has all stats increased before death therefore making them more dangerous than casual zombies. Wounds & decomposition: After months spent wandering around, they will experience decay. The hot and humid weather clearly is not doing wonders for their skin. Survivors using a bladed weapon are able to damage clothing worn by zeds and even inflict visible wounds. Some zombies can spawn with pre-existing wounds. They range from bandaged wounds to even zombies missing their faces or entire scalps. 
(Decomposition does not currently have any in-game effect as of Build 41) As the game world experiences erosion, the zombies will also experience decay. Their bodies will eventually start to experience putrefaction and lose much of their body mass as time passes. However, the zombies won't die from decomposition, the only way they will completely rot away is after they die. Not all zombies will experience decomposition at the same time as each other. Outfits: Zombies can be found wearing many different types of clothing and accessories. The majority of zombies will be wearing basic attire such as T-shirts, jeans, jackets, etc. Zombies can also be found wearing accessories such as jewelry and watches. Some zombies can even be found wearing backpacks. Unique variants: Police officer zombies have a higher chance of spawning outside of police stations, next to police vehicles, etc. Zombies can also be found wearing unique outfits. These zombies can be found easier at locations that their outfit is related to, such as their work-place or inmates at the Kentucky State Prison. Even areas like farmsteads or trailer parks can have different zombie variants compared to ones found in urban areas. A unique zombie variant that can occasionally be found is the survivor zombie. Survivor zombies are usually found wearing clothing items like boonie hats, hoodies, military camo pants etc. These zombies have large backpacks that contain goodies such as non-perishable food, annotated maps, that indicate where possible supply caches might be located or dangerous areas. Their backpacks also have a high carry capacity and weight reduction. Items: Some zombies can be found with supplies in their inventory after killing them, ranging from common items like cigarettes to rare items such as firearms. Some zombies variants, such as police officers, have a higher chance of being found with a gun on them. Pistols can be seen in their holsters or shotguns attached to their back. 
Zombies can also sometimes be found with car keys or keys to the house that they were killed inside of, which is especially useful for getting past reinforced doors in gun shops or police stations. Zombies can also occasionally be found wandering around with weapons impaled in them. Early on, these weapons are generally knives, Hand Axes etc. However, as time passes, more types of melee can be found impaled, such as spears, crowbars, even machetes. Project Zomboid's lore seems to indicate that it takes place in an alternate universe where Romero's concept of the modern zombie never came to be. It is possible to encounter a zombie dressed identically to the character Jason Voorhees, of Friday the 13th fame. It is commonplace for people to assume that the Project Zomboid zombies are \"infected\" humans, like in Day Z; The Last of Us; Left 4 Dead; Dying Light; etc. This is incorrect, canonically the Project Zomboid zombies are indeed reanimated human corpses, and not living human beings with an infection. In the lore, children can become infected, turning them into zombies - it was mentioned on day 6 and day 7 on NNR Radio, and day 6 on KnoxTalk Radio. Although, children (and zombie children) will not be added to the game."

In [None]:
# Encode the question/passage pair as a flat list of token ids, truncated to
# the 4096-token window requested here.
# Calling tokenizer(...) directly returns a dict-like BatchEncoding instead:
# len() of that counts its keys, not the tokens, and the following cells
# (convert_ids_to_tokens, .index, torch.tensor([input_ids])) all need a plain
# list of ids.
# No padding is requested: this is a single sequence, so padding it to the
# full window would only add work for the model.
input_ids = tokenizer.encode(question, answer_text, max_length=4096, truncation=True)

print(f'The input has a total of {len(input_ids)} tokens.')

In [None]:
# Map every id back to its token string so the encoded input can be inspected.
# assumes input_ids is a flat list of token ids — TODO confirm against the
# encoding cell above.
tokens = tokenizer.convert_ids_to_tokens(input_ids)

# Print a two-column table: token string, then its id.
# The loop variable is `token_id` rather than `id`: at notebook top level the
# name stays bound after the loop and would shadow the builtin `id()` in every
# later cell.
for token, token_id in zip(tokens, input_ids):
    # Evaluate the separator test once and reuse it for both blank lines.
    is_separator = token_id == tokenizer.sep_token_id

    # Blank line before the tokenizer's separator token to make it stand out.
    if is_separator:
        print('')

    print(f'{token:<12} {token_id:>6,}')

    # ...and a blank line after it.
    if is_separator:
        print('')

In [None]:
# Split the encoded input at the first separator: 0 for segment A, 1 for segment B.
# NOTE(review): Longformer appears to define only a single token type — confirm
# before feeding these 0/1 ids to the model as token_type_ids.

# Position of the first separator token; it closes segment A.
sep_index = input_ids.index(tokenizer.sep_token_id)

# Segment A runs up to and including that separator itself;
# everything after it belongs to segment B.
num_seg_a = sep_index + 1
num_seg_b = len(input_ids) - num_seg_a

# One flag per position, decided by which side of the separator it is on.
segment_ids = [0 if position <= sep_index else 1 for position in range(len(input_ids))]

# Sanity check: exactly one segment id per input token.
assert len(segment_ids) == len(input_ids)

In [None]:
# Forward pass for inference only; no_grad() skips autograd bookkeeping, which
# matters for memory on an input this long.
# assumes input_ids is a flat list of token ids — TODO confirm against the
# encoding cell above.
with torch.no_grad():
    outputs = model(
        # The tokens representing our input text (batch of one), created
        # directly on the target device.
        torch.tensor([input_ids], device=device),
        # No token_type_ids here: unlike BERT, Longformer has a single token
        # type, so BERT-style 0/1 segment ids would index past its token-type
        # embedding table. With no global_attention_mask passed either, the QA
        # model derives global attention for the question from the separator
        # positions in input_ids.
        return_dict=True,
    )

# Per-token scores for being the first / last token of the answer span.
start_scores = outputs.start_logits
end_scores = outputs.end_logits