In [None]:
from google.colab import drive
import os
import json
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import RegexpTokenizer
from nltk import pos_tag

In [None]:
# Mount Google Drive into the Colab filesystem so the project data is reachable.
drive.mount('/content/drive')
# Root folder of the project data on the mounted drive. Later cells build paths
# with plain string concatenation (datadir + '...'), so the trailing slash matters.
datadir = "/content/drive/My Drive/CS546Data/"

Mounted at /content/drive


In [None]:
import re

def parse_entities_from_file(file_path):
    """Read an ontology listing and return the entity name found on each line.

    Lines that begin with "<" and lines that are empty after stripping are
    skipped. For every other line the surrounding whitespace is removed and
    the text after the last tab character is taken as the entity name.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        list[str]: Entity names in file order; duplicates are kept.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [
            raw.strip().split('\t')[-1]
            for raw in handle
            if not (raw.startswith("<") or raw.strip() == '')
        ]

# Ontology listings to harvest entity names from. Only the entity list is
# active; the other ontology files are kept here as disabled alternatives.
paths_to_ontology_files = [
    # datadir + 'Ontology/hierarchy.txt',
    datadir + 'Ontology/listEntities.txt'
    # datadir + 'Ontology/listEvents.txt',
    # datadir + 'Ontology/listPredicates.txt',
    # datadir + 'Ontology/listFacts.txt'
]

# Collect the names in a set so repeated entities are stored once.
all_entities = set()
for ontology_path in paths_to_ontology_files:
    all_entities |= set(parse_entities_from_file(ontology_path))

# Sorted list of the unique entity names.
entities_list = sorted(all_entities)

# Persist one entity per line in the current working directory.
with open('entities_list.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{name}\n" for name in entities_list)

In [None]:
def load_entities(file_path):
    """Load entity names from a text file containing one name per line.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped: an empty entity name would turn into an
    empty regex pattern downstream, which matches at every position of a text.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        list[str]: The non-empty entity names in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate the file lazily instead of materialising it with readlines().
        stripped_lines = (line.strip() for line in file)
        return [name for name in stripped_lines if name]

# Reload the entity names from the 'entities_list.txt' file in the working directory.
entities_list = load_entities('./entities_list.txt')

In [None]:
def tag_entities_in_text(text, entities):
    """Wrap every occurrence of a known entity in ``[ENTITY]...[/ENTITY]`` tags.

    Matching is case-insensitive and is not restricted to word boundaries.
    The tagged text uses the entity's spelling from ``entities`` rather than
    the spelling found in ``text``.

    All entities are matched in a single pass over the original text, so:
      * a longer name wins over a shorter name it starts with
        ("Stone Bricks" is not split into a tagged "Stone" plus "Bricks");
      * text inserted for one entity, including the tag markers themselves,
        is never re-scanned and tagged again by another entity;
      * entity names are inserted literally, so backslashes in a name are not
        interpreted as regex replacement escapes.

    Args:
        text: The text to annotate.
        entities: Iterable of entity names. Empty names are ignored. If two
            names differ only by case, the first one provides the spelling.

    Returns:
        str: ``text`` with each entity mention replaced by its tagged form.
    """
    # Lowercased name -> spelling to emit. Empty names are skipped because an
    # empty pattern would match between every pair of characters.
    canonical = {}
    for entity in entities:
        if entity:
            canonical.setdefault(entity.lower(), entity)
    if not canonical:
        return text

    # Regex alternation is ordered, so list longer names first to prefer them.
    alternatives = sorted(canonical.values(), key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(name) for name in alternatives), re.IGNORECASE)

    def _tag(match):
        found = match.group(0)
        # Fall back to the matched text if case folding differs from lower().
        return f'[ENTITY]{canonical.get(found.lower(), found)}[/ENTITY]'

    return pattern.sub(_tag, text)

In [None]:
def read_and_tag_data(file_path, entities):
    """Load a JSON mapping and tag the entities in every record's description.

    The JSON document is expected to be an object whose values are objects
    with a 'description' key. Each description is replaced by the result of
    ``tag_entities_in_text``; other keys of a record are left untouched.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.
        entities: Entity names forwarded to ``tag_entities_in_text``.

    Returns:
        dict: The loaded mapping with tagged descriptions.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        records = json.load(source)

    for record in records.values():
        record['description'] = tag_entities_in_text(record['description'], entities)

    return records

# NOTE(review): 'path_to_your_minecraft_data.json' looks like a placeholder path —
# point it at the real JSON file and confirm that file exists before running this cell.
tagged_data = read_and_tag_data('path_to_your_minecraft_data.json', entities_list)

In [None]:
def tag_entities(text, entities):
    """Lowercase ``text`` and wrap whole-word entity mentions in ``[ENTITY]`` tags.

    The text is lowercased before matching, so the result is lowercase apart
    from the inserted tags, which carry the entity's spelling from ``entities``.

    A mention only counts when it is not glued to a neighbouring word
    character. Lookarounds are used instead of ``\\b`` so that names starting
    or ending with punctuation, such as "Clay (block)", can still match.
    All entities are matched in one pass with longer names tried first, so a
    mention is tagged once and already-tagged text is never re-scanned.

    Args:
        text: The text to annotate.
        entities: Iterable of entity names. Empty names are ignored. If two
            names differ only by case, the first one provides the spelling.

    Returns:
        str: The lowercased text with entity mentions tagged.
    """
    lowered_text = text.lower()

    # Lowercased name -> spelling to emit inside the tag.
    canonical = {}
    for entity in entities:
        if entity:
            canonical.setdefault(entity.lower(), entity)
    if not canonical:
        return lowered_text

    # Longest first: alternation is ordered, so "stone bricks" beats "stone".
    alternatives = sorted(canonical, key=len, reverse=True)
    pattern = re.compile(
        r'(?<!\w)(?:' + '|'.join(re.escape(name) for name in alternatives) + r')(?!\w)'
    )

    # A function replacement inserts the name literally (no escape processing).
    return pattern.sub(lambda m: f'[ENTITY]{canonical[m.group(0)]}[/ENTITY]', lowered_text)

def read_files_to_json(folder_paths, entity_list, output_path='/content/minecraft_data.json'):
    """Build a topic -> tagged description mapping from ``.txt`` files and save it as JSON.

    If ``output_path`` already exists, its contents are loaded first and the
    newly read topics are merged into them (a topic read again overwrites the
    stored entry). Every ``.txt`` file in each folder becomes one topic whose
    name is the file name without its extension.

    Args:
        folder_paths: Iterable of directories to scan (non-recursively).
        entity_list: Entity names forwarded to ``tag_entities_in_text``.
        output_path: JSON file used both as the existing cache to merge into
            and as the destination of the result.

    Returns:
        dict: Mapping of topic name to ``{'description': tagged_text}``.
    """
    data_dict = {}

    if os.path.exists(output_path):
        with open(output_path, 'r', encoding='utf-8') as json_file:
            data_dict = json.load(json_file)

    for folder_path in folder_paths:
        # os.listdir order is arbitrary; sort so repeated runs give the same order.
        for filename in sorted(os.listdir(folder_path)):
            # splitext removes only the final extension; str.replace would also
            # drop any ".txt" that appears in the middle of the name.
            topic, extension = os.path.splitext(filename)
            if extension != ".txt":
                continue
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                content = file.read()
            data_dict[topic] = {
                # Store the content with entities tagged.
                'description': tag_entities_in_text(content, entity_list)
            }

    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(data_dict, json_file, indent=4)

    return data_dict

# Now call the function with the entity list
folder_paths = [
    datadir + 'KnowledgeDatabase/GamepediaTxt/Blocks',
    datadir + 'KnowledgeDatabase/GamepediaTxt/Entity',
    datadir + 'KnowledgeDatabase/GamepediaTxt/Items',
    datadir + 'KnowledgeDatabase/GamepediaTxt/Other'
]
minecraft_data = read_files_to_json(folder_paths, entities_list)


In [None]:
import re
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

# Function to extract entities from ontology files
def parse_entities_from_file(file_path):
    """Extract entity names from an ontology file, one per qualifying line.

    A line is ignored when it starts with "<" or contains only whitespace.
    Otherwise its stripped text is split on tabs and the last field is kept.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        list[str]: Entity names in the order they appear in the file.
    """
    names = []
    with open(file_path, 'r', encoding='utf-8') as source:
        for raw_line in source:
            stripped = raw_line.strip()
            # Keep only lines that carry an entity definition.
            if not raw_line.startswith("<") and stripped != '':
                fields = stripped.split('\t')
                names.append(fields[-1])
    return names

# Function to get the wordnet POS tag
def get_wordnet_pos(treebank_tag):
    """Translate a Treebank-style POS tag into the matching WordNet POS constant.

    The decision is made on the tag's leading letter: 'J' -> adjective,
    'V' -> verb, 'N' -> noun, 'R' -> adverb. Any other tag falls back to noun.

    Args:
        treebank_tag: POS tag string such as 'NN' or 'VBD'.

    Returns:
        The corresponding attribute of ``wordnet`` (ADJ, VERB, NOUN or ADV).
    """
    prefix_to_attribute = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attribute in prefix_to_attribute:
        if treebank_tag.startswith(prefix):
            # Resolve the constant only for the branch that is actually taken.
            return getattr(wordnet, attribute)
    return wordnet.NOUN  # Default to noun

# Function to preprocess text
def preprocess_text(text, entity_list):
    """Normalise text while keeping ``[ENTITY]...[/ENTITY]`` spans intact.

    The text is split into entity spans and word tokens. Entity spans are
    passed through unchanged. Every other token is lowercased, dropped if it
    is an English stop word, and otherwise lemmatized using the POS tag of
    that single token.

    Args:
        text: Input text, optionally containing ``[ENTITY]...[/ENTITY]`` tags.
        entity_list: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        str: The processed tokens joined by single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    # The brackets must be escaped: unescaped, "[ENTITY]" is a character class
    # matching a single letter, so tagged spans were never kept as one token.
    # The entity alternative is non-greedy so adjacent spans stay separate.
    tokenizer = RegexpTokenizer(r'\[ENTITY\].+?\[/ENTITY\]|\w+')
    # Build the stop-word set once instead of reloading the list per token.
    stop_words = set(stopwords.words('english'))

    processed_tokens = []
    for token in tokenizer.tokenize(text):
        if token.startswith('[ENTITY]'):
            processed_tokens.append(token)  # Keep the entity as is
            continue
        # Lowercase non-entity tokens
        token = token.lower()
        if token in stop_words:
            continue
        # POS tag of the single token, converted to WordNet's tag set
        pos = pos_tag([token])[0][1]
        processed_tokens.append(lemmatizer.lemmatize(token, get_wordnet_pos(pos)))
    return ' '.join(processed_tokens)

# Example usage: run the preprocessing on a short, already-tagged sentence and
# print the result. The intent is for each [ENTITY]...[/ENTITY] span to come
# through as a single token.
# entities_list = parse_entities_from_file('ontology.txt')  # Assuming 'ontology.txt' contains your ontology entities
sample_text = "To create a [ENTITY]Nether Portal[/ENTITY], you need [ENTITY]Obsidian[/ENTITY] which can be mined with a [ENTITY]Diamond Pickaxe[/ENTITY]."
# entities_list comes from an earlier cell; preprocess_text accepts it but does not read it.
preprocessed_sample = preprocess_text(sample_text, entities_list)
print(preprocessed_sample)

create nether portal need entity obsidian entity mine entity diamond pickaxe entity


In [None]:
# Fetch the NLTK resources requested by this notebook, in a fixed order.
for nltk_package in ('punkt', 'wordnet', 'stopwords', 'averaged_perceptron_tagger'):
    nltk.download(nltk_package)

def get_wordnet_pos(treebank_tag):
    """Map a Treebank POS tag to a WordNet POS constant.

    Tags starting with 'J', 'V' or 'R' map to adjective, verb and adverb.
    Everything else, including tags starting with 'N', maps to noun.

    Args:
        treebank_tag: POS tag string such as 'JJ' or 'RB'.

    Returns:
        One of ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.ADV``, ``wordnet.NOUN``.
    """
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    if treebank_tag.startswith('V'):
        return wordnet.VERB
    if treebank_tag.startswith('R'):
        return wordnet.ADV
    # Noun tags and every unrecognised tag share the noun default.
    return wordnet.NOUN

def preprocess_text(text):
    """Lowercase, tokenize, remove stop words from and lemmatize ``text``.

    The text is lowercased and split into ``\\w+`` tokens. English stop words
    are removed, the remaining tokens are POS-tagged together, and each token
    is lemmatized with the WordNet POS derived from its tag.

    Args:
        text: Raw input text.

    Returns:
        str: The lemmatized tokens joined by single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    tokenizer = RegexpTokenizer(r'\w+')
    # Load the stop-word list once; evaluating it inside the filter would
    # rebuild the whole list for every token.
    stop_words = set(stopwords.words('english'))
    tokens = [t for t in tokenizer.tokenize(text.lower()) if t not in stop_words]
    pos_tags = pos_tag(tokens)  # Get POS tags
    # Lemmatize with POS tags
    return ' '.join(lemmatizer.lemmatize(word, get_wordnet_pos(pos)) for word, pos in pos_tags)

# Load the tagged articles from the JSON file in the working directory.
with open('minecraft_data.json', 'r', encoding='utf-8') as f:
    minecraft_data = json.loads(f.read())

# Reduce every article to its preprocessed description, keyed by topic.
preprocessed_data = {
    topic_name: preprocess_text(article['description'])
    for topic_name, article in minecraft_data.items()
}

# Save the preprocessed descriptions next to the input file.
with open('preprocessed_minecraft_data.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(preprocessed_data, indent=4))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
!pip install elasticsearch



In [None]:
import getpass
import os

from elasticsearch import Elasticsearch

# Connection settings are read from the environment so that no credentials are
# stored in the notebook. Set ES_URL / ES_USER / ES_PASSWORD before running;
# if ES_PASSWORD is not set the password is asked for interactively.
# NOTE(review): a password was previously committed in this cell — it should be
# rotated on the cluster, since it remains in the notebook's history.
es_url = os.environ.get("ES_URL", "https://134.209.74.243:9200/")
es_user = os.environ.get("ES_USER", "elastic")
es_password = os.environ.get("ES_PASSWORD") or getpass.getpass(
    f"Elasticsearch password for {es_user}: "
)

es = Elasticsearch(
    [es_url.rstrip('/')],
    # NOTE(review): certificate verification is disabled, so the server's
    # identity is not checked. Prefer supplying the cluster's CA certificate.
    verify_certs=False,
    http_auth=(es_user, es_password)
)


  _transport = transport_class(
  es = Elasticsearch(


In [None]:
# Create the index only when it is missing, so this cell can be re-run without
# failing on an index that already exists.
if not es.indices.exists(index='minecraft'):
    es.indices.create(index='minecraft')



ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'minecraft'})

In [None]:
# Index every preprocessed article into the 'minecraft' index, one document per topic.
for topic, description in preprocessed_data.items():
    # Progress output: the topic name and the first 50 characters of its text.
    print(f"Topic: {topic}")
    print(f"Description: {description[:50]}...")

    # Document body stored in Elasticsearch.
    doc = {
        'topic': topic,
        'description': description
    }

    try:
        # The topic name doubles as the document id.
        res = es.index(index='minecraft', id=topic, body=doc)
        print(f"Indexed {topic}")
    except Exception as e:
        # Best effort: report the failure and continue with the next topic.
        print(f"Failed to index {topic}: {e}")

Topic: Cauldron
Description: cauldron block hold water cauldron mine use pickax...
Indexed Cauldron
Topic: Banner
Description: banner flag tall decorative block feature field hi...




Indexed Banner
Topic: Andesite
Description: andesite type igneous rock polish andesite polish ...
Indexed Andesite
Topic: Block_of_Coal
Description: block coal also know coal block mineral block also...
Indexed Block_of_Coal
Topic: Block_of_Iron
Description: block iron also know iron block decorative mineral...




Indexed Block_of_Iron
Topic: Cake
Description: cake food block eat player place cake cannot recol...
Indexed Cake
Topic: Chest
Description: chest block store item chest obtain craft break pr...
Indexed Chest
Topic: Cactus
Description: cactus plural cactus cactus plant occur naturally ...




Indexed Cactus
Topic: Air
Description: air block minecraft use represent unoccupied space...
Indexed Air
Topic: Carpet
Description: carpet thin block mainly use decoration carpet min...
Indexed Carpet
Topic: Block_of_Emerald
Description: block emerald also know emerald block decorative m...




Indexed Block_of_Emerald
Topic: Beacon
Description: beacon unique block project light beam skyward pro...
Indexed Beacon
Topic: Block_of_Quartz
Description: block quartz also know quartz block mineral block ...
Indexed Block_of_Quartz
Topic: Activator_Rail
Description: activator rail type rail activate certain minecart...




Indexed Activator_Rail
Topic: Carrot
Description: carrot food item eat player carrot farm harvested ...
Indexed Carrot
Topic: Blocks
Description: block basic unit structure minecraft together buil...
Indexed Blocks
Topic: Clay_(block)
Description: clay block find water clay block mine anything sho...




Indexed Clay_(block)
Topic: Brewing_Stand
Description: brewing stand block use brewing potion splash poti...
Indexed Brewing_Stand
Topic: Block_of_Redstone
Description: block redstone also know redstone block act piston...
Indexed Block_of_Redstone
Topic: Block_of_Diamond
Description: block diamond also know diamond block decorative m...




Indexed Block_of_Diamond
Topic: Button
Description: button non solid block provide temporary redstone ...
Indexed Button
Topic: Bedrock
Description: bedrock indestructible unmineable block bedrock ca...
Indexed Bedrock
Topic: Bookshelf
Description: bookshelf block primarily serve enhance enchant en...




Indexed Bookshelf
Topic: Block_of_Gold
Description: block gold also know gold block decorative mineral...
Indexed Block_of_Gold
Topic: Bed
Description: bed block allow player sleep reset spawn point wit...
Indexed Bed
Topic: Beetroot_Seeds
Description: pocket edition beetroot seed item exclusive pocket...




Indexed Beetroot_Seeds
Topic: Barrier
Description: barrier invisible block appear transparent mixed p...
Indexed Barrier
Topic: Coal_Ore
Description: coal ore mineral block drop coal mine coal ore blo...
Indexed Coal_Ore
Topic: Anvil
Description: anvil block use item repair interface repair renam...




Indexed Anvil
Topic: Cobblestone
Description: cobblestone common block obtain mine stone texture...
Indexed Cobblestone
Topic: Bricks
Description: brick decorative building block brick mine use pic...
Indexed Bricks
Topic: Ladder
Description: ladder wooden block use climb wall either vertical...




Indexed Ladder
Topic: Emerald_Ore
Description: emerald ore rare mineral block game drop emerald m...
Indexed Emerald_Ore
Topic: Mob_head
Description: mob head decorative block mob head break use anyth...
Indexed Mob_head
Topic: Lapis_Lazuli_Block
Description: lapis lazuli block decorative mineral block craft ...




Indexed Lapis_Lazuli_Block
Topic: Command_Block
Description: command block block execute command cannot obtain ...
Indexed Command_Block
Topic: Enchantment_Table
Description: enchantment table block allow player spend experie...
Indexed Enchantment_Table
Topic: Invisible_Bedrock
Description: invisible bedrock invisible indestructible block e...




Indexed Invisible_Bedrock
Topic: Granite
Description: granite type igneous rock polish granite polish ve...
Indexed Granite
Topic: Farmland
Description: farmland technical block seed plant grown make blo...
Indexed Farmland
Topic: Glass
Description: glass decorative fully transparent block glass dro...




Indexed Glass
Topic: Jack_o%27Lantern
Description: jack lantern solid block provide light jack lanter...
Indexed Jack_o%27Lantern
Topic: Leaves
Description: leaf block grow part tree leave obtained shear sil...
Indexed Leaves
Topic: Melon_Seeds
Description: melon seed item use grow melon plant melon seed fi...




Indexed Melon_Seeds
Topic: End_Portal_(block)
Description: end portal end portal frame console edition block ...
Indexed End_Portal_(block)
Topic: Fence
Description: fence wall block cannot jump wood fence broken qui...
Indexed Fence
Topic: Hopper
Description: hopper block use catch item entity transfer item c...




Indexed Hopper
Topic: Dead_Bush
Description: dead bush also know shrubs transparent block form ...
Indexed Dead_Bush
Topic: Diamond_Ore
Description: diamond ore mineral block one valuable elusive blo...
Indexed Diamond_Ore
Topic: Moss_Stone
Description: moss stone block resemble cobblestone moss grow cr...




Indexed Moss_Stone
Topic: Liquid
Description: liquid block name suggest flow spread currently tw...
Indexed Liquid
Topic: Dropper
Description: dropper block use eject item push item another con...
Indexed Dropper
Topic: Daylight_Sensor
Description: daylight sensor block output redstone signal base ...




Indexed Daylight_Sensor
Topic: Iron_Bars
Description: iron bar block serve similar purpose fence 1 block...
Indexed Iron_Bars
Topic: End_Stone
Description: end stone sometimes call ender stone block appear ...
Indexed End_Stone
Topic: Cobweb
Description: cobweb block slow movement cobweb obtain mine use ...




Indexed Cobweb
Topic: Lapis_Lazuli_Ore
Description: lapis lazuli ore ore block lapis lazuli obtain sto...
Indexed Lapis_Lazuli_Ore
Topic: Lily_Pad
Description: lily pad collectable block find grow water swampla...
Indexed Lily_Pad
Topic: Lava
Description: lava fluid block lava cannot obtain item retrieve ...




Indexed Lava
Topic: Monster_Spawner
Description: monster spawner block spawn mob spawner cannot obt...
Indexed Monster_Spawner
Topic: Jukebox
Description: jukebox block use play music disc jukebox broken u...
Indexed Jukebox
Topic: Grass_Block
Description: grass block block generates naturally overworld gr...




Indexed Grass_Block
Topic: Hardened_Clay
Description: harden clay block blast resistance comparable ston...
Indexed Hardened_Clay
Topic: Lever
Description: lever non solid block provide switchable redstone ...
Indexed Lever
Topic: Door
Description: door block use switchable barrier seven different ...




Indexed Door
Topic: Locked_chest
Description: lock chest april fool block introduce block indest...
Indexed Locked_chest
Topic: Cobblestone_Wall
Description: cobblestone wall decorative block 2 type cobblesto...
Indexed Cobblestone_Wall
Topic: Dispenser
Description: dispenser solid block use redstone component dispe...




Indexed Dispenser
Topic: Hay_Bale
Description: hay bale decorative flammable block also feed hors...
Indexed Hay_Bale
Topic: Ender_Chest
Description: ender chest type chest store item ender chest obta...
Indexed Ender_Chest
Topic: Melon_(block)
Description: melon block grow fully grow melon seed melon mine ...




Indexed Melon_(block)
Topic: Glowing_Obsidian
Description: pocket edition glow obsidian pocket edition exclus...
Indexed Glowing_Obsidian
Topic: Fence_Gate
Description: fence gate block share function door fence fence g...
Indexed Fence_Gate
Topic: Crafting_Table
Description: craft table originally call workbench one essentia...




Indexed Crafting_Table
Topic: Iron_Ore
Description: iron ore mineral block find underground common min...
Indexed Iron_Ore
Topic: Dirt
Description: dirt block find abundantly overworld coarse dirt v...
Indexed Dirt
Topic: Nether_Quartz_Ore
Description: nether quartz ore ore available nether nether quar...




Indexed Nether_Quartz_Ore
Topic: Fire
Description: fire harmful non solid block fire cannot obtain it...
Indexed Fire
Topic: Dragon_Egg
Description: dragon egg rarest block survival minecraft single ...
Indexed Dragon_Egg
Topic: Mycelium
Description: mycelium block particle effect resemble tiny spore...




Indexed Mycelium
Topic: Glowstone
Description: glowstone light emit block appear nether break wit...
Indexed Glowstone
Topic: Flower_Pot
Description: flower pot decorative block allow plant flower pot...
Indexed Flower_Pot
Topic: Grass
Description: grass commonly name tall grass long grass non soli...




Indexed Grass
Topic: Grass_Path
Description: pocket edition grass path decorative block current...
Indexed Grass_Path
Topic: Flower
Description: flower naturally occur plant come variety shape co...
Indexed Flower
Topic: Nether_Brick
Description: nether brick block use form nether fortresses neth...




Indexed Nether_Brick
Topic: Gravel
Description: gravel uncommon type block affect gravity gravel b...
Indexed Gravel
Topic: Mushroom
Description: mushroom fungi grow spread dark area mushroom inst...
Indexed Mushroom
Topic: Diorite
Description: diorite type igneous rock polish diorite polish ve...




Indexed Diorite
Topic: Glass_Pane
Description: glass pane transparent block use alternative glass...
Indexed Glass_Pane
Topic: Monster_Egg
Description: monster egg also know silverfish block call silver...
Indexed Monster_Egg
Topic: Gold_Ore
Description: gold ore one rarest type mineral block find underg...




Indexed Gold_Ore
Topic: Ice
Description: ice translucent solid block ice easily destroy wit...
Indexed Ice
Topic: Cocoa
Description: cocoa pod growable fruit jungle tree cocoa pod min...
Indexed Cocoa
Topic: Nether_Wart
Description: nether wart know nether stalk coding plant grow ne...




Indexed Nether_Wart
Topic: Mushroom_(block)
Description: mushroom solid block huge mushroom block quickly b...
Indexed Mushroom_(block)
Topic: Furnace
Description: furnace block use smelt block item convert block i...
Indexed Furnace
Topic: Netherrack
Description: netherrack formerly know netherstone rock like blo...




Indexed Netherrack
Topic: Nether_Reactor_Core
Description: nether reactor core block exclusive pocket pi edit...
Indexed Nether_Reactor_Core
Topic: Detector_Rail
Description: detector rail block transport minecarts use switch...
Indexed Detector_Rail
Topic: Sapling
Description: sapling item grown tree sapling break use tool alw...




Indexed Sapling
Topic: Stone
Description: stone block find abundance overworld stone require...
Indexed Stone
Topic: Tripwire_Hook
Description: tripwire hook block use detect entity player mob i...
Indexed Tripwire_Hook
Topic: Stairs
Description: stair block allow mob player change elevation with...




Indexed Stairs
Topic: Prismarine
Description: prismarine stone like material appear underwater o...
Indexed Prismarine
Topic: Stained_Glass
Description: stain glass dye version glass use decoration purpo...
Indexed Stained_Glass
Topic: Redstone_Repeater
Description: redstone repeater sometimes call diode block use r...




Indexed Redstone_Repeater
Topic: Water
Description: water natural fluid water cannot obtain item matte...
Indexed Water
Topic: Powered_Rail
Description: power rail type rail use decrease increase momentu...
Indexed Powered_Rail
Topic: Snow_(layer)
Description: snow cover block commonly cause snowfall destroyin...




Indexed Snow_(layer)
Topic: Sign
Description: sign non solid block display text sign broken tool...
Indexed Sign
Topic: Stained_Clay
Description: stain clay also know stain hardened clay color var...
Indexed Stained_Clay
Topic: Wood_Planks
Description: wood plank common block use many craft recipe text...




Indexed Wood_Planks
Topic: Packed_Ice
Description: packed ice solid block form ice pack ice obtain us...
Indexed Packed_Ice
Topic: Redstone
Description: redstone flat transparent block transmit power red...
Indexed Redstone
Topic: Pumpkin_Seeds
Description: pumpkin seed item use grow pumpkin plant minecarts...




Indexed Pumpkin_Seeds
Topic: Solid_block
Description: solid block include block transparent term block p...
Indexed Solid_block
Topic: Wood
Description: wood also know log naturally occur block find tree...
Indexed Wood
Topic: Sea_Lantern
Description: sea lanterns underwater light source appear ocean ...




Indexed Sea_Lantern
Topic: Sand
Description: sand block naturally find coast desert sand low bl...
Indexed Sand
Topic: Sandstone
Description: sandstone solid block available yellow red color y...
Indexed Sandstone
Topic: Slime_Block
Description: slime block transparent block unique property slim...




Indexed Slime_Block
Topic: Stonecutter
Description: pocket edition stonecutter pocket edition exclusiv...
Indexed Stonecutter
Topic: Sponge
Description: sponge block use remove water around place turn we...
Indexed Sponge
Topic: Redstone_Torch
Description: redstone torch non solid block use invertible reds...




Indexed Redstone_Torch
Topic: Stone_Bricks
Description: stone bricks one material use stronghold crack sto...
Indexed Stone_Bricks
Topic: Pumpkin
Description: pumpkin block appear grass spawn random cluster ov...
Indexed Pumpkin
Topic: Trapped_Chest
Description: trap chest block store item produce redstone power...




Indexed Trapped_Chest
Topic: Note_Block
Description: note block musical block note block quickly break ...
Indexed Note_Block
Topic: Ore
Description: ore type block minecraft ore primarily collect cra...
Indexed Ore
Topic: Torch
Description: torch non solid block emit light torch break insta...




Indexed Torch
Topic: Soul_Sand
Description: soul sand also know hell sand code slow sand block...
Indexed Soul_Sand
Topic: Weighted_Pressure_Plate
Description: weighted pressure plate block use measure number e...
Indexed Weighted_Pressure_Plate
Topic: Stained_Glass_Pane
Description: stain glass pane dye version regular glass pane us...




Indexed Stained_Glass_Pane
Topic: TNT
Description: tnt explosive block tnt break instantly tool witho...
Indexed TNT
Topic: Wool
Description: wool previously know cloth block derive sheep dyed...
Indexed Wool
Topic: Seeds
Description: seed item use farm seed obtain break grass yield 0...




Indexed Seeds
Topic: Pressure_Plate
Description: pressure plate non solid block use detect player m...
Indexed Pressure_Plate
Topic: Technical_blocks
Description: technical block block cannot acquire without comma...
Indexed Technical_blocks
Topic: Rail
Description: rail also know minecart track non solid block prov...




Indexed Rail
Topic: Trapdoor
Description: trapdoor non solid block use openable barrier wood...
Indexed Trapdoor
Topic: Plants
Description: plant minecraft representation plant life move gro...
Indexed Plants
Topic: Potato
Description: potato food item plant consumed raw cook furnace m...




Indexed Potato
Topic: Slab
Description: slabs half version respective block stone type sla...
Indexed Slab
Topic: Piston
Description: piston block capable push block depend direction f...
Indexed Piston
Topic: Redstone_Comparator
Description: redstone comparator block use redstone circuit mai...




Indexed Redstone_Comparator
Topic: Redstone_Lamp
Description: redstone lamp block produce light activate redston...
Indexed Redstone_Lamp
Topic: Snow
Description: snow full size block version snow layer broken sho...
Indexed Snow
Topic: Sugar_Canes
Description: sugar cane formerly call reed sometimes call papyr...




Indexed Sugar_Canes
Topic: Podzol
Description: podzol dirt type variant block podzol collect use ...
Indexed Podzol
Topic: String
Description: string use craft recipe many tool utility place bl...
Indexed String
Topic: Vines
Description: vine also call ivy non solid block 16 block thickn...




Indexed Vines
Topic: Obsidian
Description: obsidian deep purple black block create lava water...
Indexed Obsidian
Topic: Redstone_Ore
Description: redstone ore ore block redstone obtain redstone or...
Indexed Redstone_Ore
Topic: Cow
Description: cow passive mob find overworld cow spawn herd 4 to...




Indexed Cow
Topic: Creeper
Description: creeper common hostile mob explode close player cr...
Indexed Creeper
Topic: Boat
Description: boat item vehicle entity boat retrieve attack pres...
Indexed Boat
Topic: Bottle_o%27_Enchanting
Description: bottle enchant potion like item use spawn experien...




Indexed Bottle_o%27_Enchanting
Topic: Chicken
Description: chicken egg lay passive mob chicken naturally gene...
Indexed Chicken
Topic: Camera
Description: camera pi edition exclusive item use pocket editio...
Indexed Camera
Topic: Armor_Stand
Description: armor stand entity able hold display wearable item...




Indexed Armor_Stand
Topic: Arrow
Description: arrow ammunition bow dispenser skeleton may drop 0...
Indexed Arrow
Topic: Blaze
Description: blaze mob yellow skin black eye find nether blazes...
Indexed Blaze
Topic: Bat
Description: bat fly passive mob spawn cave bat spawn anywhere ...




Indexed Bat
Topic: Chicken_Jockey
Description: chicken jockey fairly rare appearance baby zombie ...
Indexed Chicken_Jockey
Topic: Cave_Spider
Description: cave spider neutral mob inflict poison cave spider...
Indexed Cave_Spider
Topic: Endermite
Description: endermites smallest hostile mob minecraft use ende...




Indexed Endermite
Topic: Human
Description: human mob take form clone default skin human use c...
Indexed Human
Topic: Item_Frame
Description: item frame item display item block inside empty it...
Indexed Item_Frame
Topic: Guardian
Description: guardian underwater hostile mob guardian spawn nat...




Indexed Guardian
Topic: Mooshroom
Description: mooshrooms unique variation cow mooshrooms spawn m...
Indexed Mooshroom
Topic: Rabbit
Description: rabbit bunny mostly harmless passive mob rabbit na...
Indexed Rabbit
Topic: Mobs
Description: mob live move game entity term mob short mobile mo...




Indexed Mobs
Topic: Wither_Skeleton
Description: wither skeleton dangerous hostile mob spawn nether...
Indexed Wither_Skeleton
Topic: Pig
Description: pig rideable passive mob pig spawn grass block lig...
Indexed Pig
Topic: Experience_Orb
Description: experience orb entity similar item entity orb fade...




Indexed Experience_Orb
Topic: Snow_Golem
Description: snow golems utility mob create snow golem player m...
Indexed Snow_Golem
Topic: Magma_Cube
Description: magma cube hostile mob dark red black skin red ora...
Indexed Magma_Cube
Topic: Ocelot
Description: ocelot tameable passive mob ocelot drop 1 3 experi...




Indexed Ocelot
Topic: Minecart_with_TNT
Description: minecart tnt block tnt inside minecart minecarts t...
Indexed Minecart_with_TNT
Topic: Iron_Golem
Description: iron golems large strong utility mob defend villag...
Indexed Iron_Golem
Topic: Splash_Potion
Description: splash potion variant potion throw splash potion b...




Indexed Splash_Potion
Topic: Villager
Description: villager previously call testificate intelligent p...
Indexed Villager
Topic: Entity
Description: entity encompass dynamic moving object throughout ...
Indexed Entity
Topic: Fire_Charge
Description: fire charge item light fire use fire charge use am...




Indexed Fire_Charge
Topic: Wither
Description: wither float three head bos mob withers create pla...
Indexed Wither
Topic: Firework_Rocket
Description: firework rocket item entity use create decorative ...
Indexed Firework_Rocket
Topic: Thunderstorm
Description: thunderstorm fairly uncommon weather condition tak...




Indexed Thunderstorm
Topic: Enderman
Description: endermen black neutral mob end ability teleport en...
Indexed Enderman
Topic: Wolf
Description: wolf neutral mob ally player wolf spawn naturally ...
Indexed Wolf
Topic: Minecart
Description: minecarts vehicle entity minecarts retrieve attack...




Indexed Minecart
Topic: Ender_Dragon
Description: ender dragon bos mob appear end ender dragon spawn...
Indexed Ender_Dragon
Topic: Minecart_with_Chest
Description: minecarts chest also call chest minecarts storage ...
Indexed Minecart_with_Chest
Topic: Spider
Description: spider common neutral mob unique ability climb wal...




Indexed Spider
Topic: Egg
Description: egg item use craft food item use throwable entity ...
Indexed Egg
Topic: Zombie_Pigman
Description: zombie pigman neutral mob live nether zombie pigma...
Indexed Zombie_Pigman
Topic: The_Player
Description: player character user control minecraft generally ...




Indexed The_Player
Topic: Minecart_with_Command_Block
Description: minecart command block command block inside mineca...
Indexed Minecart_with_Command_Block
Topic: Fishing_Rod
Description: fish rod tool mainly use obtain fish fishing rod c...
Indexed Fishing_Rod
Topic: Slime
Description: slime hostile mob spawn underground swamp biome sl...




Indexed Slime
Topic: Ender_Crystal
Description: ender crystal entity find end ender crystal find a...
Indexed Ender_Crystal
Topic: Skeleton
Description: skeleton undead range hostile mob equip bow skelet...
Indexed Skeleton
Topic: Eye_of_Ender
Description: eye ender ender eye craftable item use locate acti...




Indexed Eye_of_Ender
Topic: Zombie
Description: zombie common undead hostile mob overworld zombie ...
Indexed Zombie
Topic: Witch
Description: witch hostile mob use splash potion range weapon a...
Indexed Witch
Topic: Silverfish
Description: silverfish small bug like hostile mob silverfish d...




Indexed Silverfish
Topic: Elder_Guardian
Description: elder guardian hostile mob spawn underwater ocean ...
Indexed Elder_Guardian
Topic: Ghast
Description: ghasts huge float nether mobs shoot explosive fire...
Indexed Ghast
Topic: Horse
Description: horse tamable mob three game variant horse donkey ...




Indexed Horse
Topic: Snowball
Description: snowball throwable item snowball obtain break snow...
Indexed Snowball
Topic: Ender_Pearl
Description: ender pearl item use teleport endermen may drop 0 ...
Indexed Ender_Pearl
Topic: Painting
Description: painting simple low resolution version canvas pain...




Indexed Painting
Topic: Giant
Description: giant size mobs look like zombie currently purpose...
Indexed Giant
Topic: Sheep
Description: sheep passive mob supply wool majority sheep white...
Indexed Sheep
Topic: Minecart_with_Furnace
Description: minecarts furnace also call furnace minecarts powe...




Indexed Minecart_with_Furnace
Topic: Item_(entity)
Description: item drop block item non block resource appear wor...
Indexed Item_(entity)
Topic: Spider_Jockey
Description: spider jockey rare appearance spider ridden skelet...
Indexed Spider_Jockey
Topic: Particles
Description: particle graphical effect minecraft particle alway...




Indexed Particles
Topic: Minecart_with_Hopper
Description: minecart hopper hopper inside minecart minecarts h...
Indexed Minecart_with_Hopper
Topic: Squid
Description: squid 8 arm mob spawn water squid spawn water laye...
Indexed Squid
Topic: Minecart_with_Spawner
Description: minecart spawner combination minecart monster spaw...




Indexed Minecart_with_Spawner
Topic: Axe
Description: ax tool use ease process collect wood base item bo...
Indexed Axe
Topic: Armor
Description: armor category item provide player certain mob var...
Indexed Armor
Topic: Baked_Potato
Description: bake potato food item eat player bake potato obtai...




Indexed Baked_Potato
Topic: Apple
Description: apple food item eat player oak dark oak leave 0 5 ...
Indexed Apple
Topic: Cookie
Description: cooky food easily obtainable restore lot hunger sa...
Indexed Cookie
Topic: Gold_Ingot
Description: gold ingot metal use craft second tier armor first...




Indexed Gold_Ingot
Topic: Cooked_Porkchop
Description: cook porkchop food item eat player cook porkchop o...
Indexed Cooked_Porkchop
Topic: Dandelion_Yellow
Description: dandelion yellow primary dye color create place da...
Indexed Dandelion_Yellow
Topic: Gunpowder
Description: gunpowder formerly know sulphur item use explosive...




Indexed Gunpowder
Topic: Firework_Star
Description: firework star item use determine color effect shap...
Indexed Firework_Star
Topic: Cooked_Mutton
Description: cook mutton food item make cook raw mutton furnace...
Indexed Cooked_Mutton
Topic: Clay
Description: clay item obtain clay block use craft make brick c...




Indexed Clay
Topic: Gold_Nugget
Description: gold nugget item obtain kill zombie pigman zombie ...
Indexed Gold_Nugget
Topic: Bucket
Description: bucket tool use carry water lava milk bucket appro...
Indexed Bucket
Topic: Enchanted_Book
Description: enchant book item allow player add enchantment cer...




Indexed Enchanted_Book
Topic: Golden_Carrot
Description: golden carrot valuable food item brewing ingredien...
Indexed Golden_Carrot
Topic: Brick
Description: brick item use craft brick block flower pot...
Indexed Brick
Topic: Beetroot
Description: pocket edition beetroot crop exclusive pocket edit...




Indexed Beetroot
Topic: Flint_and_Steel
Description: flint steel tool use light fire chests nether fort...
Indexed Flint_and_Steel
Topic: Cooked_Chicken
Description: cook chicken food item eat player chicken die fire...
Indexed Cooked_Chicken
Topic: Chestplate
Description: chestplates type armor cover upper body player fiv...




Indexed Chestplate
Topic: Bow
Description: bow range weapon fire arrows skeleton 8 5 chance d...
Indexed Bow
Topic: Cocoa_Beans
Description: cocoa bean primary color dye food ingredient cocoa...
Indexed Cocoa_Beans
Topic: Cyan_Dye
Description: cyan dye secondary dye color like dye cyan dye app...




Indexed Cyan_Dye
Topic: Blaze_Powder
Description: blaze powder item make blaze rod obtain blaze neth...
Indexed Blaze_Powder
Topic: Carrot_on_a_Stick
Description: carrot stick item use control saddle pig carrot mu...
Indexed Carrot_on_a_Stick
Topic: Fermented_Spider_Eye
Description: ferment spider eye item use brew potion ferment sp...




Indexed Fermented_Spider_Eye
Topic: Cactus_Green
Description: cactus green primary color dye like dye cactus gre...
Indexed Cactus_Green
Topic: Cooked_Fish
Description: cook fish food obtain cook raw fish cook fish obta...
Indexed Cooked_Fish
Topic: Feather
Description: feather item drop chicken chicken may drop 0 2 fea...




Indexed Feather
Topic: Food
Description: food foodstuff consider creative inventory consuma...
Indexed Food
Topic: Gray_Dye
Description: gray dye secondary color dye like dye gray dye app...
Indexed Gray_Dye
Topic: Glowstone_Dust
Description: glowstone dust item use craft glowstone create thi...




Indexed Glowstone_Dust
Topic: Golden_Apple
Description: golden apple special food item eat player enchant ...
Indexed Golden_Apple
Topic: Cooked_Salmon
Description: cooked salmon obtain raw salmon cook furnace cook ...
Indexed Cooked_Salmon
Topic: Beetroot_Soup
Description: pocket edition beetroot soup liquid food item beet...




Indexed Beetroot_Soup
Topic: Diamond
Description: diamond one rarest material minecraft desert templ...
Indexed Diamond
Topic: Glass_Bottle
Description: glass bottle item use pick water make water bottle...
Indexed Glass_Bottle
Topic: Boots
Description: boot type armor cover foot player five type boot l...




Indexed Boots
Topic: Emerald
Description: emerald one rarest material minecraft currency use...
Indexed Emerald
Topic: Ghast_Tear
Description: ghast tear item drop ghasts kill ghasts drop 0 1 g...
Indexed Ghast_Tear
Topic: Bone_Meal
Description: bone meal material use dye well fertilizer plant l...




Indexed Bone_Meal
Topic: Clownfish
Description: clownfish food item obtain fish common way obtain ...
Indexed Clownfish
Topic: Cooked_Rabbit
Description: cook rabbit food item eat player rabbit drop cook ...
Indexed Cooked_Rabbit
Topic: Book_and_Quill
Description: book quill item use write write book press use ite...




Indexed Book_and_Quill
Topic: Flint
Description: flint raw material block gravel mine 10 chance sin...
Indexed Flint
Topic: Glistering_Melon
Description: glistering melon non edible item use imbue potion ...
Indexed Glistering_Melon
Topic: Bread
Description: bread food item eat player stack bread find severa...




Indexed Bread
Topic: Blaze_Rod
Description: blaze rod item drop blaze kill player tame wolf bl...
Indexed Blaze_Rod
Topic: Book
Description: book item craft paper leather three book drop book...




Indexed Book
Topic: Clock
Description: clock sometimes call watch item display current ga...
Indexed Clock
Topic: Compass
Description: compass item use point world spawn compass found c...
Indexed Compass
Topic: Coal
Description: coal lump item mainly obtain harvest coal ore bloc...




Indexed Coal
Topic: Bowl
Description: bowl container use hold certain meal bowl obtain j...
Indexed Bowl
Topic: Dyeing
Description: dye item use change color wool leather armor harde...
Indexed Dyeing
Topic: Bone
Description: bone item primarily obtain drop skeleton bone find...




Indexed Bone
Topic: Melon
Description: melon name melon slice console edition food item e...
Indexed Melon
Topic: Purple_Dye
Description: purple dye secondary dye color create combine one ...
Indexed Purple_Dye
Topic: Raw_Salmon
Description: raw salmon food item obtain fish common way obtain...




Indexed Raw_Salmon
Topic: Raw_Fish
Description: raw fish food item obtain fish common way obtain r...
Indexed Raw_Fish
Topic: Milk
Description: bucket milk item obtain cow mooshrooms milk obtain...
Indexed Milk
Topic: Helmet
Description: helmet type armor cover head player five type helm...




Indexed Helmet
Topic: Nether_Quartz
Description: nether quartz simply quartz nether exclusive item ...
Indexed Nether_Quartz
Topic: Rabbit%27s_Foot
Description: rabbit foot item drop rabbit rabbit rarely drop ra...
Indexed Rabbit%27s_Foot
Topic: Pickaxe
Description: pickax one commonly used tool game require mine or...




Indexed Pickaxe
Topic: Nether_Star
Description: nether star item drop wither use solely craft beac...
Indexed Nether_Star
Topic: Magma_Cream
Description: magma cream item mainly use brew potion fire resis...
Indexed Magma_Cream
Topic: Lime_Dye
Description: lime dye secondary dye color obtain combine one ca...




Indexed Lime_Dye
Topic: Potion
Description: potion brewable item imbue consumer specific time ...
Indexed Potion
Topic: Light_Blue_Dye
Description: light blue dye primary dye like dye light blue dye...
Indexed Light_Blue_Dye
Topic: Light_Gray_Dye
Description: light gray dye primary dye recipe require 12 unit ...




Indexed Light_Gray_Dye
Topic: Leggings
Description: legging type armor cover low body player five type...
Indexed Leggings
Topic: Magenta_Dye
Description: magenta dye dye make combine one purple dye pink d...
Indexed Magenta_Dye
Topic: Iron_Ingot
Description: iron ingot versatile metal craft ingredient common...




Indexed Iron_Ingot
Topic: Prismarine_Shard
Description: prismarine shard item obtain defeat guardian elder...
Indexed Prismarine_Shard
Topic: Poisonous_Potato
Description: poisonous potato food item poison player poisonous...
Indexed Poisonous_Potato
Topic: Pink_Dye
Description: pink dye dye make pink tulip peony combine rise re...




Indexed Pink_Dye
Topic: Leather
Description: leather versatile craft material cow drop 2 piece ...
Indexed Leather
Topic: Lead
Description: lead also know leash item use leash lead passive m...
Indexed Lead
Topic: Raw_Chicken
Description: raw chicken food item eat player cook furnace make...




Indexed Raw_Chicken
Topic: Items
Description: item object exist within player inventory hand mea...
Indexed Items
Topic: Paper
Description: paper item craft sugar cane chest stronghold libra...
Indexed Paper
Topic: Music_Disc
Description: music discs previously name record item play jukeb...




Indexed Music_Disc
Topic: Orange_Dye
Description: orange dye dye craft orange tulip combine one rise...
Indexed Orange_Dye
Topic: Prismarine_Crystals
Description: prismarine crystal item obtain defeat guardian eld...
Indexed Prismarine_Crystals
Topic: Mushroom_Stew
Description: mushroom stew mushroom soup food item mushroom ste...




Indexed Mushroom_Stew
Topic: Hoe
Description: hoe tool use till dirt grass block farmland block ...
Indexed Hoe
Topic: Nether_Brick_(item)
Description: nether brick item make smelt netherrack furnace...
Indexed Nether_Brick_(item)
Topic: Raw_Beef
Description: raw beef food item eat player cook furnace make st...




Indexed Raw_Beef
Topic: Rose_Red
Description: rise red primary color dye create primarily flower...
Indexed Rose_Red
Topic: Saddle
Description: saddle item place ridable mob saddle find chest in...
Indexed Saddle
Topic: Raw_Rabbit
Description: raw rabbit food item eat player cook furnace make ...




Indexed Raw_Rabbit
Topic: Rabbit_Hide
Description: rabbit hide item drop rabbit rabbit drop 0 1 rabbi...
Indexed Rabbit_Hide
Topic: Horse_Armor
Description: horse armor special type armor give horse wear dun...
Indexed Horse_Armor
Topic: Rabbit_Stew
Description: rabbit stew food item eat player rabbit stew obtai...




Indexed Rabbit_Stew
Topic: Raw_Porkchop
Description: raw porkchop food item eat player cook furnace mak...
Indexed Raw_Porkchop
Topic: Ink_Sac
Description: ink sac dark possible primary color dye ink sac ob...
Indexed Ink_Sac
Topic: Raw_Mutton
Description: raw mutton food item eat player sheep drop 1 2 raw...




Indexed Raw_Mutton
Topic: Pufferfish
Description: pufferfish edible brewing ingredient catch fish pu...
Indexed Pufferfish
Topic: Pumpkin_Pie
Description: pumpkin pie food item eat player villager farmer s...
Indexed Pumpkin_Pie
Topic: Rotten_Flesh
Description: rotten flesh food item eat player risk food poison...




Indexed Rotten_Flesh
Topic: Lapis_Lazuli
Description: lapis lazuli primary color dye mine stone pickaxe ...
Indexed Lapis_Lazuli
Topic: Name_Tag
Description: name tag item use name mobs world dungeon chest ma...
Indexed Name_Tag
Topic: Map
Description: map item use view explore terrain console edition ...




Indexed Map
Topic: Steak
Description: steak food item obtain cow cow mooshroom dy fire d...
Indexed Steak
Topic: Sugar
Description: sugar food item derive sugar cane witch may drop 0...
Indexed Sugar
Topic: Written_Book
Description: write book item create book quill sign icon write ...




Indexed Written_Book
Topic: Wheat
Description: wheat item primarily obtain farm dungeon chest cha...
Indexed Wheat
Topic: Slimeball
Description: slimeball item drop slime tiny slime drop 0 2 slim...
Indexed Slimeball
Topic: Stick
Description: stick item use craft many tool item bonus chest ma...




Indexed Stick
Topic: Sword
Description: sword melee weapon monster spawn sword 8 5 9 5 loo...
Indexed Sword
Topic: Spider_Eye
Description: spider eye poisonous food brewing item spider cave...
Indexed Spider_Eye
Topic: Spawn_Egg
Description: spawn egg item may use spawn mobs directly allow p...




Indexed Spawn_Egg
Topic: Shovel
Description: shovel tool use ease process collect dirt block zo...
Indexed Shovel
Topic: Shears
Description: shear tool shepherd villager sell shear 3 4 emeral...
Indexed Shears
Topic: Tools
Description: tool item use player hold perform action faster ef...




Indexed Tools


In [None]:
def search_minecraft(query, index="minecraft", size=10, client=None):
    """Run a full-text search and return the matching pages.

    Args:
        query: Free-text query string; matched against both the ``topic``
            and ``description`` fields through a ``multi_match`` query.
        index: Name of the Elasticsearch index to search.
        size: Maximum number of hits to return. The default of 10 is the
            limit Elasticsearch applies when no size is given, so existing
            callers see the same number of results.
        client: Object exposing ``search(index=..., body=...)``. When
            omitted, the notebook-level ``es`` client is used.

    Returns:
        A list of ``(topic, description)`` tuples, in the order the hits
        were returned by the client.
    """
    # Fall back to the notebook-level client only when none is injected,
    # so the function can also be exercised without a live cluster.
    if client is None:
        client = es

    search_request = {
        "size": size,
        "query": {
            "multi_match": {
                "query": query,
                "fields": ["topic", "description"]
            }
        }
    }

    response = client.search(index=index, body=search_request)

    return [
        (hit['_source']['topic'], hit['_source']['description'])
        for hit in response['hits']['hits']
    ]

# Try the search helper on a sample natural-language question and dump
# every (topic, description) pair it returns.
query = "What is a Cauldron used for?"
results = search_minecraft(query)
for topic, description in results:
    # One print per hit; the trailing "\n" leaves a blank line between hits.
    print(f"Topic: {topic}", f"Description: {description}\n", sep="\n")



Topic: Cauldron
Description: cauldron block hold water cauldron mine use pickaxe mine without pickaxe drop nothing cauldron destroy water inside lose cauldron craft iron ingot single empty cauldron generate witch hut fill cauldron water press use cauldron water bucket cauldron also chance fill water rain upon water cauldron use fill glass bottle turn water bottle wash dye leather armor remove top pattern layer banner use cauldron press use cauldron glass bottle leather armor banner cauldron extinguish mob fire include player fall use include extinguish mobs cause water level cauldron decrease one third use three time empty must refill additional us endermen fill cauldron take damage water cauldron cannot use fill empty bucket water bottle cannot use refill cauldron cauldron fill water bucket nether cauldron act power source redstone comparator cauldron behind possibly separate unpowered solid block comparator output signal strength proportional full cauldron 0 empty 1 one third full 2 

In [None]:
import nltk

# Question that the rest of the notebook tries to answer.
question = "What is the use of a redstone comparator in Minecraft?"

# Split the question into word/punctuation tokens.
tokens = nltk.tokenize.word_tokenize(question)

In [None]:
# Pair every question token with its part-of-speech tag: [(word, tag), ...].
pos_tags = nltk.tag.pos_tag(tokens)

In [None]:
# Keep only tokens tagged as nouns (NN*) or verbs (VB*); everything else
# (determiners, prepositions, punctuation, ...) is dropped.
_CONTENT_POS_TAGS = frozenset(
    ('NN', 'NNS', 'NNP', 'NNPS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ')
)
keywords = [token for token, tag in pos_tags if tag in _CONTENT_POS_TAGS]

In [None]:
index_name = 'minecraft'

def fetch_indexed_data(index_name, client=None, batch_size=1000):
    """Download every document of an index through the scroll API.

    Args:
        index_name: Name of the Elasticsearch index to read.
        client: Object exposing ``search``, ``scroll`` and ``clear_scroll``.
            When omitted, the notebook-level ``es`` client is used.
        batch_size: Number of hits requested per page.

    Returns:
        A dict mapping each hit's ``_id`` to its ``_source``.
    """
    if client is None:
        client = es

    indexed_data = {}
    scroll_id = None
    try:
        result = client.search(index=index_name, scroll='1m', size=batch_size)
        scroll_id = result['_scroll_id']
        # An empty page marks the end of the scroll.
        while result['hits']['hits']:
            for hit in result['hits']['hits']:
                indexed_data[hit['_id']] = hit['_source']
            result = client.scroll(scroll_id=scroll_id, scroll='1m')
            # The id may change between pages; always continue with the latest.
            scroll_id = result['_scroll_id']
    finally:
        # Release the server-side scroll context instead of letting it linger
        # until its timeout, including when a page request fails.
        if scroll_id is not None:
            try:
                client.clear_scroll(scroll_id=scroll_id)
            except Exception:
                # Best-effort cleanup: the context expires on its own after
                # the scroll timeout, so a failed clear must not discard the
                # data already fetched or mask the original error.
                pass

    return indexed_data

# Pull the whole index into memory as {document id: document source}.
indexed_data = fetch_indexed_data(index_name=index_name)



In [None]:
# Case-fold the keywords so they compare against lower-cased document ids.
keywords = list(map(str.lower, keywords))

# Keep every document id that contains at least one keyword as a substring.
# NOTE(review): this is substring matching, so a short keyword such as "is"
# also selects ids that merely contain those letters - confirm that is intended.
matching_entities = [
    doc_id
    for doc_id in indexed_data
    if any(term in doc_id.lower() for term in keywords)
]

# Restrict the fetched documents to the matched ids.
relevant_data = dict((doc_id, indexed_data[doc_id]) for doc_id in matching_entities)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Candidate passages: the description text of every keyword-matched document.
documents = [entry['description'] for entry in relevant_data.values()]

# Row 0 of the corpus is the question itself; rows 1..n are the documents.
texts = [question, *documents]

vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(texts)

# Similarity of the question row against every row (including itself).
question_row = tfidf_matrix[0:1]
cosine_similarities = linear_kernel(question_row, tfidf_matrix).flatten()

# Walk the rows from most to least similar, skip the question's own row, and
# shift the index by one to map a matrix row back to its entry in `documents`.
ranked_rows = cosine_similarities.argsort()[::-1]
document_ranking = [documents[row - 1] for row in ranked_rows if row != 0]



In [None]:
import re
from nltk.tokenize import sent_tokenize

def extract_keywords(text):
    """Return the set of distinct lower-cased word tokens found in ``text``."""
    lowered = text.lower()
    return {match.group() for match in re.finditer(r'\b\w+\b', lowered)}

# Words of the question; each candidate sentence is scored by how many of
# them it shares.
question_keywords = extract_keywords(question)

# (sentence, score) pairs collected across all ranked documents.
candidate_answers = []

for doc in document_ranking:
    # Score every sentence of the document by keyword overlap with the question.
    for sentence in sent_tokenize(doc):
        score = len(question_keywords & extract_keywords(sentence))
        candidate_answers.append((sentence, score))

# Highest score first. list.sort is stable, so sentences with equal scores
# keep the order in which the ranked documents supplied them.
candidate_answers.sort(key=lambda x: x[1], reverse=True)

if candidate_answers:
    # The top-ranked sentence is the answer (the top N could be used instead).
    answer = candidate_answers[0][0]
    print(answer)
else:
    # No ranked documents (or no sentences in them): indexing [0] here would
    # raise IndexError, so report the situation instead.
    answer = None
    print("No candidate sentences found for the question.")

NameError: ignored

In [None]:
!pip install transformers



In [None]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

# Checkpoint name shared by the tokenizer and the question-answering model,
# so the two cannot drift apart.
_QA_CHECKPOINT = 'bert-large-uncased-whole-word-masking-finetuned-squad'

tokenizer = BertTokenizer.from_pretrained(_QA_CHECKPOINT)
model = BertForQuestionAnswering.from_pretrained(_QA_CHECKPOINT)

def get_answer(question, context, max_length=512):
    """Extract an answer span for ``question`` from ``context``.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        question: Question text.
        context: Passage in which to look for the answer.
        max_length: Upper bound on the encoded sequence length. Only the
            context is truncated to fit; the question is kept whole. The
            default of 512 is the usual BERT position limit - adjust it if a
            checkpoint with a different limit is loaded.

    Returns:
        The decoded text of the tokens between the highest-scoring start
        position and the highest-scoring end position (inclusive). This is
        an empty string when the predicted end precedes the predicted start.
    """
    # Encode as a (question, context) pair so the tokenizer inserts the
    # [CLS]/[SEP] markers and builds token_type_ids itself. Writing the
    # markers into the text by hand and then calling encode() adds them a
    # second time, and without truncation an over-long context is rejected
    # by the model.
    encoding = tokenizer(
        question,
        context,
        truncation="only_second",
        max_length=max_length,
        return_tensors="pt",
    )

    with torch.no_grad():
        outputs = model(**encoding)

    # Most likely start and end token positions of the answer.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits))

    # Turn the selected token ids back into text.
    answer_ids = encoding["input_ids"][0][answer_start:answer_end + 1].tolist()
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

    return answer

# The ranking is ordered best-first, so its head serves as the QA context.
most_relevant_doc = document_ranking[0]

# Ask the BERT reader for an answer span inside that document and show it.
answer = get_answer(question=question, context=most_relevant_doc)
print(answer)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


pickaxe mine without pickaxe drop nothing
