In [24]:
import os

from opencage.geocoder import OpenCageGeocode
from transformers import pipeline

# Load a pre-trained BERT-based NER pipeline.
# The checkpoint is pinned explicitly (it is the one the pipeline fell back to
# when no model was supplied), and aggregation_strategy="simple" replaces the
# deprecated grouped_entities=True while still merging word pieces into entities.
ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Initialize geocoder
# NOTE(review): Nominatim is not imported in this cell; presumably an earlier
# cell imports it (geopy.geocoders) -- confirm, otherwise this raises NameError.
geolocator = Nominatim(user_agent="geoapiExercises")

# Example commands
commands = [
    "zoom_in to Delhi",
    "find directions to Bangalore",
    "navigate to New York",
    "zoom in to the map",
    "find directions to Mishra Pedha",
    "maximize Kgmu"
]

# Read the OpenCage API key from the environment instead of embedding it in the
# notebook. Any key that was previously committed here should be treated as
# leaked and rotated.
key = os.environ.get("OPENCAGE_API_KEY")
if not key:
    raise RuntimeError(
        "Set the OPENCAGE_API_KEY environment variable before running this cell."
    )
geocoder = OpenCageGeocode(key)

# Process commands
for command in commands:
    print(f"\nProcessing command: '{command}'")
    ner_results = ner(command)

    # Loop through NER results and handle locations
    for entity in ner_results:
        print(f"Entity: {entity['word']}, Type: {entity['entity_group']}, Confidence: {entity['score']:.4f}")

        # Only location entities are sent to the geocoder
        if entity['entity_group'] == 'LOC':
            result = geocoder.geocode(entity['word'])
            if result:
                # The first result is used as the match
                location = result[0]['geometry']
                print(location['lat'], location['lng'])
            else:
                # Make an empty geocoding response visible instead of silent
                print(f"No geocoding result for: {entity['word']}")

        else:
            print(f"Skipping non-location entity: {entity['word']}")


No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



Processing command: 'zoom_in to Delhi'
Entity: Delhi, Type: LOC, Confidence: 0.9990
28.6273928 77.1716954

Processing command: 'find directions to Bangalore'
Entity: Bangalore, Type: LOC, Confidence: 0.9988
12.9767936 77.590082

Processing command: 'navigate to New York'
Entity: New York, Type: LOC, Confidence: 0.9994
40.7127281 -74.0060152

Processing command: 'zoom in to the map'

Processing command: 'find directions to Mishra Pedha'
Entity: Mishra Pedha, Type: LOC, Confidence: 0.8867
15.3522734 75.145443

Processing command: 'maximize Kgmu'


In [25]:
def remove_ner_entities(command, ner_pipeline=None):
    """Return ``command`` with every recognised named entity removed.

    Args:
        command: The raw command text.
        ner_pipeline: Optional callable that takes the command text and returns
            an iterable of dicts with a ``'word'`` key. Defaults to the
            notebook-level ``ner`` pipeline.

    Returns:
        The command with each entity's text removed and whitespace normalised,
        so dropping an entity from the middle of a command leaves no doubled
        spaces behind.
    """
    if ner_pipeline is None:
        ner_pipeline = ner

    # Collect the entity strings first, then strip each from the command
    entities = [entity['word'] for entity in ner_pipeline(command)]
    for entity in entities:
        command = command.replace(entity, '')

    # Collapse the gaps left by removed entities and trim both ends
    return " ".join(command.split())
# Strip the named entities out of every command
cleaned_commands = [remove_ner_entities(raw_text) for raw_text in commands]

# Report each original command next to its cleaned form
for command, cleaned_command in zip(commands, cleaned_commands):
    print(f"Original Command: '{command}' => Cleaned Command: '{cleaned_command}'")

Original Command: 'zoom_in to Delhi' => Cleaned Command: 'zoom_in to'
Original Command: 'find directions to Bangalore' => Cleaned Command: 'find directions to'
Original Command: 'navigate to New York' => Cleaned Command: 'navigate to'
Original Command: 'zoom in to the map' => Cleaned Command: 'zoom in to the map'
Original Command: 'find directions to Mishra Pedha' => Cleaned Command: 'find directions to'
Original Command: 'maximize Kgmu' => Cleaned Command: 'maximize Kgmu'


In [26]:
# Show the full list of entity-stripped commands
print(cleaned_commands)

['zoom_in to', 'find directions to', 'navigate to', 'zoom in to the map', 'find directions to', 'maximize Kgmu']


In [27]:
import nltk
from nltk.corpus import stopwords

# Make sure the NLTK stop-word corpus is available locally
nltk.download("stopwords")

# Collect the English stop words into a set for membership checks
stop_words = {*stopwords.words("english")}

# Function to remove stop words from a command
def remove_stopwords(command):
    """Return ``command`` with English stop words dropped.

    The command is split on whitespace; a token is discarded when its
    lower-cased form is in the notebook-level ``stop_words`` set. Surviving
    tokens keep their original casing and are re-joined with single spaces.
    """
    kept_tokens = []
    for token in command.split():
        if token.lower() in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)

# Function to remove stop words from a list of commands
def process_commands(commands):
    """Apply ``remove_stopwords`` to each command and return the results as a new list, in order."""
    return list(map(remove_stopwords, commands))

# Strip stop words from the entity-free commands
filtered_commands = process_commands(cleaned_commands)

# Show each original command next to its stop-word-free form,
# followed by a blank separator line
for original, filtered in zip(commands, filtered_commands):
    print(
        f"Original Command: {original}",
        f"Filtered Command (without stop words): {filtered}",
        "",
        sep="\n",
    )


Original Command: zoom_in to Delhi
Filtered Command (without stop words): zoom_in

Original Command: find directions to Bangalore
Filtered Command (without stop words): find directions

Original Command: navigate to New York
Filtered Command (without stop words): navigate

Original Command: zoom in to the map
Filtered Command (without stop words): zoom map

Original Command: find directions to Mishra Pedha
Filtered Command (without stop words): find directions

Original Command: maximize Kgmu
Filtered Command (without stop words): maximize Kgmu



[nltk_data] Downloading package stopwords to /home/sds/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [28]:
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity

# Tokenizer and encoder are loaded from the same pre-trained checkpoint
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

def get_embedding(word):
    """Return a mean-pooled BERT embedding for ``word``.

    The text is tokenized with the notebook-level ``tokenizer`` and run through
    the notebook-level ``model``; the last hidden state is then averaged over
    dim=1.

    Args:
        word: Text to embed (a single word or a short phrase).

    Returns:
        A NumPy array holding the averaged last hidden state.
        NOTE(review): presumably shape (1, hidden_size) for a single string,
        and the average presumably also covers any special tokens the
        tokenizer adds -- confirm both are intended.
    """
    inputs = tokenizer(word, return_tensors="pt")
    # Inference only: disable autograd so no gradient graph is built or kept
    with torch.no_grad():
        outputs = model(**inputs)
    # Average the per-token vectors of the last hidden state into one vector
    return torch.mean(outputs.last_hidden_state, dim=1).numpy()

def classify_command_with_best_match(command):
    """Score every word of ``command`` against a fixed set of target keywords.

    Each whitespace-separated word and each target keyword is embedded with
    ``get_embedding`` and the pair is compared by cosine similarity.

    Returns:
        A tuple ``(similarity_matrix, target_words, best_matches)``:

        * ``similarity_matrix`` -- one row per command word; each row holds
          that word's similarity to every target keyword, in keyword order.
        * ``target_words`` -- the target keywords as a list.
        * ``best_matches`` -- maps each keyword to a dict with the command
          word that scored highest against it (``"word"``) and that score
          (``"similarity"``). An entry stays at ``None`` / ``0.0`` when no
          word scores above 0.0.
    """
    # Target keywords and their respective actions
    target_keywords = {
        "zoom": "Zoom",
        "directions": "Directions",
        "search": "Search",
        "navigate": "Navigate"
    }

    # Embed every keyword before looking at the command itself
    keyword_vectors = {keyword: get_embedding(keyword) for keyword in target_keywords}

    # Running best match per keyword
    best_matches = {keyword: {"word": None, "similarity": 0.0} for keyword in target_keywords}

    similarity_matrix = []
    for token in command.split():
        token_vector = get_embedding(token)

        # One similarity per keyword, in keyword order ([0][0] extracts the scalar)
        row = [
            cosine_similarity(token_vector, keyword_vector)[0][0]
            for keyword_vector in keyword_vectors.values()
        ]

        # Promote this token wherever it strictly beats the current best
        for keyword, score in zip(keyword_vectors, row):
            current_best = best_matches[keyword]
            if score > current_best["similarity"]:
                current_best["similarity"] = score
                current_best["word"] = token

        similarity_matrix.append(row)

    return similarity_matrix, list(target_keywords), best_matches

# From here on, work with the stop-word-filtered commands
commands = filtered_commands

# Classify every command and report the best word per target keyword
for command in commands:
    similarity_matrix, target_words, best_matches = classify_command_with_best_match(command)

    print(f"\nCommand: {command}")

    # Reduce each best-match record to just the matching word
    print("\nBest Matches:")
    best_commands = {
        keyword: record['word'] for keyword, record in best_matches.items()
    }

    # One line per target keyword
    print("\nBest Commands:")
    for target, match in best_commands.items():
        print(f"Best command for '{target}': {match}")



Command: zoom_in

Best Matches:

Best Commands:
Best command for 'zoom': zoom_in
Best command for 'directions': zoom_in
Best command for 'search': zoom_in
Best command for 'navigate': zoom_in

Command: find directions

Best Matches:

Best Commands:
Best command for 'zoom': find
Best command for 'directions': directions
Best command for 'search': find
Best command for 'navigate': find

Command: navigate

Best Matches:

Best Commands:
Best command for 'zoom': navigate
Best command for 'directions': navigate
Best command for 'search': navigate
Best command for 'navigate': navigate

Command: zoom map

Best Matches:

Best Commands:
Best command for 'zoom': zoom
Best command for 'directions': map
Best command for 'search': zoom
Best command for 'navigate': zoom

Command: find directions

Best Matches:

Best Commands:
Best command for 'zoom': find
Best command for 'directions': directions
Best command for 'search': find
Best command for 'navigate': find

Command: maximize Kgmu

Best Matches:

In [37]:
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter

# Tokenizer and encoder are loaded from the same pre-trained checkpoint
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model = BertModel.from_pretrained(BERT_CHECKPOINT)

def get_embedding(word):
    """Return a mean-pooled BERT embedding for ``word``.

    The text is tokenized with the notebook-level ``tokenizer`` and run through
    the notebook-level ``model``; the last hidden state is then averaged over
    dim=1.

    Args:
        word: Text to embed (a single word or a short phrase).

    Returns:
        A NumPy array holding the averaged last hidden state.
        NOTE(review): presumably shape (1, hidden_size) for a single string,
        and the average presumably also covers any special tokens the
        tokenizer adds -- confirm both are intended.
    """
    inputs = tokenizer(word, return_tensors="pt")
    # Inference only: disable autograd so no gradient graph is built or kept
    with torch.no_grad():
        outputs = model(**inputs)
    # Average the per-token vectors of the last hidden state into one vector
    return torch.mean(outputs.last_hidden_state, dim=1).numpy()

def classify_command_with_best_match(command):
    """Find, for each target keyword, the word of ``command`` closest to it.

    Each whitespace-separated word and each target keyword is embedded with
    ``get_embedding`` and the pair is compared by cosine similarity.

    Returns:
        A dict mapping each target keyword to
        ``{"word": <best command word>, "similarity": <its score>}``. An entry
        stays at ``None`` / ``0.0`` when no word scores above 0.0.
    """
    # Target keywords and their respective actions
    target_keywords = {
        "zoom": "Zoom",
        "directions": "Directions",
        "search": "Search",
        "navigate": "Navigate"
    }

    # Embed every keyword before looking at the command itself
    keyword_vectors = {keyword: get_embedding(keyword) for keyword in target_keywords}

    # Running best match per keyword
    best_matches = {}
    for keyword in target_keywords:
        best_matches[keyword] = {"word": None, "similarity": 0.0}

    for token in command.split():
        token_vector = get_embedding(token)

        for keyword, keyword_vector in keyword_vectors.items():
            # cosine_similarity returns a matrix; [0][0] extracts the scalar
            score = cosine_similarity(token_vector, keyword_vector)[0][0]
            record = best_matches[keyword]

            # Only a strictly better score replaces the current best
            if not score > record["similarity"]:
                continue
            record["similarity"] = score
            record["word"] = token

    return best_matches

# Example usage with a list of commands
commands = filtered_commands

# How often each command word was chosen as a best match
match_counter = Counter()
# Highest similarity recorded for each best-matching word. Filled during the
# single classification pass so no command has to be classified a second time.
best_similarity_by_word = {}

# Run the classification with best match detection
for command in commands:
    best_matches = classify_command_with_best_match(command)

    print(f"\nCommand: {command}")

    # Display the best matching word for each target keyword
    print("\nBest Matches:")
    for target_word, match_info in best_matches.items():
        best_word = match_info['word']
        similarity = match_info['similarity']
        print(f"Best command for '{target_word}': {best_word} (Cosine Similarity: {similarity:.4f})")

        # An empty command yields no best word; do not count None as a match
        if best_word is None:
            continue

        match_counter[best_word] += 1
        best_similarity_by_word[best_word] = max(
            best_similarity_by_word.get(best_word, 0.0), similarity
        )

# Report the word chosen most often and the best similarity it achieved
if match_counter:
    most_common_word, most_common_count = match_counter.most_common(1)[0]
    print(f"\nMost Common Match: '{most_common_word}' occurs {most_common_count} times.")

    max_similarity = best_similarity_by_word[most_common_word]
    print(f"Maximum Cosine Similarity for '{most_common_word}': {max_similarity:.4f}")
else:
    print("No matches found.")



Command: zoom

Best Matches:
Best command for 'zoom': zoom (Cosine Similarity: 1.0000)
Best command for 'directions': zoom (Cosine Similarity: 0.7937)
Best command for 'search': zoom (Cosine Similarity: 0.8296)
Best command for 'navigate': zoom (Cosine Similarity: 0.8380)

Command: find directions

Best Matches:
Best command for 'zoom': find (Cosine Similarity: 0.8052)
Best command for 'directions': directions (Cosine Similarity: 1.0000)
Best command for 'search': find (Cosine Similarity: 0.8856)
Best command for 'navigate': find (Cosine Similarity: 0.8804)

Command: navigate

Best Matches:
Best command for 'zoom': navigate (Cosine Similarity: 0.8380)
Best command for 'directions': navigate (Cosine Similarity: 0.8343)
Best command for 'search': navigate (Cosine Similarity: 0.8668)
Best command for 'navigate': navigate (Cosine Similarity: 1.0000)

Command: zoom map

Best Matches:
Best command for 'zoom': zoom (Cosine Similarity: 1.0000)
Best command for 'directions': map (Cosine Simila

In [38]:
# Known issues: the 'restaurants' problem, and 'maximize' getting mapped to 'directions' because of the cosine similarity scores.

In [39]:
# TODO: handle multi-word commands, e.g. 'find directions'.