#17. Write program demonstrates how to access WordNet, a lexical database, to retrieve synsets and explore word meanings in python.#

In [None]:
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet
def explore_word_meanings(word):
    synsets = wordnet.synsets(word)
    if not synsets:
        print(f"No synsets found for the word '{word}'.")
        return
    for synset in synsets:
        print(f"Synset: {synset.name()}")
        print(f"Definition: {synset.definition()}")
        print(f"Examples: {synset.examples()}")
        print()
example_word = "dog"
explore_word_meanings(example_word)


[nltk_data] Downloading package wordnet to /root/nltk_data...


Synset: dog.n.01
Definition: a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Examples: ['the dog barked all night']

Synset: frump.n.01
Definition: a dull unattractive unpleasant girl or woman
Examples: ['she got a reputation as a frump', "she's a real dog"]

Synset: dog.n.03
Definition: informal term for a man
Examples: ['you lucky dog']

Synset: cad.n.01
Definition: someone who is morally reprehensible
Examples: ['you dirty dog']

Synset: frank.n.02
Definition: a smooth-textured sausage of minced beef or pork usually smoked; often served on a bread roll
Examples: []

Synset: pawl.n.01
Definition: a hinged catch that fits into a notch of a ratchet to move a wheel forward or prevent it from moving backward
Examples: []

Synset: andiron.n.01
Definition: metal supports for logs in a fireplace
Examples: ['the andirons were too hot to touch']

Synset: chase.v.01
Definition: go after with 

#18. Implement a simple FOPC parser for basic logical expressions using python program.#


In [None]:
facts = {("R", "apple", "banana"), ("R", "banana", "cherry"), ("R", "apple", "cherry")}
expressions = ["R(apple, banana)", "R(banana, cherry)", "R(apple, cherry)", "R(pear, orange)"]
for expression in expressions:
    predicate, args = expression.split('(')
    args = args.rstrip(')').split(',')
    if (predicate, args[0], args[1]) in facts:
        result = True
    else:
        result = False
    print(f"{expression}: {result}")


R(apple, banana): False
R(banana, cherry): False
R(apple, cherry): False
R(pear, orange): False


#19. Create a program for word sense disambiguation using the Lesk algorithm using python#

In [None]:
import nltk
nltk.download('wordnet')
nltk.download('punkt')

from nltk.wsd import lesk
from nltk.tokenize import word_tokenize
example_sentence = "The bank was situated near the river bank."
tokenized_sentence = word_tokenize(example_sentence)
target_word = "bank"
sense = lesk(tokenized_sentence, target_word)
if sense:
    print(f"Target word: {target_word}")
    print(f"Best sense: {sense.name()}")
    print(f"Definition: {sense.definition()}")
else:
    print(f"No sense found for the target word '{target_word}'.")


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Target word: bank
Best sense: savings_bank.n.02
Definition: a container (usually with a slot in the top) for keeping money at home


#20. Implement a basic information retrieval system using TF-IDF (Term Frequency-Inverse Document Frequency) for document ranking using python.#

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Sample documents
documents = [
    "The quick brown fox jumps over the lazy dog",
    "A brown dog chased the fox",
    "The fox is quick and the dog is lazy",
    "The cat is sitting on the windowsill",
]
# Query
query = "The quick fox"
def tfidf_search(query, documents):
    # Create TF-IDF vectorizer
    vectorizer = TfidfVectorizer()
    # Calculate TF-IDF matrix
    tfidf_matrix = vectorizer.fit_transform(documents + [query])
    # Calculate cosine similarity between the query and each document
    cosine_similarities = linear_kernel(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    # Rank documents based on cosine similarities
    ranked_documents = sorted(enumerate(cosine_similarities), key=lambda x: x[1], reverse=True)
    return ranked_documents
# Perform TF-IDF-based document ranking
results = tfidf_search(query, documents)
# Print the ranked documents
print("Ranked Documents:")
for index, similarity in results:
    print(f"Document {index + 1}: Similarity = {similarity:.4f}")
    print(f"   '{documents[index]}'\n")


Ranked Documents:
Document 1: Similarity = 0.5257
   'The quick brown fox jumps over the lazy dog'

Document 3: Similarity = 0.4848
   'The fox is quick and the dog is lazy'

Document 2: Similarity = 0.3360
   'A brown dog chased the fox'

Document 4: Similarity = 0.1933
   'The cat is sitting on the windowsill'



#21. Create a python program that performs syntax-driven semantic analysis by extracting noun phrases and their meanings from a sentence.#

In [None]:
import spacy
def extract_noun_phrases(sentence):
    # Load the English language model for spaCy
    nlp = spacy.load("en_core_web_sm")
    # Process the input sentence
    doc = nlp(sentence)
    # Extract noun phrases and their meanings
    noun_phrases = []
    for chunk in doc.noun_chunks:
        noun_phrases.append((chunk.text, get_meaning(chunk.root)))
    return noun_phrases
def get_meaning(word):
    # In a real-world scenario, you might use WordNet or another resource for word meanings
    # For simplicity, this example returns a placeholder meaning
    return f"Meaning of '{word.text}' is not determined in this example."
# Example sentence
example_sentence = "The quick brown fox jumps over the lazy dog."
# Perform syntax-driven semantic analysis
semantic_results = extract_noun_phrases(example_sentence)
# Print the results
print("Noun Phrases and Their Meanings:")
for phrase, meaning in semantic_results:
    print(f"Noun Phrase: {phrase}")
    print(f"Meaning: {meaning}\n")


Noun Phrases and Their Meanings:
Noun Phrase: The quick brown fox
Meaning: Meaning of 'fox' is not determined in this example.

Noun Phrase: the lazy dog
Meaning: Meaning of 'dog' is not determined in this example.



#22. Create a python program that performs reference resolution within a text.#


In [None]:
import spacy

def resolve_references(text):
    # Load spaCy English model
    nlp = spacy.load("en_core_web_sm")

    # Process the input text
    doc = nlp(text)

    # Extract entities and their positions
    entities = {ent.start: ent for ent in doc.ents}

    # Resolve pronoun references
    resolved_text = []
    for token in doc:
        if token.text.lower() in ['he', 'him', 'his', 'she', 'her', 'it']:
            # Check if the pronoun is part of an entity
            if token.i in entities:
                resolved_text.append(entities[token.i].text)
            else:
                # Use the pronoun as is if not part of an entity
                resolved_text.append(token.text)
        else:
            resolved_text.append(token.text)

    return ' '.join(resolved_text)

# Example usage
input_text = "John met Mary at the park. He gave her a book."
resolved_text = resolve_references(input_text)

# Display the results
print("Original Text:")
print(input_text)
print("\nResolved Text:")
print(resolved_text)


Original Text:
John met Mary at the park. He gave her a book.

Resolved Text:
John met Mary at the park . He gave her a book .


#23. Develop a python program that evaluates the coherence of a given text.#


In [None]:
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_coherence(text):
    # Tokenize the text into sentences
    sentences = sent_tokenize(text)

    # Create a TF-IDF vectorizer
    vectorizer = TfidfVectorizer(stop_words='english')

    # Compute TF-IDF matrix
    tfidf_matrix = vectorizer.fit_transform(sentences)

    # Compute cosine similarity between sentences
    similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Calculate coherence score
    coherence_score = similarity_matrix.mean()

    return coherence_score

if __name__ == "__main__":
    # Example text
    input_text = """
    Natural Language Processing (NLP) is a field of artificial intelligence that focuses on the interaction between computers and humans using natural language. It involves the development of algorithms and models that enable machines to understand, interpret, and generate human-like text. NLP has applications in various domains, including chatbots, sentiment analysis, and language translation.

    Coherence in a text refers to the logical connection and smooth flow of ideas between sentences and paragraphs. A coherent text is easier to understand and retains the reader's attention. Evaluating coherence is crucial in assessing the quality of written communication.

    This program computes the coherence of a given text by analyzing the similarity between sentences using TF-IDF vectors. The higher the coherence score, the more connected and coherent the text is considered.

    Feel free to replace this example text with your own to evaluate the coherence of different texts.
    """

    # Compute coherence score
    coherence_score = compute_coherence(input_text)

    # Display the coherence score
    print(f"Coherence Score: {coherence_score}")


Coherence Score: 0.14863938451879324


# 24. Create a python program that recognizes dialog acts in a given dialog or conversation..#

In [None]:
import spacy
def recognize_dialog_acts(conversation):
    nlp = spacy.load("en_core_web_sm")
    # Process the conversation using spaCy
    doc = nlp(conversation)
    # Extract sentences and their dialog acts
    dialog_acts = []
    for sent in doc.sents:
        # You may customize this logic based on your requirements
        # Here, we assume simple categorization (question, statement, etc.)
        if "?" in sent.text:
            dialog_act = "Question"
        else:
            dialog_act = "Statement"
        dialog_acts.append((sent.text, dialog_act))
    return dialog_acts
if __name__ == "__main__":
    # Example conversation
    conversation = "User: How are you? Bot: I'm doing well. User: What's the weather like today?"
    # Recognize dialog acts
    dialog_acts = recognize_dialog_acts(conversation)
    # Print the recognized dialog acts
    for sentence, dialog_act in dialog_acts:
        print(f"{dialog_act}: {sentence}")


Question: User: How are you?
Statement: Bot: I'm doing well.
Question: User: What's the weather like today?


#25. Utilize the GPT-3 model to generate text based on a given prompt. Make sure to install the OpenAI GPT-3 library in python implementation.#

In [None]:
!pip install openai==0.28
import openai

openai.api_key = "YOUR_API_KEY"

# Example prompt
prompt = "Write a poem about a robot falling in love."

# Use the `Completion` endpoint to generate text
response = openai.Completion.create(
    engine="text-davinci-003",
    prompt=prompt,
    max_tokens=100,
    temperature=0.7,
)

print(response.choices[0].text)


Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m71.7/76.5 kB[0m [31m2.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.10.0
    Uninstalling openai-1.10.0:
      Successfully uninstalled openai-1.10.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.
llmx 0.0.15a0 requires tiktoken, which is not installed.[0m[31m
[0mSuccessfully installed openai-0.28.0


APIRemovedInV1: 

You tried to access openai.Completion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


#26 Implement a machine translation program using the Hugging Face Transformers library, translate English text to French using python.#

In [None]:
!pip install transformers
from transformers import MarianMTModel, MarianTokenizer

def translate_text(input_text, source_lang="en", target_lang="fr"):
    # Load pre-trained model and tokenizer
    model_name = f'Helsinki-NLP/opus-mt-{source_lang}-{target_lang}'
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)

    # Tokenize and translate the input text
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    translated_ids = model.generate(input_ids)

    # Decode the translated text
    translated_text = tokenizer.decode(translated_ids[0], skip_special_tokens=True)

    return translated_text

if __name__ == "__main__":
    # Example English text
    english_text = "Hello, how are you?"

    # Translate English to French
    french_translation = translate_text(english_text, source_lang="en", target_lang="fr")

    # Display the results
    print("English Text: ", english_text)
    print("French Translation: ", french_translation)





The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]



English Text:  Hello, how are you?
French Translation:  Bonjour, comment allez-vous?
