<a href="https://colab.research.google.com/github/ShivangRustagi04/megaminds/blob/main/Megaminds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
import re

def extract_relation_phrases(text):
    tokens = word_tokenize(text)
    tagged_tokens = pos_tag(tokens)

    relation_phrases = {'verb_phrases': [], 'conjunctive_phrases': [], 'disjunctive_phrases': [], 'noun_phrases': [], 'abbreviations': set()}
    all_phrases = []

    for i in range(len(tagged_tokens) - 1):
        current_word, current_pos = tagged_tokens[i]
        next_word, next_pos = tagged_tokens[i + 1]

        all_phrases.append(current_word)

        if current_pos.startswith('NN') and next_pos.startswith('VB'):
            relation_phrases['verb_phrases'].append(f'{current_word} {next_word}')

        if current_word.lower() in ('and', 'or'):
            relation_phrases['conjunctive_phrases'].append(f'{current_word} {next_word}')
        elif current_word.lower() == 'or':
            relation_phrases['disjunctive_phrases'].append(f'{current_word} {next_word}')

        if current_pos.startswith('NN') and not next_pos.startswith('VB'):
            relation_phrases['noun_phrases'].append(current_word)

        if current_pos == 'NNP' and next_pos == 'NNP':
            abbreviation = f'{current_word} {next_word}'
            relation_phrases['abbreviations'].add(abbreviation)
            all_phrases.append(abbreviation)

    all_phrases.append(tagged_tokens[-1][0])  # Adding the last word
    return relation_phrases, all_phrases

def extract_noun_phrases(text):
    tokens = word_tokenize(text)
    tagged_tokens = pos_tag(tokens)

    noun_phrases = [word for word, pos in tagged_tokens if pos.startswith('NN') and not pos.startswith('VB')]

    return noun_phrases

def detect_abbreviations(text):
    tokens = word_tokenize(text)
    abbreviation_pattern = re.compile(r'\b[A-Z][A-Z0-9]*\b')
    abbreviations = set(match.group() for token in tokens for match in [abbreviation_pattern.match(token)] if match)

    return abbreviations

def calculate_metrics(extracted, expected):
    true_positives = len(set(extracted) & set(expected))
    false_positives = len(set(extracted) - set(expected))
    false_negatives = len(set(expected) - set(extracted))

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    accuracy = true_positives / (true_positives + false_positives + false_negatives) if (true_positives + false_positives + false_negatives) > 0 else 0

    return precision, recall, f1_score, accuracy

# Get input from the user
user_input = input("Enter a sentence: ")

# Process and calculate F1 scores
expected_relation_phrases = {
    'verb_phrases': ['i am'],
    'conjunctive_phrases': ['or you', 'and i'],
    'disjunctive_phrases': [],
    'noun_phrases': ['hello', 'i', 'shivang', 'btech'],
    'abbreviations': {'AIDS'}
}

relation_phrases_result, all_phrases = extract_relation_phrases(user_input)
noun_phrases_result = extract_noun_phrases(user_input)
abbreviations_result = detect_abbreviations(user_input)

print("\nExtracted Verb Phrases:")
print(relation_phrases_result['verb_phrases'])

print("\nExtracted Noun Phrases:")
print(noun_phrases_result)

print("\nExtracted Conjunctive Phrases:")
print(relation_phrases_result['conjunctive_phrases'])

print("\nExtracted Disjunctive Phrases:")
print(relation_phrases_result['disjunctive_phrases'])

print("\nDetected Abbreviations:")
print(abbreviations_result)


# Print results and F1 scores
def print_metrics(category, extracted, expected):
    precision, recall, f1_score, accuracy = calculate_metrics(extracted, expected[category])
    print(f"\nMetrics for {category.capitalize()}:")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1_score:.2f}")
    print(f"Accuracy: {accuracy:.2f}")

# Print results and F1 scores
print_metrics('verb_phrases', relation_phrases_result['verb_phrases'], expected_relation_phrases)
print_metrics('conjunctive_phrases', relation_phrases_result['conjunctive_phrases'], expected_relation_phrases)
print_metrics('disjunctive_phrases', relation_phrases_result['disjunctive_phrases'], expected_relation_phrases)
print_metrics('noun_phrases', noun_phrases_result, expected_relation_phrases)
print_metrics('abbreviations', abbreviations_result, expected_relation_phrases)


Enter a sentence: hello i am shivang or rohan and i am currently pursuing btech in AIDS

Extracted Verb Phrases:
['i am', 'i am']

Extracted Noun Phrases:
['hello', 'i', 'shivang', 'rohan', 'i', 'btech', 'AIDS']

Extracted Conjunctive Phrases:
['or rohan', 'and i']

Extracted Disjunctive Phrases:
[]

Detected Abbreviations:
{'AIDS'}

Metrics for Verb_phrases:
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
Accuracy: 1.00

Metrics for Conjunctive_phrases:
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Accuracy: 0.33

Metrics for Disjunctive_phrases:
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
Accuracy: 0.00

Metrics for Noun_phrases:
Precision: 0.67
Recall: 1.00
F1 Score: 0.80
Accuracy: 0.67

Metrics for Abbreviations:
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
Accuracy: 1.00
