In [None]:
#Q2: Named Entity Recognition with SpaCy

import spacy

# Load the spaCy English model once at module level; extract_named_entities()
# reads this shared `nlp` object on every call instead of loading its own.
# NOTE(review): presumably the "en_core_web_sm" model package must already be
# installed in the kernel's environment — confirm before a fresh run.
nlp = spacy.load("en_core_web_sm")

def extract_named_entities(sentence):
    """Print every named entity found in ``sentence``.

    The sentence is run through the module-level ``nlp`` pipeline. For each
    entity in the resulting document, four labelled lines are printed (text,
    label, start character offset, end character offset) followed by a
    40-dash separator line. Nothing is returned.
    """
    separator = "-" * 40

    for entity in nlp(sentence).ents:
        # Assemble the whole report for this entity, then emit it in one go;
        # joining with newlines yields the same lines as printing one by one.
        report_lines = (
            f"Entity Text: {entity.text}",
            f"Entity Label: {entity.label_}",
            f"Start Position: {entity.start_char}",
            f"End Position: {entity.end_char}",
            separator,
        )
        print("\n".join(report_lines))

# Example sentence: run the extractor on one sample input.
# extract_named_entities() only prints (it has no return statement), so the
# call below is made purely for its printed entity report.
sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."
extract_named_entities(sentence)

#1. Difference Between NER and POS Tagging
'''
Named Entity Recognition (NER) identifies and classifies named entities in text (like people, organizations, dates, locations).
Example: "Barack Obama" → PERSON
Part-of-Speech (POS) Tagging assigns grammatical roles to words (like noun, verb, adjective).
Example: "Obama" → NNP (Proper Noun)
'''
#2. Two Real-World Applications of NER
'''
1. Financial News Monitoring

NER extracts company names, stock tickers, dates, and events from news articles to help analysts track market movements or automate trading decisions.
2. Search Engines

NER helps improve search results by identifying entities in queries (e.g., distinguishing "Apple" the company from "apple" the fruit).
'''


Entity Text: Barack Obama
Entity Label: PERSON
Start Position: 0
End Position: 12
----------------------------------------
Entity Text: 44th
Entity Label: ORDINAL
Start Position: 27
End Position: 31
----------------------------------------
Entity Text: the United States
Entity Label: GPE
Start Position: 45
End Position: 62
----------------------------------------
Entity Text: the Nobel Peace Prize
Entity Label: WORK_OF_ART
Start Position: 71
End Position: 92
----------------------------------------
Entity Text: 2009
Entity Label: DATE
Start Position: 96
End Position: 100
----------------------------------------


In [None]:
#Q3: Scaled Dot-Product Attention

import numpy as np
import scipy.special

def scaled_dot_product_attention(Q, K, V):
    """Compute scaled dot-product attention: softmax(Q Kᵀ / sqrt(d_k)) V.

    Works for plain 2-D matrices and for inputs with leading batch/head axes;
    leading axes are broadcast by ``np.matmul``.

    Args:
        Q: Queries, array-like of shape (..., n_queries, d_k).
        K: Keys, array-like of shape (..., n_keys, d_k). Must be at least 2-D.
        V: Values, array-like of shape (..., n_keys, d_v).

    Returns:
        Tuple ``(attention_weights, output)``:
        ``attention_weights`` has shape (..., n_queries, n_keys) and sums to 1
        along its last axis; ``output`` has shape (..., n_queries, d_v).

    Raises:
        ValueError: If ``K`` has fewer than two dimensions, because there is
            then no key axis to normalise the softmax over.
    """
    # Accept lists/tuples as well as ndarrays.
    Q = np.asarray(Q)
    K = np.asarray(K)
    V = np.asarray(V)

    if K.ndim < 2:
        raise ValueError(
            "K must have shape (..., n_keys, d_k); got an array with "
            f"{K.ndim} dimension(s)"
        )

    # Step 1: Compute the dot product of Q and Kᵀ.
    # Only the last two axes of K are swapped: K.T would reverse *all* axes
    # and break (or silently corrupt) inputs with leading batch/head axes.
    matmul_qk = np.matmul(Q, np.swapaxes(K, -1, -2))

    # Step 2: Scale by dividing by the square root of the dimension of the key (d)
    d_k = K.shape[-1]  # The dimension of the key (d)
    scaled_attention_logits = matmul_qk / np.sqrt(d_k)

    # Step 3: Apply softmax over the key axis to get attention weights
    attention_weights = scipy.special.softmax(scaled_attention_logits, axis=-1)

    # Step 4: Multiply attention weights by V to get the output
    output = np.matmul(attention_weights, V)

    return attention_weights, output

# Toy inputs: two queries, two keys and two value rows, each 4-dimensional
Q = np.array([
    [1, 0, 1, 0],
    [0, 1, 0, 1],
])  # Query matrix
K = np.array([
    [1, 0, 1, 0],
    [0, 1, 0, 1],
])  # Key matrix
V = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])  # Value matrix

# Run attention once and unpack the (weights, output) pair
attention_weights, output = scaled_dot_product_attention(Q, K, V)

# Show each result under its heading; sep="\n" places the array on the line
# after its label, exactly as two separate print calls would.
print("Attention Weights (after softmax):", attention_weights, sep="\n")
print("\nFinal Output Matrix:", output, sep="\n")

#1. Why divide attention scores by √d?
'''
When d (the dimension of the key vectors) is large, the dot products between Query (Q) and Key (K) vectors tend to grow large in magnitude.
Such large logits push the softmax into saturated regions where it produces extremely small gradients, making learning difficult.
Dividing by √d scales the values down, keeping the softmax stable and effective.
'''

#2. How does self-attention help in understanding word relationships?
'''
Self-attention allows each word in a sentence to focus on other words, regardless of their position.
It helps the model capture context—for example, understanding that “bank” in “He sat by the river bank” relates to “river”.
This makes it powerful for tasks like translation, summarization, and question answering.
'''

Attention Weights (after softmax):
[[0.73105858 0.26894142]
 [0.26894142 0.73105858]]

Final Output Matrix:
[[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]


In [None]:
#Q4: Sentiment Analysis using HuggingFace Transformers

from transformers import pipeline

# Load the pre-trained sentiment-analysis pipeline.
# The checkpoint and revision are pinned explicitly (to the values the bare
# task name resolved to when this notebook was last run) so that re-running
# the notebook does not depend on the library's current default model.
SENTIMENT_MODEL = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
SENTIMENT_MODEL_REVISION = "714eb0f"
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model=SENTIMENT_MODEL,
    revision=SENTIMENT_MODEL_REVISION,
)

# Input sentence
sentence = "Despite the high price, the performance of the new MacBook is outstanding."

# Get the sentiment analysis result; the code below reads the first element
# of the returned sequence, which corresponds to the single input sentence.
result = sentiment_analyzer(sentence)

# Extract and print the label and confidence score
label = result[0]['label']
confidence = result[0]['score']

print(f"Label: {label}")
print(f"Confidence score: {confidence:.4f}")

#1. Architectural Difference Between BERT and GPT
'''
BERT uses only the encoder part of the Transformer architecture.
It is bidirectional, meaning it looks at the full context (both left and right) of a word.
GPT uses only the decoder part.
It is unidirectional (left-to-right), predicting the next word based on previous ones.
BERT = Encoder (for understanding)
GPT = Decoder (for generating)
'''
#2. Why Use Pre-trained Models Like BERT or GPT
'''
Saves time and resources: Training from scratch requires huge data and compute power.
Better performance: These models are trained on large corpora and capture deep language patterns.
Transfer learning: Pre-trained models can be fine-tuned on specific tasks (e.g., sentiment analysis, QA) with small datasets and still perform well.
'''

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Label: POSITIVE
Confidence score: 0.9998
