<a href="https://colab.research.google.com/github/Sairam7203/NeuralNetworks/blob/main/Untitled30.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import re
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Fetch the NLTK resources this notebook needs (WordNet and the 'omw-1.4'
# companion package), one download call per resource.
for corpus_name in ('wordnet', 'omw-1.4'):
    nltk.download(corpus_name)

# Hand-maintained stopword set (lower-case), grouped by grammatical role.
# Lookups lower-case the token first, so matching is case-insensitive.
stop_words = {
    # articles
    "a", "an", "the",
    # forms of "to be"
    "is", "are", "be", "was", "were",
    # prepositions and comparatives
    "in", "on", "at", "for", "with", "to", "of", "by", "as", "like", "from",
    # conjunctions
    "and", "or", "but",
    # demonstratives / pronouns
    "that", "this", "it",
    # auxiliary verbs
    "will", "would", "has", "have", "had", "do", "does", "did",
}

def simple_tokenize(text):
    """Split ``text`` into word tokens using a regular expression.

    A token is a maximal run of word characters (``\\w``: letters, digits and
    underscore) delimited by word boundaries, so punctuation and whitespace
    are dropped and never appear in the result.

    Args:
        text: The string to tokenize.

    Returns:
        A list of token strings in order of appearance (empty if none match).
    """
    word_pattern = re.compile(r'\b\w+\b')
    return word_pattern.findall(text)

def preprocess_nlp(sentence):
    """Print a step-by-step walkthrough of basic NLP preprocessing.

    The stages shown are, in order: the original sentence, regex
    tokenization, stopword removal (case-insensitive lookup in the
    module-level ``stop_words`` set), Porter stemming, and WordNet
    lemmatization. Stemming and lemmatization are both applied to the
    stopword-filtered tokens, not to each other's output.

    Args:
        sentence: The raw text to process.

    Returns:
        None. All results are written to stdout.
    """
    divider = "\n---"

    print("Original Sentence:")
    print(sentence)
    print(divider)

    # 1. Tokenization
    raw_tokens = simple_tokenize(sentence)
    print("1. Original Tokens:")
    print(raw_tokens)
    print(divider)

    # 2. Stopword removal: compare lower-cased, but keep the original casing
    content_tokens = []
    for token in raw_tokens:
        if token.lower() in stop_words:
            continue
        content_tokens.append(token)
    print("2. Tokens Without Stopwords:")
    print(content_tokens)
    print(divider)

    # 3. Stemming
    porter = PorterStemmer()
    stems = list(map(porter.stem, content_tokens))
    print("3. Stemmed Words:")
    print(stems)
    print(divider)

    # 4. Lemmatization (no part-of-speech argument is passed to the lemmatizer)
    wordnet_lemmatizer = WordNetLemmatizer()
    lemmas = list(map(wordnet_lemmatizer.lemmatize, content_tokens))
    print("4. Lemmatized Words:")
    print(lemmas)

# Run the preprocessing walkthrough on a sample sentence; every stage
# (tokens, stopword-filtered tokens, stems, lemmas) is printed by the function.
sentence = "NLP techniques are used in virtual assistants like Alexa and Siri."
preprocess_nlp(sentence)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Original Sentence:
NLP techniques are used in virtual assistants like Alexa and Siri.

---
1. Original Tokens:
['NLP', 'techniques', 'are', 'used', 'in', 'virtual', 'assistants', 'like', 'Alexa', 'and', 'Siri']

---
2. Tokens Without Stopwords:
['NLP', 'techniques', 'used', 'virtual', 'assistants', 'Alexa', 'Siri']

---
3. Stemmed Words:
['nlp', 'techniqu', 'use', 'virtual', 'assist', 'alexa', 'siri']

---
4. Lemmatized Words:
['NLP', 'technique', 'used', 'virtual', 'assistant', 'Alexa', 'Siri']


In [3]:
# Install spaCy if it's not already installed
!pip install spacy

# Download the English language model
!python -m spacy download en_core_web_sm

import spacy

# Load the English NLP model
nlp = spacy.load("en_core_web_sm")

# Input sentence
sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."

# Process the sentence with spaCy's NLP pipeline
doc = nlp(sentence)

# Loop through the named entities in the document and print their details
for ent in doc.ents:
    print(f"Entity Text: {ent.text}")
    print(f"Entity Label: {ent.label_}")
    print(f"Start Position: {ent.start_char}")
    print(f"End Position: {ent.end_char}")
    print("---")


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m94.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Entity Text: Barack Obama
Entity Label: PERSON
Start Position: 0
End Position: 12
---
Entity Text: 44th
Entity Label: ORDINAL
Start Position: 27
End Position: 31
---
Entity Text: the United States
Entity Label: GPE
Start Position: 45
End Position: 62
---
Entity Text: the Nobel Peace Prize
Entity Label: WORK_OF_ART
Start Position: 71


In [4]:
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """Compute scaled dot-product attention: ``softmax(Q Kᵀ / sqrt(d_k)) V``.

    Parameters
    ----------
    Q : array_like, shape (n_queries, d_k)
        Query vectors, one per row.
    K : array_like, shape (n_keys, d_k)
        Key vectors, one per row.
    V : array_like, shape (n_keys, d_v)
        Value vectors, one per row (aligned with the rows of ``K``).

    Returns
    -------
    attention_weights : numpy.ndarray, shape (n_queries, n_keys)
        Row-wise softmax of the scaled scores; each row sums to 1.
    output : numpy.ndarray, shape (n_queries, d_v)
        Attention-weighted combination of the value vectors.
    """
    # Accept plain nested lists as well as arrays.
    Q, K, V = np.asarray(Q), np.asarray(K), np.asarray(V)

    # Step 1: Compute the dot product of Q and Kᵀ
    dot_product = np.dot(Q, K.T)

    # Step 2: Scale by sqrt(d_k), where d_k is the key dimension
    d_k = K.shape[-1]
    scaled_dot_product = dot_product / np.sqrt(d_k)

    # Step 3: Row-wise softmax.
    # Subtracting each row's maximum first leaves the softmax mathematically
    # unchanged but keeps every exponent <= 0, so np.exp cannot overflow to
    # inf (which would otherwise produce inf/inf = nan weights).
    shifted = scaled_dot_product - np.max(scaled_dot_product, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    attention_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

    # Step 4: Multiply the attention weights by V to get the final output
    output = np.dot(attention_weights, V)

    # Return the attention weights and final output
    return attention_weights, output

# Toy inputs: two query rows, two key rows (identical to the queries here)
# and two value rows, each with four features.
Q = np.array([
    [1, 0, 1, 0],
    [0, 1, 0, 1],
])
K = np.array([
    [1, 0, 1, 0],
    [0, 1, 0, 1],
])
V = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
])

# Compute attention over the toy inputs
attention_weights, output = scaled_dot_product_attention(Q, K, V)

# Show the weight matrix first, then the attended output
print("Attention Weights Matrix:")
print(attention_weights)
print("\nFinal Output Matrix:")
print(output)


Attention Weights Matrix:
[[0.73105858 0.26894142]
 [0.26894142 0.73105858]]

Final Output Matrix:
[[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]


In [5]:
# Install HuggingFace's Transformers and Torch if not already installed
!pip install transformers
!pip install torch

from transformers import pipeline

# Load the pre-trained sentiment analysis pipeline
sentiment_analyzer = pipeline("sentiment-analysis")

# Input sentence
sentence = "Despite the high price, the performance of the new MacBook is outstanding."

# Analyze the sentiment
result = sentiment_analyzer(sentence)

# Print the result: Sentiment and confidence score
label = result[0]['label']
confidence = result[0]['score']

print(f"Sentiment: {label}")
print(f"Confidence Score: {confidence:.4f}")


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


Sentiment: POSITIVE
Confidence Score: 0.9998
