<a href="https://colab.research.google.com/github/SwapnilMergu/neural-network-home-assignment-3/blob/main/home_assignment_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import numpy as np
import os
import requests

# Load a text dataset: Shakespeare's Sonnets, downloaded (and cached) by Keras.
url = "https://www.gutenberg.org/files/1041/1041-0.txt"
text_path = tf.keras.utils.get_file("shakespeare.txt", url)
# Read inside a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with open(text_path, "r", encoding='utf-8') as corpus_file:
    text = corpus_file.read()

# Take a small portion (first 100,000 characters) for faster training
text = text[:100000]

# Preprocess: map characters to integers.
# `chars` is the sorted vocabulary; `char2idx` maps character -> id and
# `idx2char` (a NumPy array) maps id -> character.
chars = sorted(set(text))
char2idx = {u: i for i, u in enumerate(chars)}
idx2char = np.array(chars)

# The whole corpus encoded as an array of character ids.
text_as_int = np.array([char2idx[c] for c in text])

# Define sequence length: number of input characters per training example.
seq_length = 100
# Approximate number of examples in the corpus; not referenced again in the
# visible cells.
examples_per_epoch = len(text) // seq_length

# Create input-target sequences.
# Chunks are seq_length+1 long so that each one can be split into an input and
# a one-step-shifted target that are both seq_length long; a trailing partial
# chunk is dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

def split_input_target(chunk):
    """Split one chunk into a next-step prediction pair.

    Args:
        chunk: A sliceable sequence of length n.

    Returns:
        A tuple ``(input, target)`` where ``input`` is the chunk without its
        last element and ``target`` is the chunk without its first element,
        so ``target[i]`` is the element that follows ``input[i]``.
    """
    return chunk[:-1], chunk[1:]

# Batch and shuffle settings
BATCH_SIZE = 64
BUFFER_SIZE = 100000

# Turn every chunk into an (input, target) pair, shuffle the pairs, then group
# them into fixed-size batches (an incomplete final batch is dropped).
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

#Define the LSTM RNN model
# Hyper-parameters: vocabulary size comes from the corpus; the embedding width
# and the number of LSTM units are fixed here.
vocab_size = len(chars)
embedding_dim = 256
rnn_units = 512

# Character-level language model: embedding -> LSTM -> per-step projection.
# return_sequences=True makes the LSTM emit an output at every time step, and
# the final Dense layer has no activation, so the model outputs raw logits over
# the vocabulary for each position.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.LSTM(rnn_units, return_sequences=True),
    tf.keras.layers.Dense(vocab_size)
])

#Compile and train
# from_logits=True matches the activation-free Dense output above; targets are
# integer character ids, hence the sparse variant of the loss.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss)

#Train for few epochs
# NOTE(review): no random seed is set, so losses and samples will differ
# between runs.
EPOCHS = 20
model.fit(dataset, epochs=EPOCHS)

#Generate text
def generate_text(model, start_string, temperature=1.0, num_generate=200, max_context=100):
    """Sample text from the character-level model, one character at a time.

    The model is called on the running context (seed plus everything generated
    so far, truncated to ``max_context`` characters) at every step. Feeding
    only the most recent character would discard all earlier context, because
    the model is invoked without carrying recurrent state between calls.

    Relies on the module-level ``char2idx`` and ``idx2char`` lookups.

    Args:
        model: Callable mapping an integer tensor of shape (1, length) to
            logits of shape (1, length, vocab_size).
        start_string: Non-empty seed text; every character must be a key of
            ``char2idx``.
        temperature: Positive scaling factor for the logits. Values below 1
            make sampling more conservative, values above 1 more random.
        num_generate: Number of characters to generate.
        max_context: Maximum number of trailing characters fed to the model at
            each step (positive int), or ``None`` to always feed the whole
            history. The default of 100 matches the training sequence length.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character not in ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Ids of the seed plus everything sampled so far.
    context_ids = [char2idx[s] for s in start_string]
    text_generated = []

    for _ in range(num_generate):
        window = context_ids if max_context is None else context_ids[-max_context:]
        input_eval = tf.expand_dims(window, 0)

        predictions = model(input_eval)
        # Only the logits for the last position predict the next character.
        predictions = predictions[:, -1, :] / temperature
        predicted_id = int(tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy())

        # Extend the context so the next step sees the full recent history.
        context_ids.append(predicted_id)
        text_generated.append(idx2char[predicted_id])

    return start_string + ''.join(text_generated)

# Example text generation: sample a continuation of the seed phrase using the
# trained model, with temperature 0.8 and the default generation length.
print(generate_text(model, start_string="That time of year", temperature=0.8))


Epoch 1/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 2s/step - loss: 3.8152
Epoch 2/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2s/step - loss: 3.1154
Epoch 3/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 2s/step - loss: 2.9136
Epoch 4/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 2s/step - loss: 2.6701
Epoch 5/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2s/step - loss: 2.4889
Epoch 6/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2s/step - loss: 2.3726
Epoch 7/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step - loss: 2.2852
Epoch 8/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2s/step - loss: 2.2206
Epoch 9/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2s/step - loss: 2.1581
Epoch 10/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step - loss: 2.1093
Epoch 11/

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Download the NLTK resources used below (tokenizer data and stopword lists)
for resource in ('punkt_tab', 'stopwords'):
    nltk.download(resource)

def nlp_preprocess(sentence):
    """Print the tokenized, stopword-filtered and stemmed forms of a sentence.

    Args:
        sentence: Text to process.

    Returns:
        None. The three intermediate token lists are printed, not returned.
    """
    # Step 1: split the sentence into tokens
    raw_tokens = word_tokenize(sentence)
    print("Original Tokens:", raw_tokens)

    # Step 2: drop English stopwords (compared case-insensitively)
    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in raw_tokens:
        if token.lower() in english_stopwords:
            continue
        kept_tokens.append(token)
    print("Tokens Without Stopwords:", kept_tokens)

    # Step 3: reduce each remaining token to its Porter stem
    porter = PorterStemmer()
    stems = list(map(porter.stem, kept_tokens))
    print("Stemmed Words:", stems)

# Example sentence: run the preprocessing steps and print each stage.
sentence = "NLP techniques are used in virtual assistants like Alexa and Siri."
nlp_preprocess(sentence)


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Original Tokens: ['NLP', 'techniques', 'are', 'used', 'in', 'virtual', 'assistants', 'like', 'Alexa', 'and', 'Siri', '.']
Tokens Without Stopwords: ['NLP', 'techniques', 'used', 'virtual', 'assistants', 'like', 'Alexa', 'Siri', '.']
Stemmed Words: ['nlp', 'techniqu', 'use', 'virtual', 'assist', 'like', 'alexa', 'siri', '.']


In [None]:
import spacy

# Load spaCy's small English pipeline.
# NOTE(review): this assumes the "en_core_web_sm" package is already installed
# in the environment — confirm before running on a fresh kernel.
nlp = spacy.load("en_core_web_sm")

# Input sentence (this rebinds `sentence` from the NLTK cell above)
sentence = "Barack Obama served as the 44th President of the United States and won the Nobel Peace Prize in 2009."

# Process the sentence
doc = nlp(sentence)

# Extract and print named entities: text, label and start/end character
# positions. The trailing "\n" in the f-string leaves a blank line between
# entities.
print("Named Entities:")
for ent in doc.ents:
    print(f"Text: {ent.text}, Label: {ent.label_}, Start: {ent.start_char}, End: {ent.end_char}\n")


Named Entities:
Text: Barack Obama, Label: PERSON, Start: 0, End: 12

Text: 44th, Label: ORDINAL, Start: 27, End: 31

Text: the United States, Label: GPE, Start: 45, End: 62

Text: the Nobel Peace Prize, Label: WORK_OF_ART, Start: 71, End: 92

Text: 2009, Label: DATE, Start: 96, End: 100



In [None]:
import numpy as np
def scaled_dot_product_attention(Q, K, V):
    """Compute scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    Works for plain 2-D matrices and for inputs with leading batch/head
    dimensions; only the last two axes take part in the matrix products.

    Args:
        Q: Queries, array-like of shape (..., n_queries, d_k).
        K: Keys, array-like of shape (..., n_keys, d_k).
        V: Values, array-like of shape (..., n_keys, d_v).

    Returns:
        A tuple ``(attention_weights, output)`` where ``attention_weights`` has
        shape (..., n_queries, n_keys) with each row summing to 1, and
        ``output`` has shape (..., n_queries, d_v).
    """
    Q = np.asarray(Q)
    K = np.asarray(K)
    V = np.asarray(V)

    # Dot product of Q and K^T. Only the last two axes of K are swapped, so
    # any leading batch dimensions are preserved (K.T would reverse all axes).
    dk = Q.shape[-1]
    scores = np.matmul(Q, np.swapaxes(K, -1, -2))
    # Scale by sqrt(dk)
    scaled_scores = scores / np.sqrt(dk)

    # Apply softmax over the key axis to get attention weights
    def softmax(x):
        # Subtract the row maximum before exponentiating for numerical stability.
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)

    attention_weights = softmax(scaled_scores)
    # Multiply by V to get output
    output = np.matmul(attention_weights, V)
    return attention_weights, output
# Input matrices: two queries and two keys of dimension 4, plus two value rows
# of dimension 4. Q and K are identical here, so each query matches its own key
# most strongly.
Q = np.array([[1, 0, 1, 0],
              [0, 1, 0, 1]])
K = np.array([[1, 0, 1, 0],
              [0, 1, 0, 1]])
V = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8]])

# Run the function
weights, output = scaled_dot_product_attention(Q, K, V)

# Print results: the attention weights, then the weighted sum of the rows of V
print("Attention Weights:\n", weights)
print("\nOutput:\n", output)


Attention Weights:
 [[0.73105858 0.26894142]
 [0.26894142 0.73105858]]

Output:
 [[2.07576569 3.07576569 4.07576569 5.07576569]
 [3.92423431 4.92423431 5.92423431 6.92423431]]


In [2]:
from transformers import pipeline

# Load the pre-trained sentiment analysis pipeline.
# The checkpoint is named explicitly rather than relying on the library's
# implicit task default, so results stay reproducible if that default changes
# and the "no model was supplied" warning is avoided.
# NOTE(review): this is believed to be the checkpoint the task currently
# defaults to — confirm against the installed transformers version.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# Input sentence
text = "Despite the high price, the performance of the new MacBook is outstanding."

# Analyze sentiment: the pipeline returns one result dict per input
result = sentiment_pipeline(text)[0]

# Print the predicted label and its confidence score rounded to 4 decimals
print("Sentiment:", result['label'])
print("Confidence Score:", round(result['score'], 4))


Sentiment: POSITIVE
Confidence Score: 0.9998
