In [None]:
import re

# Keyword spotting: collect every case-insensitive occurrence of the alerting
# keywords, in the order they appear in the message.
text = "Urgent: Account issue needs resolution!"
pattern = r"(urgent|issue)"
matches = [
    hit.group(1)
    for hit in re.finditer(pattern, text, flags=re.IGNORECASE)
]
print("Keywords found:", matches)  # Output: ['Urgent', 'issue']

In [None]:
import nltk

# word_tokenize() relies on the Punkt tokenizer data. Recent NLTK releases load
# it from the 'punkt_tab' resource while older ones use 'punkt', so fetch both
# to keep the cell working on either. quiet=True keeps the download log (and
# the interactive retry prompt on failure) out of the cell output.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Word-level tokenization with NLTK: punctuation becomes its own token.
text = "Bank service is great!"
tokens = nltk.word_tokenize(text)
print("Tokens:", tokens)  # ['Bank', 'service', 'is', 'great', '!']

# Alternative with spaCy: running the pipeline yields a Doc whose items are
# Token objects; .text recovers the surface string of each token.
# Requires the model to be installed: python -m spacy download en_core_web_sm
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
tokens_spacy = [token.text for token in doc]
print("spaCy Tokens:", tokens_spacy)  # ['Bank', 'service', 'is', 'great', '!']

In [None]:
from nltk.stem import PorterStemmer

# Stemming chops suffixes with heuristic rules, so a stem is not guaranteed to
# be a dictionary word (e.g. 'university' -> 'univers').
stemmer = PorterStemmer()
words = ["running", "cancers", "university"]
stems = list(map(stemmer.stem, words))
print("Stems:", stems)  # ['run', 'cancer', 'univers']

In [None]:
from nltk.stem import WordNetLemmatizer
nltk.download('wordnet')

# Lemmatization maps a word to its dictionary form. Each entry pairs a word
# with the WordNet part-of-speech code passed to the lemmatizer
# ("a" = adjective, "v" = verb).
lemmatizer = WordNetLemmatizer()
words = [
    ("better", "a"),
    ("ran", "v"),
]
lemmas = [lemmatizer.lemmatize(term, pos=tag) for term, tag in words]
print("Lemmas:", lemmas)  # ['good', 'run']

In [None]:
from nltk.corpus import stopwords
nltk.download('stopwords')

# Stop-word removal: lowercase the text, split on whitespace and drop every
# token found in NLTK's English stop-word list. A set gives O(1) membership
# checks.
stop_words = set(stopwords.words('english'))
text = "The bank is great"
filtered = list(
    filter(lambda candidate: candidate not in stop_words, text.lower().split())
)
print("Filtered:", filtered)  # ['bank', 'great']

In [None]:
import nltk
from nltk import ngrams, word_tokenize

# word_tokenize() relies on the Punkt tokenizer data. Recent NLTK releases load
# it from the 'punkt_tab' resource while older ones use 'punkt', so fetch both
# to keep the cell working on either. quiet=True keeps the download log (and
# the interactive retry prompt on failure) out of the cell output.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Bigrams: slide a window of two tokens over the tokenized sentence.
# ngrams() returns a lazy generator, hence the list() to materialize it.
text = "Bank offers good service"
tokens = word_tokenize(text)
bigrams = list(ngrams(tokens, 2))
print("Bigrams:", bigrams)  # [('Bank', 'offers'), ('offers', 'good'), ('good', 'service')]

In [None]:
import spacy

# Part-of-speech tagging: pair each token's text with its coarse-grained
# universal POS label (token.pos_).
nlp = spacy.load("en_core_web_sm")  # Lightweight, free
text = "Bank offers loans"
doc = nlp(text)
pos_tags = [(tok.text, tok.pos_) for tok in doc]
print("POS Tags:", pos_tags)  # [('Bank', 'NOUN'), ('offers', 'VERB'), ('loans', 'NOUN')]

In [None]:
# Dependency parsing: for every token show its relation label and the head
# token it attaches to. Reuses the `nlp` pipeline loaded in an earlier cell.
doc = nlp("Bank offers loans")
for token in doc:
    print(token.text, token.dep_, token.head.text, sep=" --> ")
# Output: Bank --> nsubj --> offers
#         offers --> ROOT --> offers
#         loans --> dobj --> offers

In [None]:
# Named-entity recognition: doc.ents holds the detected entity spans; keep each
# span's text together with its label. Reuses the `nlp` pipeline loaded in an
# earlier cell.
doc = nlp("HDFC Bank launches new loan in Mumbai on 2025-05-10")
entities = [(span.text, span.label_) for span in doc.ents]
print("Entities:", entities)  # [('HDFC Bank', 'ORG'), ('Mumbai', 'GPE'), ('2025-05-10', 'DATE')]

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag of words: learn the vocabulary from the corpus, then encode each document
# as a row of raw term counts (one column per vocabulary entry).
texts = ["Bank is great", "but bank service is poor"]
vectorizer = CountVectorizer()
vectorizer.fit(texts)
bow = vectorizer.transform(texts)
print("BoW Matrix:", bow.toarray())
# vocabulary_ maps each (lowercased) term to its column index in the matrix.
print("Vocabulary:", vectorizer.vocabulary_)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF: like bag of words, but each count is reweighted so that terms
# appearing in many documents contribute less than rare ones.
texts = ["Bank has fraud issues", "Service is great"]
vectorizer = TfidfVectorizer().fit(texts)
tfidf = vectorizer.transform(texts)
print("TF-IDF Matrix:", tfidf.toarray())

In [None]:
from sklearn.decomposition import TruncatedSVD

# Latent Semantic Analysis: project a term-document matrix onto its leading
# singular direction(s) with a truncated SVD.
matrix = [[1, 0], [0, 1]]  # Simplified term-document matrix

# TruncatedSVD's default solver is randomized, and this toy matrix has tied
# singular values, so without a fixed random_state the printed projection can
# change from run to run. Pin the seed to keep the cell reproducible.
svd = TruncatedSVD(n_components=1, random_state=42)
lsa = svd.fit_transform(matrix)
print("LSA:", lsa)

In [None]:
# Dependency note: the cells below import gensim. The install command is left
# commented out so that a normal run does not touch the environment; uncomment
# it if gensim is missing.
#!pip install gensim

In [None]:
from gensim.models import LdaModel
from gensim.corpora import Dictionary

# Topic modelling with LDA on a toy corpus of pre-tokenized documents.
texts = [["bank", "loan"], ["service", "great"]]

# Dictionary assigns an integer id to every distinct token; doc2bow turns each
# document into a sparse list of (token_id, count) pairs, the format LdaModel
# expects.
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# LDA training is stochastic: fix random_state so the printed topics are the
# same on every run.
lda = LdaModel(corpus, num_topics=2, id2word=dictionary, random_state=42)
print("Topics:", lda.print_topics())

In [None]:
from gensim.models import Word2Vec

# Train a tiny Word2Vec model on three pre-tokenized sentences.
sentences = [
    ["bank", "offers", "loan"],
    ["bank", "provides", "credit"],
    ["service", "is", "great"]
]

# Training is randomized. A fixed seed plus a single worker thread removes the
# run-to-run variation caused by random initialization and thread scheduling
# (gensim additionally recommends setting PYTHONHASHSEED for reproducibility
# across interpreter launches). min_count=1 keeps every word despite the tiny
# corpus.
model = Word2Vec(
    sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=1,
    seed=42,
)
print("Vector for 'loan':", model.wv["loan"][:5])  # First 5 dimensions

# most_similar returns a list of (word, cosine_similarity) tuples. With a
# corpus this small the neighbours are not semantically meaningful.
print("Similar to 'loan':", model.wv.most_similar("loan", topn=2))

In [None]:
import numpy as np

# Simplified stand-in for GloVe: NO pre-trained vectors are loaded here. Each
# word is mapped to a random 100-dimensional vector purely to illustrate the
# word -> vector lookup structure. For real embeddings, parse a downloaded
# GloVe text file into this same kind of dict.
# A seeded generator keeps the placeholder values identical on every run.
rng = np.random.default_rng(42)
glove_vectors = {word: rng.random(100) for word in ("bank", "loan")}
print("GloVe vector for 'bank':", glove_vectors["bank"][:5])

In [None]:
from gensim.models import FastText

# FastText builds word vectors from character n-grams, which helps with rare
# or domain-specific terms such as the medical vocabulary below.
sentences = [["antibiotic", "resistance"], ["drug", "therapy"]]

# Training is randomized. A fixed seed plus a single worker thread removes the
# run-to-run variation caused by random initialization and thread scheduling.
# min_count=1 keeps every word despite the tiny corpus.
model = FastText(
    sentences,
    vector_size=100,
    window=5,
    min_count=1,
    workers=1,
    seed=42,
)
print("Vector for 'antibiotic':", model.wv["antibiotic"][:5])

# most_similar returns a list of (word, cosine_similarity) tuples.
print("Similar to 'antibiotic':", model.wv.most_similar("antibiotic", topn=2))

In [None]:
import torch
import torch.nn as nn

# Vanilla RNN. With batch_first=True the input layout is
# (batch, sequence length, features).
rnn = nn.RNN(input_size=10, hidden_size=20, batch_first=True)
input_seq = torch.randn(1, 5, 10)  # Batch, seq len, input size

# `output` holds the hidden state at every time step, `hn` the final one.
output, hn = rnn(input_seq)

# Classification head: feed only the last time step into a 20 -> 2 linear layer.
fc = nn.Linear(in_features=20, out_features=2)
final_output = fc(output.select(1, -1))
print("RNN Output:", final_output.shape)  # torch.Size([1, 2])

In [None]:
# LSTM variant of the sequence classifier; relies on `torch` and `nn` imported
# in an earlier cell. Input layout is (batch, sequence length, features).
lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
input_seq = torch.randn(1, 5, 10)

# Besides the per-step outputs, an LSTM returns both the final hidden state
# (hn) and the final cell state (cn).
output, (hn, cn) = lstm(input_seq)

# Classification head applied to the last time step only.
fc = nn.Linear(in_features=20, out_features=2)
final_output = fc(output.select(1, -1))
print("LSTM Output:", final_output.shape)  # torch.Size([1, 2])

In [None]:
# GRU variant of the sequence classifier; relies on `torch` and `nn` imported
# in an earlier cell. Input layout is (batch, sequence length, features).
gru = nn.GRU(input_size=10, hidden_size=20, batch_first=True)
input_seq = torch.randn(1, 5, 10)

# `output` holds the hidden state at every time step, `hn` the final one.
output, hn = gru(input_seq)

# Classification head applied to the last time step only.
fc = nn.Linear(in_features=20, out_features=2)
final_output = fc(output.select(1, -1))
print("GRU Output:", final_output.shape)  # torch.Size([1, 2])