In [None]:
import spacy
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# Fetch the NLTK corpora this notebook relies on: the stop-word lists and WordNet
nltk.download('stopwords')
nltk.download('wordnet')

# NLTK helpers: English stop-word set, Porter stemmer, WordNet lemmatizer
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()
lemmar = WordNetLemmatizer()

# spaCy's small English pipeline
nlp = spacy.load("en_core_web_sm")

# Sentence that every later cell works on
text = "My name is Abdullah Jafri and I'm currently studying computer science from Karachi University."

# Run the spaCy pipeline once; later cells read tokens, sentences, tags and entities from `doc`
doc = nlp(text)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# Tokenization: collect the raw text of each spaCy token, in document order
tokens = list(map(lambda tok: tok.text, doc))
print("Tokens:", tokens)


Tokens: ['My', 'name', 'is', 'Abdullah', 'Jafri', 'and', 'I', "'m", 'currently', 'studying', 'computer', 'science', 'from', 'Karachi', 'University', '.']


In [None]:

# Stemming: run NLTK's Porter stemmer over every token
stemmed_tokens = list(map(stemmer.stem, tokens))
print("Stemmed Tokens:", stemmed_tokens)


Stemmed Tokens: ['my', 'name', 'is', 'abdullah', 'jafri', 'and', 'i', "'m", 'current', 'studi', 'comput', 'scienc', 'from', 'karachi', 'univers', '.']


In [None]:
# Lemmatization (using NLTK's WordNetLemmatizer)
# lemmatize() treats every word as a noun unless a `pos` is supplied, which is
# why verbs such as "studying" and "is" previously came back unchanged.
# Reuse the coarse part-of-speech tag spaCy already assigned to each token and
# translate it to the matching WordNet POS code; anything unmapped falls back
# to "n", the lemmatizer's own default.
SPACY_TO_WORDNET_POS = {"NOUN": "n", "VERB": "v", "AUX": "v", "ADJ": "a", "ADV": "r"}
lem_tokens = [
    lemmar.lemmatize(token.text, pos=SPACY_TO_WORDNET_POS.get(token.pos_, "n"))
    for token in doc
]
print("Lemmed Tokens:", lem_tokens)

Lemmed Tokens: ['My', 'name', 'is', 'Abdullah', 'Jafri', 'and', 'I', "'m", 'currently', 'studying', 'computer', 'science', 'from', 'Karachi', 'University', '.']


In [None]:

# Text normalization: fold the whole sample sentence to lower case
normalized_text = str.lower(text)
print("Normalized Text:", normalized_text)


Normalized Text: my name is abdullah jafri and i'm currently studying computer science from karachi university.


In [None]:

# Stopword Removal
# spaCy splits contractions into clitics that keep their apostrophe ("'m",
# "'s", "'re"), while NLTK's English list stores those fragments without it
# ("m", "s", "re"). Strip a leading apostrophe (straight or typographic)
# before the lookup so clitics such as "'m" are filtered out as well.
filtered_tokens = [
    token
    for token in tokens
    if token.lower().lstrip("'\u2019") not in stop_words
]
print("Tokens after Stopword Removal:", filtered_tokens)


Tokens after Stopword Removal: ['name', 'Abdullah', 'Jafri', "'m", 'currently', 'studying', 'computer', 'science', 'Karachi', 'University', '.']


In [None]:

# Sentence segmentation: text of each sentence span found by spaCy
sentences = list(map(lambda span: span.text, doc.sents))
print("Sentences:", sentences)


Sentences: ["My name is Abdullah Jafri and I'm currently studying computer science from Karachi University."]


In [None]:

# Part-of-speech tagging: pair each token's text with its coarse spaCy POS label
pos_tags = list(zip((tok.text for tok in doc), (tok.pos_ for tok in doc)))
print("POS Tags:", pos_tags)


POS Tags: [('My', 'PRON'), ('name', 'NOUN'), ('is', 'AUX'), ('Abdullah', 'PROPN'), ('Jafri', 'PROPN'), ('and', 'CCONJ'), ('I', 'PRON'), ("'m", 'AUX'), ('currently', 'ADV'), ('studying', 'VERB'), ('computer', 'NOUN'), ('science', 'NOUN'), ('from', 'ADP'), ('Karachi', 'PROPN'), ('University', 'PROPN'), ('.', 'PUNCT')]


In [None]:

# Named entity recognition: (entity text, entity label) for every span in doc.ents
ner_entities = list(map(lambda span: (span.text, span.label_), doc.ents))
print("Named Entities:", ner_entities)

Named Entities: [('Abdullah Jafri', 'PERSON'), ('Karachi University', 'ORG')]
