Prepared by Muhammad Sohail Abbas sohail.abbas@isb.nu.edu.pk

## Table of Contents
* Introduction to NLTK
* Installing NLTK
* Tokenization
* Sentence Tokenization
* Word Tokenization
* Counting Words
* Stopwords Removal
* Stemming and Lemmatization
* ngrams

## Introduction to NLTK
Natural Language Toolkit (NLTK) is a Python library that provides tools and resources for working with human language data. It is widely used for text processing, text analysis, and natural language understanding.

## Installing NLTK
You can install NLTK using pip:

In [None]:
pip install nltk

Note: you may need to restart the kernel to use updated packages.




## Tokenization
### Sentence Tokenization
Sentence tokenization is the process of splitting text into sentences.

In [9]:
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

text = "NLTK is a powerful library. It helps with text processing."
sentences = sent_tokenize(text)
print(sentences)

['NLTK is a powerful library.', 'It helps with text processing.']


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Word Tokenization
Word tokenization is the process of splitting sentences into words.

In [4]:
from nltk.tokenize import word_tokenize

sentence = "NLTK is a powerful library. It is very useful and it is also very helpful at same time"
words = word_tokenize(sentence)
print(words)

['NLTK', 'is', 'a', 'powerful', 'library', '.', 'It', 'is', 'very', 'useful', 'and', 'it', 'is', 'also', 'very', 'helpful', 'at', 'same', 'time']


## Counting Words
You can count the frequency of a word in a text.

In [6]:
words.count('very')
from nltk import word_tokenize
sentence = "NLTK is a powerful library. It is very useful and it is also very helpful at same time"
words_new = word_tokenize(sentence)
print(words_new)

['NLTK', 'is', 'a', 'powerful', 'library', '.', 'It', 'is', 'very', 'useful', 'and', 'it', 'is', 'also', 'very', 'helpful', 'at', 'same', 'time']


In [7]:
from nltk.text import Text
from nltk.text import Text
lin_2 = Text(words)
lin = Text(words)
print (lin_2)
lin.vocab()

<Text: NLTK is a powerful library . It is...>


FreqDist({'is': 3, 'very': 2, 'NLTK': 1, 'a': 1, 'powerful': 1, 'library': 1, '.': 1, 'It': 1, 'useful': 1, 'and': 1, ...})

## Stopwords Removal
Stopwords are common words (e.g., "the," "and," "is") that are often removed from text.

In [15]:
from nltk.corpus import stopwords
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))


print(stop_words)

{'no', 'couldn', 'she', 'then', 'll', 'not', 'his', 'y', 'hadn', 'to', 'myself', 'whom', 'didn', 'won', 'you', 'haven', 'only', 'her', 'but', 'am', 'of', 'after', 'd', 'can', 'don', 'm', 'shouldn', 'him', 'about', 'before', 'on', 'so', "didn't", 'just', 'its', 'a', 're', 'shan', 'few', 'do', 'their', 'is', 'being', 'wouldn', 'isn', 'in', 'into', 'that', 'there', 'o', 'what', 'mustn', 'while', 'were', 'for', 'where', 'has', "mustn't", 'again', 'be', "won't", 'up', "isn't", 'all', 'because', 'with', "aren't", 'through', 'this', 'hasn', 'than', 'how', 'ourselves', 'doing', "she's", 'itself', 'same', "that'll", 'these', 'they', 'was', 'between', 'ma', 'wasn', "hadn't", 'been', 'over', 'once', 'doesn', 'if', 'it', 'as', 'further', "you'll", 'themselves', 'himself', 'hers', 'our', 'ain', "should've", "don't", 'nor', "you'd", 'other', 'mightn', 'herself', "haven't", "shan't", 'those', "wasn't", "it's", 'yourselves', 'the', 'at', "mightn't", 'above', "wouldn't", 'he', 'by', 'we', 'which', 'her

In [None]:
filtered_words = [word for word in words if word.lower() not in stop_words]
filtered_words = [word for word in words if word.lower() not in stop_words]
print(filtered_words)

['NLTK', 'powerful', 'library', '.']


In [22]:
stopwords.fileids()
import pandas as pd
dir(nltk)

['ARLSTem',
 'ARLSTem2',
 'AbstractLazySequence',
 'AffixTagger',
 'AlignedSent',
 'Alignment',
 'AnnotationTask',
 'ApplicationExpression',
 'Assignment',
 'BigramAssocMeasures',
 'BigramCollocationFinder',
 'BigramTagger',
 'BinaryMaxentFeatureEncoding',
 'BlanklineTokenizer',
 'BllipParser',
 'BottomUpChartParser',
 'BottomUpLeftCornerChartParser',
 'BottomUpProbabilisticChartParser',
 'Boxer',
 'BrillTagger',
 'BrillTaggerTrainer',
 'CFG',
 'CRFTagger',
 'CfgReadingCommand',
 'ChartParser',
 'ChunkParserI',
 'ChunkScore',
 'Cistem',
 'ClassifierBasedPOSTagger',
 'ClassifierBasedTagger',
 'ClassifierI',
 'ConcordanceIndex',
 'ConditionalExponentialClassifier',
 'ConditionalFreqDist',
 'ConditionalProbDist',
 'ConditionalProbDistI',
 'ConfusionMatrix',
 'ContextIndex',
 'ContextTagger',
 'ContingencyMeasures',
 'CoreNLPDependencyParser',
 'CoreNLPParser',
 'Counter',
 'CrossValidationProbDist',
 'DRS',
 'DecisionTreeClassifier',
 'DefaultTagger',
 'DependencyEvaluator',
 'DependencyG

## Stemming and Lemmatization
Stemming reduces words to their root form, while lemmatization reduces them to their base or dictionary form.

In [24]:
from nltk.stem import PorterStemmer, WordNetLemmatizer
nltk.download('wordnet')
nltk.download('omw-1.4')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
word = "running"
stemmed_word = stemmer.stem(word)
lemmatized_word = lemmatizer.lemmatize(word, pos="v")

print(f"Stemmed: {stemmed_word}, Lemmatized: {lemmatized_word}")

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Stemmed: run, Lemmatized: run


## ngrams
ngrams gives you the freuency of not only a single word but more than one words as specified

In [10]:
text = """The Young King
The Birthday of the Infantaimport nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

text = "NLTK is a powerful library. It helps with text processing."
sentences = sent_tokenize(text)
print(sentences)
The Fisherman and his Soul
The Star-child"""

In [11]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [12]:
# normalizung the data
text = text.lower()
text

'the young king\nthe birthday of the infantaimport nltk\nnltk.download(\'punkt\')\nfrom nltk.tokenize import sent_tokenize\n\ntext = "nltk is a powerful library. it helps with text processing."\nsentences = sent_tokenize(text)\nprint(sentences)\nthe fisherman and his soul\nthe star-child'

In [None]:
# character wise frequency distribution
myFD = nltk.FreqDist(text)
myFD

FreqDist({' ': 11, 'h': 9, 't': 8, 'e': 6, 'n': 6, 'i': 6, 'a': 6, 's': 4, 'o': 3, '\n': 3, ...})

In [13]:
# generating tokens before creating ngrams
#tokens = nltk.word_tokenize(text)
tokens = nltk.word_tokenize(text)
unigrams = nltk.ngrams(tokens, 1)
bigrams = nltk.ngrams(tokens, 2)
tokens_FD = nltk.FreqDist(bigrams)
tokens
for token in list(tokens_FD.items()):

   print(token[0], token[1])

('the', 'young') 1
('young', 'king') 1
('king', 'the') 1
('the', 'birthday') 1
('birthday', 'of') 1
('of', 'the') 1
('the', 'infantaimport') 1
('infantaimport', 'nltk') 1
('nltk', 'nltk.download') 1
('nltk.download', '(') 1
('(', "'punkt") 1
("'punkt", "'") 1
("'", ')') 1
(')', 'from') 1
('from', 'nltk.tokenize') 1
('nltk.tokenize', 'import') 1
('import', 'sent_tokenize') 1
('sent_tokenize', 'text') 1
('text', '=') 1
('=', '``') 1
('``', 'nltk') 1
('nltk', 'is') 1
('is', 'a') 1
('a', 'powerful') 1
('powerful', 'library') 1
('library', '.') 1
('.', 'it') 1
('it', 'helps') 1
('helps', 'with') 1
('with', 'text') 1
('text', 'processing') 1
('processing', '.') 1
('.', "''") 1
("''", 'sentences') 1
('sentences', '=') 1
('=', 'sent_tokenize') 1
('sent_tokenize', '(') 1
('(', 'text') 1
('text', ')') 1
(')', 'print') 1
('print', '(') 1
('(', 'sentences') 1
('sentences', ')') 1
(')', 'the') 1
('the', 'fisherman') 1
('fisherman', 'and') 1
('and', 'his') 1
('his', 'soul') 1
('soul', 'the') 1
('the

In [14]:
# unigrams
unigrams = nltk.ngrams(tokens, 1)

unigramFD = nltk.FreqDist(unigrams)
unigramsFd = nltk.FreqDist(unigrams)

for token in list(unigramFD.items()):
    print(token[0], token[1])

('the',) 5
('young',) 1
('king',) 1
('birthday',) 1
('of',) 1
('infantaimport',) 1
('nltk',) 2
('nltk.download',) 1
('(',) 3
("'punkt",) 1
("'",) 1
(')',) 3
('from',) 1
('nltk.tokenize',) 1
('import',) 1
('sent_tokenize',) 2
('text',) 3
('=',) 2
('``',) 1
('is',) 1
('a',) 1
('powerful',) 1
('library',) 1
('.',) 2
('it',) 1
('helps',) 1
('with',) 1
('processing',) 1
("''",) 1
('sentences',) 2
('print',) 1
('fisherman',) 1
('and',) 1
('his',) 1
('soul',) 1
('star-child',) 1


In [37]:
# bigrams
bigrams = nltk.ngrams(tokens, 2)
bigramsFD = nltk.FreqDist(bigrams)

for bigram in list(bigramsFD.items()):
    print(bigram[0], bigram[1])

('the', 'young') 1
('young', 'king') 1
('king', 'the') 1
('the', 'birthday') 1
('birthday', 'of') 1
('of', 'the') 1
('the', 'infantaimport') 1
('infantaimport', 'nltk') 1
('nltk', 'nltk.download') 1
('nltk.download', '(') 1
('(', "'punkt") 1
("'punkt", "'") 1
("'", ')') 1
(')', 'from') 1
('from', 'nltk.tokenize') 1
('nltk.tokenize', 'import') 1
('import', 'sent_tokenize') 1
('sent_tokenize', 'text') 1
('text', '=') 1
('=', '``') 1
('``', 'nltk') 1
('nltk', 'is') 1
('is', 'a') 1
('a', 'powerful') 1
('powerful', 'library') 1
('library', '.') 1
('.', 'it') 1
('it', 'helps') 1
('helps', 'with') 1
('with', 'text') 1
('text', 'processing') 1
('processing', '.') 1
('.', "''") 1
("''", 'sentences') 1
('sentences', '=') 1
('=', 'sent_tokenize') 1
('sent_tokenize', '(') 1
('(', 'text') 1
('text', ')') 1
(')', 'print') 1
('print', '(') 1
('(', 'sentences') 1
('sentences', ')') 1
(')', 'the') 1
('the', 'fisherman') 1
('fisherman', 'and') 1
('and', 'his') 1
('his', 'soul') 1
('soul', 'the') 1
('the

In [None]:
# trigrams
trigrams = nltk.ngrams(tokens, 3)
trigramsFD = nltk.FreqDist(trigrams)

for trigram in list(trigramsFD.items()):
    print(trigram[0], trigram[1])

('the', 'young', 'king') 1
('young', 'king', 'the') 1
('king', 'the', 'birthday') 1
('the', 'birthday', 'of') 1
('birthday', 'of', 'the') 1
('of', 'the', 'infanta') 1
('the', 'infanta', 'the') 1
('infanta', 'the', 'fisherman') 1
('the', 'fisherman', 'and') 1
('fisherman', 'and', 'his') 1
('and', 'his', 'soul') 1
('his', 'soul', 'the') 1
('soul', 'the', 'star-child') 1


# Lab Tasks

## Text file reading

In [3]:
with open('kingdomofheaven.txt', 'r', encoding = 'utf-8') as file:
    # Read the entire file as a single string
    history = file.read()
# Now, file_contents contains the entire content of the file as a single string
print(history)

THE KINGDOM OF HEAVEN
by
William Monahan
Early Draft
FOR EDUCATIONAL PURPOSES ONLY
FADE UP ON:
RAVENS in stripped trees. Frost clings to hedges, and low fog
lies on the November fields of France. A season of mud and
snow.
TITLE: “FRANCE, 1186”
Sustain the image of smoking fields and then (with the sound
of PICK AND SPADE...)
EXT. A CROSSRAODS. DAWN
OPEN CLOSE on the most medieval face you’ve ever seen: a
pale, injured, vengeful face, capable of a routine mask of
piety: a PRIEST. With a dirty fingernail the Priest flicks
out part of a frozen worm from a winter apple. We are at an
INTERSECTION of two lanes of near-frozen mud in HEDGEROW
COUNTRY.
Two GRAVEDIGGERS, a cold PRIEST, a BODY, At a CROSSROADS
overlooked by a Celtic cross.
ON SOUND, as the Priest contemplates his meal (He wants
better and is sure he deserves it) we hear...the PICK AND
SPADE at work, digging the grave at the EXACT CENTER of the
CROSSROADS.
GRAVEDIGGER (OS)
(singing)
I am Francois, to my dismay
(the SPADE digs into

### Task 1: City Summary (Sentence Tokenization)

Create a Python program that extracts all sentences containing the word Jerusalem.

### Task 2: Tokenization and n-grams
Implement tokenization technique to capture phrases or n-grams (uni-grams, bi-grams, tri-grams, uad-grams) from the text for following tasks.

* take sentence containing word kerak, display three lowest occuring uni-grams.
* Check how many tri-gram, bi-grams and quad-grams are there that has occurance more than 1.

### Task 3: Stopwords Removal
Remove common stopwords using NLTK from the word tokens.

* Take consective sentences that has word saladin or king on both and remove stop words and display filtered tokens.

### Task 4: Stemming and Lemmatization

* Apply Stemming and Lemmatization on entire dataset. Use adjective 'a' not verb 'v' in pos for Lemmatization. How many words are same before and after stemming and lemmatization.

### Task 5: Vocabulary

1. Display occurance of each word in text. How many unique words are in dataset?


### Task 6: Frequency plot (tokenization, matplotlib)
Analyze the geopolitical aspects mentioned in the text.

* Create visual representations (e.g., charts or graphs) using numpy, pandas, and matplotlib if needed, to illustrate these aspects. Frequency of occurence of name like Jerusalem, king, queen, valley, saladin.


In [4]:
#Question 1
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

text = history
sentences = sent_tokenize(text)
list_words = []
for i in sentences:
  if "Jerusalem" in text:
    print(i)
    list_words.append(i)
for j in list_words:
  print(j)

THE KINGDOM OF HEAVEN
by
William Monahan
Early Draft
FOR EDUCATIONAL PURPOSES ONLY
FADE UP ON:
RAVENS in stripped trees.
Frost clings to hedges, and low fog
lies on the November fields of France.
A season of mud and
snow.
TITLE: “FRANCE, 1186”
Sustain the image of smoking fields and then (with the sound
of PICK AND SPADE...)
EXT.
A CROSSRAODS.
DAWN
OPEN CLOSE on the most medieval face you’ve ever seen: a
pale, injured, vengeful face, capable of a routine mask of
piety: a PRIEST.
With a dirty fingernail the Priest flicks
out part of a frozen worm from a winter apple.
We are at an
INTERSECTION of two lanes of near-frozen mud in HEDGEROW
COUNTRY.
Two GRAVEDIGGERS, a cold PRIEST, a BODY, At a CROSSROADS
overlooked by a Celtic cross.
ON SOUND, as the Priest contemplates his meal (He wants
better and is sure he deserves it) we hear...the PICK AND
SPADE at work, digging the grave at the EXACT CENTER of the
CROSSROADS.
GRAVEDIGGER (OS)
(singing)
I am Francois, to my dismay
(the SPADE digs into

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
from re import T
#Question 2

# unigrams
# unigrams = nltk.ngrams(tokens, 1)
# unigramFD = nltk.FreqDist(unigrams)
list_to_store_words = []
new_word_list = []
count = 0
for sentence in sentences:
  if "kerak" in sentence.lower():
    words = nltk.word_tokenize(sentence)
    list_to_store_words.extend(words)
for sentence_new in sentences:
  words_new = nltk.word_tokenize(sentence_new)
  new_word_list.extend(words_new)

unigrams = nltk.ngrams(new_word_list, 1)
unigramFD = nltk.FreqDist(unigrams)
Bigrams = nltk.ngrams(list_to_store_words, 2)
BigramsFD = nltk.FreqDist(Bigrams)
Trigrams = nltk.ngrams(new_word_list, 3)
TrigramsFD = nltk.FreqDist(Trigrams)
Quadgrams = nltk.ngrams(new_word_list, 4)
QuadgramsFD = nltk.FreqDist(Quadgrams)
list_to_store_words.sort()

for token in list(unigramFD.items()):
  if token[1]==1:
    print(token[0])
    count+=1
  if count==3:
    break

counter_bigrams = 0
counter_trigrams = 0
counter_quad_grams = 0
for token in list(BigramsFD.items()):
  if token[1]>=2:
    counter_bigrams +=1

for token in list(TrigramsFD.items()):
  if token[1]>=3:
    counter_trigrams +=1
for token in list(QuadgramsFD.items()):
  if token[1]>=4:
    counter_quad_grams+=1
unigrams = nltk.ngrams(new_word_list, 1)
unigramFD = nltk.FreqDist(unigrams)
Bigrams = nltk.ngrams(list_to_store_words, 2)
BigramsFD = nltk.FreqDist(Bigrams)
Trigrams = nltk.ngrams(new_word_list, 3)
TrigramsFD = nltk.FreqDist(Trigrams)
Quadgrams = nltk.ngrams(new_word_list, 4)
QuadgramsFD = nltk.FreqDist(Quadgrams)
list_to_store_words.sort()
print(counter_bigrams)
print(counter_trigrams)
print(counter_quad_grams)

('HEAVEN',)
('William',)
('Monahan',)
19
948
154


In [4]:
#Question 3
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

filtered = []
sentences_new = sent_tokenize(history)
for i in range(len(sentences_new)):
  if 'saldin' or 'king' in sentences_new[i] and 'saldin' or 'king' in sentences_new[i+1]:

    words = nltk.word_tokenize(sentences_new[i])
    filtered.extend(words)

print(filtered)



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Shahz\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['THE', 'KINGDOM', 'OF', 'HEAVEN', 'by', 'William', 'Monahan', 'Early', 'Draft', 'FOR', 'EDUCATIONAL', 'PURPOSES', 'ONLY', 'FADE', 'UP', 'ON', ':', 'RAVENS', 'in', 'stripped', 'trees', '.', 'Frost', 'clings', 'to', 'hedges', ',', 'and', 'low', 'fog', 'lies', 'on', 'the', 'November', 'fields', 'of', 'France', '.', 'A', 'season', 'of', 'mud', 'and', 'snow', '.', 'TITLE', ':', '“', 'FRANCE', ',', '1186', '”', 'Sustain', 'the', 'image', 'of', 'smoking', 'fields', 'and', 'then', '(', 'with', 'the', 'sound', 'of', 'PICK', 'AND', 'SPADE', '...', ')', 'EXT', '.', 'A', 'CROSSRAODS', '.', 'DAWN', 'OPEN', 'CLOSE', 'on', 'the', 'most', 'medieval', 'face', 'you', '’', 've', 'ever', 'seen', ':', 'a', 'pale', ',', 'injured', ',', 'vengeful', 'face', ',', 'capable', 'of', 'a', 'routine', 'mask', 'of', 'piety', ':', 'a', 'PRIEST', '.', 'With', 'a', 'dirty', 'fingernail', 'the', 'Priest', 'flicks', 'out', 'part', 'of', 'a', 'frozen', 'worm', 'from', 'a', 'winter', 'apple', '.', 'We', 'are', 'at', 'an', 

In [None]:
#Question 4
from nltk.stem import PorterStemmer, WordNetLemmatizer
nltk.download('wordnet')
nltk.download('omw-1.4')

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

word = "a"
stemmed_word = stemmer.stem(word)
lemmatized_word = lemmatizer.lemmatize(word, pos="v")

print(f"Stemmed: {stemmed_word}, Lemmatized: {lemmatized_word}")

Stemmed: a, Lemmatized: a


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [5]:
#Question 5
list_to_store_words = []
with open('kingdomofheaven.txt', 'r', encoding='utf-8') as history:
  new_history = history.read()
  
for sentence_new in sentences:
  words_new = nltk.word_tokenize(sentence_new)
  list_to_store_words .extend(words_new)
counter = 0
unigrams = nltk.ngrams(new_word_list, 1)
unigramFD = nltk.FreqDist(unigrams)
print("The occurence of each word in list is: ")
for token in list(unigramFD.items()):
    print(token[0], token[1])
    if token[1]==1:
      counter+=1
print(f"The number of unique words present are: {counter}")

NameError: name 'new_word_list' is not defined

In [16]:
import nltk
from nltk.tokenize import WordPunctTokenizer
import string
def remove_punctuation(text):
    # Use WordPunctTokenizer to tokenize the text and then join the tokens without punctuation
    translator = str.maketrans(text)
    tokens = tokenizer.tokenize(text)
    text_without_punctuation = ' '.join(tokens)
    
    return text_without_punctuation

# Example usage:
text_with_punctuation = "Hello, world! This is an example text with punctuation."
text_without_punctuation = remove_punctuation(text_with_punctuation)

print("Original text:", text_with_punctuation)
print("Text without punctuation:", text_without_punctuation)


TypeError: if you give only one argument to maketrans it must be a dict

In [19]:
import string 
dir(string.punctuation)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'removeprefix',
 'removesuffix',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
