# Menggunakan NLTK

In [0]:
# Contoh Stemming di NLTK
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer

# Sample sentence containing mixed-case words; it is handed to each stemmer
# exactly as written.
T = 'presumably I would like to MultiPly my provision, saying tHat without crYing'
print('Sentence: ', T)

Names = ['Lancaster', 'Porter', 'SnowBall']
StemmerS = [LancasterStemmer, PorterStemmer, SnowballStemmer]

for label, stemmer_cls in zip(Names, StemmerS):
    # Only the Snowball class is constructed with a language argument.
    active = stemmer_cls('english') if label == 'SnowBall' else stemmer_cls()
    stemmed_tokens = [active.stem(token) for token in T.split()]
    print(label, ': ', ' '.join(stemmed_tokens))
# Note: no case normalization (lowercasing) is applied to the input here.

Sentence:  presumably I would like to MultiPly my provision, saying tHat without crYing
Lancaster :  presum i would lik to multiply my provision, say that without cry
Porter :  presum I would like to multipli my provision, say that without cri
SnowBall :  presum i would like to multipli my provision, say that without cri


In [0]:
# Stemming English Porter Stemmer
import nltk
from nltk.tokenize import word_tokenize 
# Tokenizer data needed by word_tokenize. Newer NLTK releases load the
# tokenizer from 'punkt_tab' instead of the pickled 'punkt' package, so both
# are requested. On a release whose index lacks one of them, nltk.download
# reports the problem and returns False rather than raising.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

def stemmingEnglish(str):
    """Tokenize English text and return its Porter stems joined by spaces.

    Args:
        str: Text to process. The name shadows the built-in ``str`` inside
            this function; it is kept so existing callers keep working.

    Returns:
        One string made of the stemmed tokens, separated by single spaces.
    """
    stem = PorterStemmer().stem
    return ' '.join(stem(token) for token in word_tokenize(str))

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [0]:
# Example sentence for the Porter-based helper. The bare call on the last line
# makes the notebook display the returned string.
text_data = "She had been with her father and sister when she was attacked and received first aid at the scene, an official said."
stemmingEnglish(text_data)

'she had been with her father and sister when she wa attack and receiv first aid at the scene , an offici said .'

In [0]:
# Stem a sentence token by token and print each original token next to its stem.
porter_stemmer = PorterStemmer()

word_data = "It originated from the idea that there are readers who prefer learning new skills from the comforts of their drawing rooms"
nltk_tokens = nltk.word_tokenize(word_data)   # tokenize the sentence first
for token in nltk_tokens:   # visit the tokens in order
    stemmed = porter_stemmer.stem(token)
    print("Actual: %s  Stem: %s" % (token, stemmed))

Actual: It  Stem: It
Actual: originated  Stem: origin
Actual: from  Stem: from
Actual: the  Stem: the
Actual: idea  Stem: idea
Actual: that  Stem: that
Actual: there  Stem: there
Actual: are  Stem: are
Actual: readers  Stem: reader
Actual: who  Stem: who
Actual: prefer  Stem: prefer
Actual: learning  Stem: learn
Actual: new  Stem: new
Actual: skills  Stem: skill
Actual: from  Stem: from
Actual: the  Stem: the
Actual: comforts  Stem: comfort
Actual: of  Stem: of
Actual: their  Stem: their
Actual: drawing  Stem: draw
Actual: rooms  Stem: room


In [0]:
# Print a fixed-width table comparing the Porter and Lancaster stems of each word.
porter = PorterStemmer()
lancaster = LancasterStemmer()

word_list = ["friend", "friendship", "friends", "friendships","stabil","destabilize","misunderstanding","railroad","moonlight","football"]

# Three left-aligned columns, each 20 characters wide.
format_row = "{0:20}{1:20}{2:20}".format
print(format_row("Word","Porter Stemmer","lancaster Stemmer"))
for entry in word_list:
    porter_stem = porter.stem(entry)
    lancaster_stem = lancaster.stem(entry)
    print(format_row(entry, porter_stem, lancaster_stem))

Word                Porter Stemmer      lancaster Stemmer   
friend              friend              friend              
friendship          friendship          friend              
friends             friend              friend              
friendships         friendship          friend              
stabil              stabil              stabl               
destabilize         destabil            dest                
misunderstanding    misunderstand       misunderstand       
railroad            railroad            railroad            
moonlight           moonlight           moonlight           
football            footbal             footbal             


# Menggunakan Sastrawi

In [0]:
pip install Sastrawi

Collecting Sastrawi
[?25l  Downloading https://files.pythonhosted.org/packages/6f/4b/bab676953da3103003730b8fcdfadbdd20f333d4add10af949dd5c51e6ed/Sastrawi-1.0.1-py2.py3-none-any.whl (209kB)
[K     |█▋                              | 10kB 21.2MB/s eta 0:00:01[K     |███▏                            | 20kB 3.3MB/s eta 0:00:01[K     |████▊                           | 30kB 4.0MB/s eta 0:00:01[K     |██████▎                         | 40kB 3.1MB/s eta 0:00:01[K     |███████▉                        | 51kB 3.4MB/s eta 0:00:01[K     |█████████▍                      | 61kB 4.1MB/s eta 0:00:01[K     |███████████                     | 71kB 4.3MB/s eta 0:00:01[K     |████████████▌                   | 81kB 4.6MB/s eta 0:00:01[K     |██████████████                  | 92kB 5.1MB/s eta 0:00:01[K     |███████████████▋                | 102kB 4.9MB/s eta 0:00:01[K     |█████████████████▏              | 112kB 4.9MB/s eta 0:00:01[K     |██████████████████▊             | 122kB 4.9MB/s 

In [0]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Short Indonesian sentence to run through the Sastrawi stemmer.
teks = "perayaan itu bebarengan dengan saat kita berpergian ke Jogjakarta"

# Stemmer obtained from the library's factory.
stemmer = StemmerFactory().create_stemmer()
stemmed_teks = stemmer.stem(teks)
print(stemmed_teks)

raya itu bebarengan dengan saat kita pergi ke jogjakarta


In [0]:
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Set up the stemmer first, then run it over a multi-sentence Indonesian text.
factory = StemmerFactory()
stemmer = factory.create_stemmer()

sentence_indo = "Saya menyukai belajar. Karena sangat menginginkan agar menjadi anak yang pintar. Saya juga ingin membuat bahagia kedua orang tua."

# stemmer_indo holds the stemmed text returned by the stemmer.
stemmer_indo = stemmer.stem(sentence_indo)
print(stemmer_indo)

saya suka ajar karena sangat ingin agar jadi anak yang pintar saya juga ingin buat bahagia dua orang tua


In [0]:
# Indonesian stemming with Sastrawi
_INDO_STEMMER = None  # created on first use by stemmingIndo, then reused


def stemmingIndo(str):
    """Stem Indonesian text with a Sastrawi stemmer.

    The stemmer is built through ``StemmerFactory`` only on the first call and
    reused afterwards, so calling this helper many times (for example once per
    document) does not repeat the factory setup each time.

    Args:
        str: Text to stem. The name shadows the built-in ``str`` inside this
            function; it is kept so existing callers keep working.

    Returns:
        The value returned by the stemmer's ``stem`` method for the text.
    """
    global _INDO_STEMMER
    if _INDO_STEMMER is None:
        _INDO_STEMMER = StemmerFactory().create_stemmer()
    return _INDO_STEMMER.stem(str)

# Example Indonesian text for the Sastrawi-based helper. The bare call on the
# last line makes the notebook display the returned string.
text_data = "Saya suka belajar. Karena ingin menjadi pintar. Selain itu, saya ingin membuat bahagia kedua orang tua."
stemmingIndo(text_data)

'saya suka ajar karena ingin jadi pintar selain itu saya ingin buat bahagia dua orang tua'