#### 【 자연어 처리용 형태소분석 - NLTK 】
- 정규화
    * 영어 -> 대소문자 통일
    * 의미는 같지만 형태가 다른 토큰 -> 하나의 형태로 통일
    * 어간추출(Stemming)        : 단어의 형태(규칙)만을 기준으로 어미를 잘라 어간을 추출
    * 표제어추출(Lemmatization) : 사전과 품사 정보를 활용해 단어의 기본형(표제어)을 추출
    
- nltk.stem 서브모듈 활용

In [1]:
## 모듈 로딩
from nltk.stem import WordNetLemmatizer                 ## 표제어 추출
from nltk.stem import PorterStemmer, LancasterStemmer   ## 어간/어미 추출


[어간/어미 추출] <hr>

In [None]:
## -----------------------------------------------
## [1] Prepare the sample data
## -----------------------------------------------
words = [
    'policy', 'doing', 'organization', 'have', 'going', 'love',
    'lives', 'fly', 'dies', 'watched', 'has', 'starting',
]

## -----------------------------------------------
## [2] Stemming: split each word into stem/ending
##     based on its surface form only
## -----------------------------------------------
portStem = PorterStemmer()
lanStem  = LancasterStemmer()

## Print one row per word: the original word, then the Lancaster stem
## and the Porter stem side by side so the two can be compared.
for word in words:
    lan_stem  = lanStem.stem(word)
    port_stem = portStem.stem(word)
    print(f'{word:<16} ---> Lan: {lan_stem:<16}  Port : {port_stem}')


policy           ---> Lan: policy            Port : polici
doing            ---> Lan: doing             Port : do
organization     ---> Lan: org               Port : organ
have             ---> Lan: hav               Port : have
going            ---> Lan: going             Port : go
love             ---> Lan: lov               Port : love
lives            ---> Lan: liv               Port : live
fly              ---> Lan: fly               Port : fli
dies             ---> Lan: die               Port : die
watched          ---> Lan: watch             Port : watch
has              ---> Lan: has               Port : ha
starting         ---> Lan: start             Port : start


In [8]:
## -----------------------------------------------
## [3] Lemmatization: dictionary / part-of-speech
##     based reduction of each word to its lemma
## -----------------------------------------------
wordLem = WordNetLemmatizer()  ## default part of speech: noun ("n")

## For every word, show the lemma obtained with the default POS (noun)
## next to the lemma obtained when the word is treated as a verb.
for word in words:
    noun_col = f'{word:<16} ---> Pos="n" :{wordLem.lemmatize(word):<16} '
    verb_col = f'Pos="v" :{wordLem.lemmatize(word, pos="v"):<16} '
    print(noun_col, verb_col, sep='')

policy           ---> Pos="n" :policy           Pos="v" :policy           
doing            ---> Pos="n" :doing            Pos="v" :do               
organization     ---> Pos="n" :organization     Pos="v" :organization     
have             ---> Pos="n" :have             Pos="v" :have             
going            ---> Pos="n" :going            Pos="v" :go               
love             ---> Pos="n" :love             Pos="v" :love             
lives            ---> Pos="n" :life             Pos="v" :live             
fly              ---> Pos="n" :fly              Pos="v" :fly              
dies             ---> Pos="n" :dy               Pos="v" :die              
watched          ---> Pos="n" :watched          Pos="v" :watch            
has              ---> Pos="n" :ha               Pos="v" :have             
starting         ---> Pos="n" :starting         Pos="v" :start            
