### Creation of Dataset

In [1]:
import pandas as pd
import requests

In [2]:
url = 'https://api.themoviedb.org/3/movie/top_rated?api_key=8265bd1679663a7ea12ac168da84d2e8&language=en-US&page=1'

In [3]:
r = requests.get(url)

In [4]:
json = r.json()

In [5]:
json

{'page': 1,
 'results': [{'adult': False,
   'backdrop_path': '/tmU7GeKVybMWFButWEGl2M4GeiP.jpg',
   'genre_ids': [18, 80],
   'id': 238,
   'original_language': 'en',
   'original_title': 'The Godfather',
   'overview': 'Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime family. When organized crime family patriarch, Vito Corleone barely survives an attempt on his life, his youngest son, Michael steps in to take care of the would-be killers, launching a campaign of bloody revenge.',
   'popularity': 121.392,
   'poster_path': '/3bhkrj58Vtu7enYsRolD1fZdja1.jpg',
   'release_date': '1972-03-14',
   'title': 'The Godfather',
   'video': False,
   'vote_average': 8.7,
   'vote_count': 17282},
  {'adult': False,
   'backdrop_path': '/wPU78OPN4BYEgWYdXyg0phMee64.jpg',
   'genre_ids': [18, 80],
   'id': 278,
   'original_language': 'en',
   'original_title': 'The Shawshank Redemption',
   'overview': 'Framed in the 1940s for the double murder of his

In [6]:
json.keys()

dict_keys(['page', 'results', 'total_pages', 'total_results'])

In [7]:
titles=[]
for value in json['results']:
   titles.append(value['title'])

In [8]:
titles

['The Godfather',
 'The Shawshank Redemption',
 'Puss in Boots: The Last Wish',
 'The Godfather Part II',
 "Schindler's List",
 'Dilwale Dulhania Le Jayenge',
 'Spirited Away',
 '12 Angry Men',
 'Your Name.',
 'Parasite',
 'Dou kyu sei – Classmates',
 'The Green Mile',
 'The Dark Knight',
 'Pulp Fiction',
 'The Good, the Bad and the Ugly',
 'Forrest Gump',
 "Gabriel's Inferno",
 'The Lord of the Rings: The Return of the King',
 'GoodFellas',
 "Gabriel's Inferno: Part II"]

In [9]:
overview=[]
for value in json['results']:
   overview.append(value['overview'])

In [10]:
overview

['Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime family. When organized crime family patriarch, Vito Corleone barely survives an attempt on his life, his youngest son, Michael steps in to take care of the would-be killers, launching a campaign of bloody revenge.',
 'Framed in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope.',
 'Puss in Boots discovers that his passion for adventure has taken its toll: He has burned through eight of his nine lives, leaving him with only one life left. Puss sets out on an epic journey to find the mythical Last Wish and restore his nine lives.',
 'In the continuing saga of the Corleone

In [11]:
genre_ids=[]
for value in json['results']:
   genre_ids.append(value['genre_ids'])

In [12]:
genre_ids

[[18, 80],
 [18, 80],
 [16, 28, 12, 35, 10751, 14],
 [18, 80],
 [18, 36, 10752],
 [35, 18, 10749],
 [16, 10751, 14],
 [18],
 [10749, 16, 18],
 [35, 53, 18],
 [10749, 16],
 [14, 18, 80],
 [18, 28, 80, 53],
 [53, 80],
 [37],
 [35, 18, 10749],
 [10749],
 [12, 14, 28],
 [18, 80],
 [10749]]

In [13]:
df = pd.DataFrame(list(zip(titles, overview, genre_ids)),columns =['Title', 'Description', 'Genre'])
df

Unnamed: 0,Title,Description,Genre
0,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...","[18, 80]"
1,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,"[18, 80]"
2,Puss in Boots: The Last Wish,Puss in Boots discovers that his passion for a...,"[16, 28, 12, 35, 10751, 14]"
3,The Godfather Part II,In the continuing saga of the Corleone crime f...,"[18, 80]"
4,Schindler's List,The true story of how businessman Oskar Schind...,"[18, 36, 10752]"
5,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...","[35, 18, 10749]"
6,Spirited Away,"A young girl, Chihiro, becomes trapped in a st...","[16, 10751, 14]"
7,12 Angry Men,The defense and the prosecution have rested an...,[18]
8,Your Name.,High schoolers Mitsuha and Taki are complete s...,"[10749, 16, 18]"
9,Parasite,"All unemployed, Ki-taek's family takes peculia...","[35, 53, 18]"


### Text Preprocessing
* Convert to lowercase
* Remove html tags
* Remove URL's
* Remove Punctuation
* Chat Words Treatment (FYI, ASAP etc.)
* Spelling Correction
* Removing Stopwords
* Handling Emoji's
* Tokenization
* Stemming
* Lemmatization


In [14]:
# Lowercase
df['Description']=df['Description'].str.lower()

In [15]:
# Remove html tags
import re
def remove_html_tags(text):
    pattern = re.compile('<.*?>')
    return pattern.sub(r'', text)

In [16]:
df['Description'] = df['Description'].apply(remove_html_tags)

In [17]:
# Remove URL's
def remove_url(text):
    pattern = re.compile(r'https?://\S+|www\.\S+')
    return pattern.sub(r'', text)

In [18]:
df['Description'] = df['Description'].apply(remove_url)

In [19]:
# Remove Punctuations
import string
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [20]:
exclude = string.punctuation

In [21]:
def remove_punc1(text):
    return text.translate(str.maketrans('', '', exclude))

In [22]:
df['Description'] = df['Description'].apply(remove_punc1)

In [23]:
# Chatword Treatment
chat_words = {
    "$" : " dollar ",
    "€" : " euro ",
    "4ao" : "for adults only",
    "a.m" : "before midday",
    "a3" : "anytime anywhere anyplace",
    "aamof" : "as a matter of fact",
    "acct" : "account",
    "adih" : "another day in hell",
    "afaic" : "as far as i am concerned",
    "afaict" : "as far as i can tell",
    "afaik" : "as far as i know",
    "afair" : "as far as i remember",
    "afk" : "away from keyboard",
    "app" : "application",
    "approx" : "approximately",
    "apps" : "applications",
    "asap" : "as soon as possible",
    "asl" : "age, sex, location",
    "atk" : "at the keyboard",
    "ave." : "avenue",
    "aymm" : "are you my mother",
    "ayor" : "at your own risk", 
    "b&b" : "bed and breakfast",
    "b+b" : "bed and breakfast",
    "b.c" : "before christ",
    "b2b" : "business to business",
    "b2c" : "business to customer",
    "b4" : "before",
    "b4n" : "bye for now",
    "b@u" : "back at you",
    "bae" : "before anyone else",
    "bak" : "back at keyboard",
    "bbbg" : "bye bye be good",
    "bbc" : "british broadcasting corporation",
    "bbias" : "be back in a second",
    "bbl" : "be back later",
    "bbs" : "be back soon",
    "be4" : "before",
    "bfn" : "bye for now",
    "blvd" : "boulevard",
    "bout" : "about",
    "brb" : "be right back",
    "bros" : "brothers",
    "brt" : "be right there",
    "bsaaw" : "big smile and a wink",
    "btw" : "by the way",
    "bwl" : "bursting with laughter",
    "c/o" : "care of",
    "cet" : "central european time",
    "cf" : "compare",
    "cia" : "central intelligence agency",
    "csl" : "can not stop laughing",
    "cu" : "see you",
    "cul8r" : "see you later",
    "cv" : "curriculum vitae",
    "cwot" : "complete waste of time",
    "cya" : "see you",
    "cyt" : "see you tomorrow",
    "dae" : "does anyone else",
    "dbmib" : "do not bother me i am busy",
    "diy" : "do it yourself",
    "dm" : "direct message",
    "dwh" : "during work hours",
    "e123" : "easy as one two three",
    "eet" : "eastern european time",
    "eg" : "example",
    "embm" : "early morning business meeting",
    "encl" : "enclosed",
    "encl." : "enclosed",
    "etc" : "and so on",
    "faq" : "frequently asked questions",
    "fawc" : "for anyone who cares",
    "fb" : "facebook",
    "fc" : "fingers crossed",
    "fig" : "figure",
    "fimh" : "forever in my heart", 
    "ft." : "feet",
    "ft" : "featuring",
    "ftl" : "for the loss",
    "ftw" : "for the win",
    "fwiw" : "for what it is worth",
    "fyi" : "for your information",
    "g9" : "genius",
    "gahoy" : "get a hold of yourself",
    "gal" : "get a life",
    "gcse" : "general certificate of secondary education",
    "gfn" : "gone for now",
    "gg" : "good game",
    "gl" : "good luck",
    "glhf" : "good luck have fun",
    "gmt" : "greenwich mean time",
    "gmta" : "great minds think alike",
    "gn" : "good night",
    "g.o.a.t" : "greatest of all time",
    "goat" : "greatest of all time",
    "goi" : "get over it",
    "gps" : "global positioning system",
    "gr8" : "great",
    "gratz" : "congratulations",
    "gyal" : "girl",
    "h&c" : "hot and cold",
    "hp" : "horsepower",
    "hr" : "hour",
    "hrh" : "his royal highness",
    "ht" : "height",
    "ibrb" : "i will be right back",
    "ic" : "i see",
    "icq" : "i seek you",
    "icymi" : "in case you missed it",
    "idc" : "i do not care",
    "idgadf" : "i do not give a damn fuck",
    "idgaf" : "i do not give a fuck",
    "idk" : "i do not know",
    "ie" : "that is",
    "i.e" : "that is",
    "ifyp" : "i feel your pain",
    "IG" : "instagram",
    "iirc" : "if i remember correctly",
    "ilu" : "i love you",
    "ily" : "i love you",
    "imho" : "in my humble opinion",
    "imo" : "in my opinion",
    "imu" : "i miss you",
    "iow" : "in other words",
    "irl" : "in real life",
    "j4f" : "just for fun",
    "jic" : "just in case",
    "jk" : "just kidding",
    "jsyk" : "just so you know",
    "l8r" : "later",
    "lb" : "pound",
    "lbs" : "pounds",
    "ldr" : "long distance relationship",
    "lmao" : "laugh my ass off",
    "lmfao" : "laugh my fucking ass off",
    "lol" : "laughing out loud",
    "ltd" : "limited",
    "ltns" : "long time no see",
    "m8" : "mate",
    "mf" : "motherfucker",
    "mfs" : "motherfuckers",
    "mfw" : "my face when",
    "mofo" : "motherfucker",
    "mph" : "miles per hour",
    "mr" : "mister",
    "mrw" : "my reaction when",
    "ms" : "miss",
    "mte" : "my thoughts exactly",
    "nagi" : "not a good idea",
    "nbc" : "national broadcasting company",
    "nbd" : "not big deal",
    "nfs" : "not for sale",
    "ngl" : "not going to lie",
    "nhs" : "national health service",
    "nrn" : "no reply necessary",
    "nsfl" : "not safe for life",
    "nsfw" : "not safe for work",
    "nth" : "nice to have",
    "nvr" : "never",
    "nyc" : "new york city",
    "oc" : "original content",
    "og" : "original",
    "ohp" : "overhead projector",
    "oic" : "oh i see",
    "omdb" : "over my dead body",
    "omg" : "oh my god",
    "omw" : "on my way",
    "p.a" : "per annum",
    "p.m" : "after midday",
    "pm" : "prime minister",
    "poc" : "people of color",
    "pov" : "point of view",
    "pp" : "pages",
    "ppl" : "people",
    "prw" : "parents are watching",
    "ps" : "postscript",
    "pt" : "point",
    "ptb" : "please text back",
    "pto" : "please turn over",
    "qpsa" : "what happens", #"que pasa",
    "ratchet" : "rude",
    "rbtl" : "read between the lines",
    "rlrt" : "real life retweet", 
    "rofl" : "rolling on the floor laughing",
    "roflol" : "rolling on the floor laughing out loud",
    "rotflmao" : "rolling on the floor laughing my ass off",
    "rt" : "retweet",
    "ruok" : "are you ok",
    "sfw" : "safe for work",
    "sk8" : "skate",
    "smh" : "shake my head",
    "sq" : "square",
    "srsly" : "seriously", 
    "ssdd" : "same stuff different day",
    "tbh" : "to be honest",
    "tbs" : "tablespooful",
    "tbsp" : "tablespooful",
    "tfw" : "that feeling when",
    "thks" : "thank you",
    "tho" : "though",
    "thx" : "thank you",
    "tia" : "thanks in advance",
    "til" : "today i learned",
    "tl;dr" : "too long i did not read",
    "tldr" : "too long i did not read",
    "tmb" : "tweet me back",
    "tntl" : "trying not to laugh",
    "ttyl" : "talk to you later",
    "u" : "you",
    "u2" : "you too",
    "u4e" : "yours for ever",
    "utc" : "coordinated universal time",
    "w/" : "with",
    "w/o" : "without",
    "w8" : "wait",
    "wassup" : "what is up",
    "wb" : "welcome back",
    "wtf" : "what the fuck",
    "wtg" : "way to go",
    "wtpa" : "where the party at",
    "wuf" : "where are you from",
    "wuzup" : "what is up",
    "wywh" : "wish you were here",
    "yd" : "yard",
    "ygtr" : "you got that right",
    "ynk" : "you never know",
    "zzz" : "sleeping bored and tired"
}

In [24]:
def chat_conversion(text):
    new_text = []
    for w in text.split():
        if w.lower() in chat_words:
            new_text.append(chat_words[w.lower()])
        else:
            new_text.append(w)
    return " ".join(new_text)

In [25]:
chat_conversion('IMHO he is the best')

'in my humble opinion he is the best'

In [26]:
df['Description'] = df['Description'].apply(chat_conversion)

In [27]:
# Spelling Correction
from textblob import TextBlob

In [28]:
incorrect_text = 'ceertain conditionas duriing seveal ggenerations aree moodified in the saame maner.'

textBlb = TextBlob(incorrect_text)

textBlb.correct().string

'certain conditions during several generations are modified in the same manner.'

In [29]:
df['Description']

0     spanning the years 1945 to 1955 a chronicle of...
1     framed in the 1940s for the double murder of h...
2     puss in boots discovers that his passion for a...
3     in the continuing saga of the corleone crime f...
4     the true story of how businessman oskar schind...
5     raj is a rich carefree happygolucky second gen...
6     a young girl chihiro becomes trapped in a stra...
7     the defense and the prosecution have rested an...
8     high schoolers mitsuha and taki are complete s...
9     all unemployed kitaeks family takes peculiar i...
10    rihito sajo an honor student with a perfect sc...
11    a supernatural tale set on death row in a sout...
12    batman raises the stakes in his war on crime w...
13    a burgerloving hit man his philosophical partn...
14    while the civil war rages on between the union...
15    a man with a low iq has accomplished great thi...
16    an intriguing and sinful exploration of seduct...
17    aragorn is revealed as the heir to the anc

In [30]:
# Removing Stopwords
from nltk.corpus import stopwords

In [31]:
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [32]:
def remove_stopwords(text):
    new_text = []
    
    for word in text.split():
        if word in stopwords.words('english'):
            new_text.append('')
        else:
            new_text.append(word)
    x = new_text[:]
    new_text.clear()
    return " ".join(x)

In [33]:
remove_stopwords('probably my all-time favorite movie, a story of selflessness, sacrifice and dedication to a noble cause, but it\'s not preachy or boring. it just never gets old, despite my having seen it some 15 or more times')

'probably  all-time favorite movie,  story  selflessness, sacrifice  dedication   noble cause,    preachy  boring.   never gets old, despite   seen   15   times'

In [34]:
df.head()

Unnamed: 0,Title,Description,Genre
0,The Godfather,spanning the years 1945 to 1955 a chronicle of...,"[18, 80]"
1,The Shawshank Redemption,framed in the 1940s for the double murder of h...,"[18, 80]"
2,Puss in Boots: The Last Wish,puss in boots discovers that his passion for a...,"[16, 28, 12, 35, 10751, 14]"
3,The Godfather Part II,in the continuing saga of the corleone crime f...,"[18, 80]"
4,Schindler's List,the true story of how businessman oskar schind...,"[18, 36, 10752]"


In [35]:
df['Description'].apply(remove_stopwords)

0     spanning  years 1945  1955  chronicle   fictio...
1     framed   1940s   double murder   wife   lover ...
2     puss  boots discovers   passion  adventure  ta...
3       continuing saga   corleone crime family  you...
4      true story   businessman oskar schindler save...
5     raj   rich carefree happygolucky second genera...
6      young girl chihiro becomes trapped   strange ...
7      defense   prosecution  rested   jury  filing ...
8     high schoolers mitsuha  taki  complete strange...
9      unemployed kitaeks family takes peculiar inte...
10    rihito sajo  honor student   perfect score   e...
11     supernatural tale set  death row   southern p...
12    batman raises  stakes   war  crime   help  lt ...
13     burgerloving hit man  philosophical partner  ...
14      civil war rages    union   confederacy three...
15     man   low iq  accomplished great things   lif...
16     intriguing  sinful exploration  seduction for...
17    aragorn  revealed   heir   ancient kings  

In [36]:
# Handling Emojis
import re
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

In [37]:
remove_emoji("Loved the movie. It was 😘😘")

'Loved the movie. It was '

In [38]:
import emoji
print(emoji.demojize('Python is 🔥'))

Python is :fire:


In [55]:
df['Description'] = df['Description'].apply(remove_emoji)

In [39]:
# Tokenization
from nltk.tokenize import word_tokenize,sent_tokenize

In [40]:
sent1 = 'I am going to visit delhi!'
word_tokenize(sent1)

['I', 'am', 'going', 'to', 'visit', 'delhi', '!']

In [41]:
text = """Lorem Ipsum is simply dummy text of the printing and typesetting industry? 
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, 
when an unknown printer took a galley of type and scrambled it to make a type specimen book."""

sent_tokenize(text)

['Lorem Ipsum is simply dummy text of the printing and typesetting industry?',
 "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, \nwhen an unknown printer took a galley of type and scrambled it to make a type specimen book."]

In [42]:
sent5 = 'I have a Ph.D in A.I'
sent6 = "We're here to help! mail us at nks@gmail.com"
sent7 = 'A 5km ride cost $10.50'

word_tokenize(sent5)

['I', 'have', 'a', 'Ph.D', 'in', 'A.I']

In [43]:
word_tokenize(sent6)

['We',
 "'re",
 'here',
 'to',
 'help',
 '!',
 'mail',
 'us',
 'at',
 'nks',
 '@',
 'gmail.com']

In [59]:
df['Description']

0     spanning the years 1945 to 1955 a chronicle of...
1     framed in the 1940s for the double murder of h...
2     puss in boots discovers that his passion for a...
3     in the continuing saga of the corleone crime f...
4     the true story of how businessman oskar schind...
5     raj is a rich carefree happygolucky second gen...
6     a young girl chihiro becomes trapped in a stra...
7     the defense and the prosecution have rested an...
8     high schoolers mitsuha and taki are complete s...
9     all unemployed kitaeks family takes peculiar i...
10    rihito sajo an honor student with a perfect sc...
11    a supernatural tale set on death row in a sout...
12    batman raises the stakes in his war on crime w...
13    a burgerloving hit man his philosophical partn...
14    while the civil war rages on between the union...
15    a man with a low iq has accomplished great thi...
16    an intriguing and sinful exploration of seduct...
17    aragorn is revealed as the heir to the anc

In [44]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [45]:
doc1 = nlp(sent5)
doc2 = nlp(sent6)
doc3 = nlp(sent7)
doc4 = nlp(sent1)

In [46]:
for token in doc4:
    print(token)

I
am
going
to
visit
delhi
!


In [47]:
from nltk.stem.porter import PorterStemmer

In [48]:
ps = PorterStemmer()
def stem_words(text):
    return " ".join([ps.stem(word) for word in text.split()])

In [49]:
sample = "walk walks walking walked"
stem_words(sample)

'walk walk walk walk'

In [50]:
text = 'probably my alltime favorite movie a story of selflessness sacrifice and dedication to a noble cause but its not preachy or boring it just never gets old despite my having seen it some 15 or more times in the last 25 years paul lukas performance brings tears to my eyes and bette davis in one of her very few truly sympathetic roles is a delight the kids are as grandma says more like dressedup midgets than children but that only makes them more fun to watch and the mothers slow awakening to whats happening in the world and under her own roof is believable and startling if i had a dozen thumbs theyd all be up for this movie'
print(text)

probably my alltime favorite movie a story of selflessness sacrifice and dedication to a noble cause but its not preachy or boring it just never gets old despite my having seen it some 15 or more times in the last 25 years paul lukas performance brings tears to my eyes and bette davis in one of her very few truly sympathetic roles is a delight the kids are as grandma says more like dressedup midgets than children but that only makes them more fun to watch and the mothers slow awakening to whats happening in the world and under her own roof is believable and startling if i had a dozen thumbs theyd all be up for this movie


In [51]:
stem_words(text)

'probabl my alltim favorit movi a stori of selfless sacrific and dedic to a nobl caus but it not preachi or bore it just never get old despit my have seen it some 15 or more time in the last 25 year paul luka perform bring tear to my eye and bett davi in one of her veri few truli sympathet role is a delight the kid are as grandma say more like dressedup midget than children but that onli make them more fun to watch and the mother slow awaken to what happen in the world and under her own roof is believ and startl if i had a dozen thumb theyd all be up for thi movi'

In [54]:
#Lemmatization
import nltk
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()

sentence = "He was running and eating at same time. He has bad habit of swimming after playing long hours in the Sun."
punctuations="?:!.,;"
sentence_words = nltk.word_tokenize(sentence)
for word in sentence_words:
    if word in punctuations:
        sentence_words.remove(word)

sentence_words
print("{0:20}{1:20}".format("Word","Lemma"))
for word in sentence_words:
    print ("{0:20}{1:20}".format(word,wordnet_lemmatizer.lemmatize(word,pos='v')))

Word                Lemma               
He                  He                  
was                 be                  
running             run                 
and                 and                 
eating              eat                 
at                  at                  
same                same                
time                time                
He                  He                  
has                 have                
bad                 bad                 
habit               habit               
of                  of                  
swimming            swim                
after               after               
playing             play                
long                long                
hours               hours               
in                  in                  
the                 the                 
Sun                 Sun                 
