In [1]:
paragraph = """Resident Evil Village[a] is a 2021 survival horror game developed and published by Capcom. It is the sequel to Resident Evil 7: Biohazard (2017) and the tenth main game of the Resident Evil series. Players control Ethan Winters, who searches for his kidnapped daughter in a village filled with mutant creatures. Village maintains survival horror elements from previous games, with players scavenging environments for items and managing resources while adding more action-oriented gameplay, with higher enemy counts and a greater emphasis on combat.

Resident Evil Village was announced at the PlayStation 5 reveal event in June 2020 and was released for PlayStation 4, PlayStation 5, Stadia, Windows, Xbox One, and Xbox Series X/S on May 7, 2021. This was followed by a macOS version and a cloud version for Nintendo Switch in October 2022, and a PlayStation VR2 version on February 22, 2023. An iOS version was released on October 30, 2023.

Resident Evil Village received generally positive reviews from critics, with praise for its gameplay, setting, and variety, but criticism for its puzzles, boss fights, and performance issues on the Windows version; the increased focus on action divided opinions. The game won year-end accolades including Game of the Year at the Golden Joystick Awards. It had sold over 10 million units by March 2024."""

In [2]:
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords

In [4]:
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [5]:
stopwords.words('spanish')

['de',
 'la',
 'que',
 'el',
 'en',
 'y',
 'a',
 'los',
 'del',
 'se',
 'las',
 'por',
 'un',
 'para',
 'con',
 'no',
 'una',
 'su',
 'al',
 'lo',
 'como',
 'más',
 'pero',
 'sus',
 'le',
 'ya',
 'o',
 'este',
 'sí',
 'porque',
 'esta',
 'entre',
 'cuando',
 'muy',
 'sin',
 'sobre',
 'también',
 'me',
 'hasta',
 'hay',
 'donde',
 'quien',
 'desde',
 'todo',
 'nos',
 'durante',
 'todos',
 'uno',
 'les',
 'ni',
 'contra',
 'otros',
 'ese',
 'eso',
 'ante',
 'ellos',
 'e',
 'esto',
 'mí',
 'antes',
 'algunos',
 'qué',
 'unos',
 'yo',
 'otro',
 'otras',
 'otra',
 'él',
 'tanto',
 'esa',
 'estos',
 'mucho',
 'quienes',
 'nada',
 'muchos',
 'cual',
 'poco',
 'ella',
 'estar',
 'estas',
 'algunas',
 'algo',
 'nosotros',
 'mi',
 'mis',
 'tú',
 'te',
 'ti',
 'tu',
 'tus',
 'ellas',
 'nosotras',
 'vosotros',
 'vosotras',
 'os',
 'mío',
 'mía',
 'míos',
 'mías',
 'tuyo',
 'tuya',
 'tuyos',
 'tuyas',
 'suyo',
 'suya',
 'suyos',
 'suyas',
 'nuestro',
 'nuestra',
 'nuestros',
 'nuestras',
 'vuestro'

In [6]:
stopwords.words('arabic')

['إذ',
 'إذا',
 'إذما',
 'إذن',
 'أف',
 'أقل',
 'أكثر',
 'ألا',
 'إلا',
 'التي',
 'الذي',
 'الذين',
 'اللاتي',
 'اللائي',
 'اللتان',
 'اللتيا',
 'اللتين',
 'اللذان',
 'اللذين',
 'اللواتي',
 'إلى',
 'إليك',
 'إليكم',
 'إليكما',
 'إليكن',
 'أم',
 'أما',
 'أما',
 'إما',
 'أن',
 'إن',
 'إنا',
 'أنا',
 'أنت',
 'أنتم',
 'أنتما',
 'أنتن',
 'إنما',
 'إنه',
 'أنى',
 'أنى',
 'آه',
 'آها',
 'أو',
 'أولاء',
 'أولئك',
 'أوه',
 'آي',
 'أي',
 'أيها',
 'إي',
 'أين',
 'أين',
 'أينما',
 'إيه',
 'بخ',
 'بس',
 'بعد',
 'بعض',
 'بك',
 'بكم',
 'بكم',
 'بكما',
 'بكن',
 'بل',
 'بلى',
 'بما',
 'بماذا',
 'بمن',
 'بنا',
 'به',
 'بها',
 'بهم',
 'بهما',
 'بهن',
 'بي',
 'بين',
 'بيد',
 'تلك',
 'تلكم',
 'تلكما',
 'ته',
 'تي',
 'تين',
 'تينك',
 'ثم',
 'ثمة',
 'حاشا',
 'حبذا',
 'حتى',
 'حيث',
 'حيثما',
 'حين',
 'خلا',
 'دون',
 'ذا',
 'ذات',
 'ذاك',
 'ذان',
 'ذانك',
 'ذلك',
 'ذلكم',
 'ذلكما',
 'ذلكن',
 'ذه',
 'ذو',
 'ذوا',
 'ذواتا',
 'ذواتي',
 'ذي',
 'ذين',
 'ذينك',
 'ريث',
 'سوف',
 'سوى',
 'شتان',
 'عدا',
 'عسى',
 'عل'

In [7]:
stopwords.words('russian')

['и',
 'в',
 'во',
 'не',
 'что',
 'он',
 'на',
 'я',
 'с',
 'со',
 'как',
 'а',
 'то',
 'все',
 'она',
 'так',
 'его',
 'но',
 'да',
 'ты',
 'к',
 'у',
 'же',
 'вы',
 'за',
 'бы',
 'по',
 'только',
 'ее',
 'мне',
 'было',
 'вот',
 'от',
 'меня',
 'еще',
 'нет',
 'о',
 'из',
 'ему',
 'теперь',
 'когда',
 'даже',
 'ну',
 'вдруг',
 'ли',
 'если',
 'уже',
 'или',
 'ни',
 'быть',
 'был',
 'него',
 'до',
 'вас',
 'нибудь',
 'опять',
 'уж',
 'вам',
 'ведь',
 'там',
 'потом',
 'себя',
 'ничего',
 'ей',
 'может',
 'они',
 'тут',
 'где',
 'есть',
 'надо',
 'ней',
 'для',
 'мы',
 'тебя',
 'их',
 'чем',
 'была',
 'сам',
 'чтоб',
 'без',
 'будто',
 'чего',
 'раз',
 'тоже',
 'себе',
 'под',
 'будет',
 'ж',
 'тогда',
 'кто',
 'этот',
 'того',
 'потому',
 'этого',
 'какой',
 'совсем',
 'ним',
 'здесь',
 'этом',
 'один',
 'почти',
 'мой',
 'тем',
 'чтобы',
 'нее',
 'сейчас',
 'были',
 'куда',
 'зачем',
 'всех',
 'никогда',
 'можно',
 'при',
 'наконец',
 'два',
 'об',
 'другой',
 'хоть',
 'после',
 'на

In [9]:
stopwords.words('portuguese')

['a',
 'à',
 'ao',
 'aos',
 'aquela',
 'aquelas',
 'aquele',
 'aqueles',
 'aquilo',
 'as',
 'às',
 'até',
 'com',
 'como',
 'da',
 'das',
 'de',
 'dela',
 'delas',
 'dele',
 'deles',
 'depois',
 'do',
 'dos',
 'e',
 'é',
 'ela',
 'elas',
 'ele',
 'eles',
 'em',
 'entre',
 'era',
 'eram',
 'éramos',
 'essa',
 'essas',
 'esse',
 'esses',
 'esta',
 'está',
 'estamos',
 'estão',
 'estar',
 'estas',
 'estava',
 'estavam',
 'estávamos',
 'este',
 'esteja',
 'estejam',
 'estejamos',
 'estes',
 'esteve',
 'estive',
 'estivemos',
 'estiver',
 'estivera',
 'estiveram',
 'estivéramos',
 'estiverem',
 'estivermos',
 'estivesse',
 'estivessem',
 'estivéssemos',
 'estou',
 'eu',
 'foi',
 'fomos',
 'for',
 'fora',
 'foram',
 'fôramos',
 'forem',
 'formos',
 'fosse',
 'fossem',
 'fôssemos',
 'fui',
 'há',
 'haja',
 'hajam',
 'hajamos',
 'hão',
 'havemos',
 'haver',
 'hei',
 'houve',
 'houvemos',
 'houver',
 'houvera',
 'houverá',
 'houveram',
 'houvéramos',
 'houverão',
 'houverei',
 'houverem',
 'hou

In [13]:
stopwords.words('turkish')

['acaba',
 'ama',
 'aslında',
 'az',
 'bazı',
 'belki',
 'biri',
 'birkaç',
 'birşey',
 'biz',
 'bu',
 'çok',
 'çünkü',
 'da',
 'daha',
 'de',
 'defa',
 'diye',
 'eğer',
 'en',
 'gibi',
 'hem',
 'hep',
 'hepsi',
 'her',
 'hiç',
 'için',
 'ile',
 'ise',
 'kez',
 'ki',
 'kim',
 'mı',
 'mu',
 'mü',
 'nasıl',
 'ne',
 'neden',
 'nerde',
 'nerede',
 'nereye',
 'niçin',
 'niye',
 'o',
 'sanki',
 'şey',
 'siz',
 'şu',
 'tüm',
 've',
 'veya',
 'ya',
 'yani']

In [15]:
stopwords.words('chinese')

['一',
 '一下',
 '一些',
 '一切',
 '一则',
 '一天',
 '一定',
 '一方面',
 '一旦',
 '一时',
 '一来',
 '一样',
 '一次',
 '一片',
 '一直',
 '一致',
 '一般',
 '一起',
 '一边',
 '一面',
 '万一',
 '上下',
 '上升',
 '上去',
 '上来',
 '上述',
 '上面',
 '下列',
 '下去',
 '下来',
 '下面',
 '不一',
 '不久',
 '不仅',
 '不会',
 '不但',
 '不光',
 '不单',
 '不变',
 '不只',
 '不可',
 '不同',
 '不够',
 '不如',
 '不得',
 '不怕',
 '不惟',
 '不成',
 '不拘',
 '不敢',
 '不断',
 '不是',
 '不比',
 '不然',
 '不特',
 '不独',
 '不管',
 '不能',
 '不要',
 '不论',
 '不足',
 '不过',
 '不问',
 '与',
 '与其',
 '与否',
 '与此同时',
 '专门',
 '且',
 '两者',
 '严格',
 '严重',
 '个',
 '个人',
 '个别',
 '中小',
 '中间',
 '丰富',
 '临',
 '为',
 '为主',
 '为了',
 '为什么',
 '为什麽',
 '为何',
 '为着',
 '主张',
 '主要',
 '举行',
 '乃',
 '乃至',
 '么',
 '之',
 '之一',
 '之前',
 '之后',
 '之後',
 '之所以',
 '之类',
 '乌乎',
 '乎',
 '乘',
 '也',
 '也好',
 '也是',
 '也罢',
 '了',
 '了解',
 '争取',
 '于',
 '于是',
 '于是乎',
 '云云',
 '互相',
 '产生',
 '人们',
 '人家',
 '什么',
 '什么样',
 '什麽',
 '今后',
 '今天',
 '今年',
 '今後',
 '仍然',
 '从',
 '从事',
 '从而',
 '他',
 '他人',
 '他们',
 '他的',
 '代替',
 '以',
 '以上',
 '以下',
 '以为',
 '以便',
 '以免',
 '以前',
 '以及',
 '以后',
 '以外',
 '以後',
 

In [18]:
print(stopwords.fileids())

['arabic', 'azerbaijani', 'basque', 'bengali', 'catalan', 'chinese', 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'greek', 'hebrew', 'hinglish', 'hungarian', 'indonesian', 'italian', 'kazakh', 'nepali', 'norwegian', 'portuguese', 'romanian', 'russian', 'slovene', 'spanish', 'swedish', 'tajik', 'turkish']


In [19]:
stopwords.words('nepali')

['छ',
 'र',
 'पनि',
 'छन्',
 'लागि',
 'भएको',
 'गरेको',
 'भने',
 'गर्न',
 'गर्ने',
 'हो',
 'तथा',
 'यो',
 'रहेको',
 'उनले',
 'थियो',
 'हुने',
 'गरेका',
 'थिए',
 'गर्दै',
 'तर',
 'नै',
 'को',
 'मा',
 'हुन्',
 'भन्ने',
 'हुन',
 'गरी',
 'त',
 'हुन्छ',
 'अब',
 'के',
 'रहेका',
 'गरेर',
 'छैन',
 'दिए',
 'भए',
 'यस',
 'ले',
 'गर्नु',
 'औं',
 'सो',
 'त्यो',
 'कि',
 'जुन',
 'यी',
 'का',
 'गरि',
 'ती',
 'न',
 'छु',
 'छौं',
 'लाई',
 'नि',
 'उप',
 'अक्सर',
 'आदि',
 'कसरी',
 'क्रमशः',
 'चाले',
 'अगाडी',
 'अझै',
 'अनुसार',
 'अन्तर्गत',
 'अन्य',
 'अन्यत्र',
 'अन्यथा',
 'अरु',
 'अरुलाई',
 'अर्को',
 'अर्थात',
 'अर्थात्',
 'अलग',
 'आए',
 'आजको',
 'ओठ',
 'आत्म',
 'आफू',
 'आफूलाई',
 'आफ्नै',
 'आफ्नो',
 'आयो',
 'उदाहरण',
 'उनको',
 'उहालाई',
 'एउटै',
 'एक',
 'एकदम',
 'कतै',
 'कम से कम',
 'कसै',
 'कसैले',
 'कहाँबाट',
 'कहिलेकाहीं',
 'का',
 'किन',
 'किनभने',
 'कुनै',
 'कुरा',
 'कृपया',
 'केही',
 'कोही',
 'गए',
 'गरौं',
 'गर्छ',
 'गर्छु',
 'गर्नुपर्छ',
 'गयौ',
 'गैर',
 'चार',
 'चाहनुहुन्छ',
 'चाहन्छु',
 'चाहिए

In [23]:
languages = stopwords.fileids()
for i in languages:
    print(i)
    print(stopwords.words(i))
    print()

arabic
['إذ', 'إذا', 'إذما', 'إذن', 'أف', 'أقل', 'أكثر', 'ألا', 'إلا', 'التي', 'الذي', 'الذين', 'اللاتي', 'اللائي', 'اللتان', 'اللتيا', 'اللتين', 'اللذان', 'اللذين', 'اللواتي', 'إلى', 'إليك', 'إليكم', 'إليكما', 'إليكن', 'أم', 'أما', 'أما', 'إما', 'أن', 'إن', 'إنا', 'أنا', 'أنت', 'أنتم', 'أنتما', 'أنتن', 'إنما', 'إنه', 'أنى', 'أنى', 'آه', 'آها', 'أو', 'أولاء', 'أولئك', 'أوه', 'آي', 'أي', 'أيها', 'إي', 'أين', 'أين', 'أينما', 'إيه', 'بخ', 'بس', 'بعد', 'بعض', 'بك', 'بكم', 'بكم', 'بكما', 'بكن', 'بل', 'بلى', 'بما', 'بماذا', 'بمن', 'بنا', 'به', 'بها', 'بهم', 'بهما', 'بهن', 'بي', 'بين', 'بيد', 'تلك', 'تلكم', 'تلكما', 'ته', 'تي', 'تين', 'تينك', 'ثم', 'ثمة', 'حاشا', 'حبذا', 'حتى', 'حيث', 'حيثما', 'حين', 'خلا', 'دون', 'ذا', 'ذات', 'ذاك', 'ذان', 'ذانك', 'ذلك', 'ذلكم', 'ذلكما', 'ذلكن', 'ذه', 'ذو', 'ذوا', 'ذواتا', 'ذواتي', 'ذي', 'ذين', 'ذينك', 'ريث', 'سوف', 'سوى', 'شتان', 'عدا', 'عسى', 'عل', 'على', 'عليك', 'عليه', 'عما', 'عن', 'عند', 'غير', 'فإذا', 'فإن', 'فلا', 'فمن', 'في', 'فيم', 'فيما', 'فيه', 'ف

In [24]:
stemmer = PorterStemmer()

In [29]:
from nltk.tokenize import sent_tokenize, word_tokenize
sentences = sent_tokenize(paragraph)

In [30]:
#Apply stopwords and filter and the apply stemming
for i in range(len(sentences)):
   words = word_tokenize(sentences[i])
   words = [stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))]
   sentences[i] = ' '.join(words)

In [31]:
sentences

['resid evil villag [ ] 2021 surviv horror game develop publish capcom .',
 'it sequel resid evil 7 : biohazard ( 2017 ) tenth main game resid evil seri .',
 'player control ethan winter , search kidnap daughter villag fill mutant creatur .',
 'villag maintain surviv horror element previou game , player scaveng environ item manag resourc ad action-ori gameplay , higher enemi count greater emphasi combat .',
 'resid evil villag announc playstat 5 reveal event june 2020 releas playstat 4 , playstat 5 , stadia , window , xbox one , xbox seri x/ may 7 , 2021 .',
 'thi follow maco version cloud version nintendo switch octob 2022 , playstat vr2 version februari 22 , 2023 .',
 'an io version releas octob 30 , 2023 .',
 'resid evil villag receiv gener posit review critic , prais gameplay , set , varieti , critic puzzl , boss fight , perform issu window version ; increas focu action divid opinion .',
 'the game year-end accolad includ game year golden joystick award .',
 'it sold 10 million uni

In [33]:
from nltk.stem import SnowballStemmer
snow_stemmer = SnowballStemmer('english')
sentences = sent_tokenize(paragraph)

In [34]:
#Apply stopwords and filter and the apply snowball stemming
for i in range(len(sentences)):
   words = word_tokenize(sentences[i])
   words = [snow_stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))]
   sentences[i] = ' '.join(words)

In [36]:
sentences

['resid evil villag [ ] 2021 surviv horror game develop publish capcom .',
 'it sequel resid evil 7 : biohazard ( 2017 ) tenth main game resid evil seri .',
 'player control ethan winter , search kidnap daughter villag fill mutant creatur .',
 'villag maintain surviv horror element previous game , player scaveng environ item manag resourc ad action-ori gameplay , higher enemi count greater emphasi combat .',
 'resid evil villag announc playstat 5 reveal event june 2020 releas playstat 4 , playstat 5 , stadia , window , xbox one , xbox seri x/s may 7 , 2021 .',
 'this follow maco version cloud version nintendo switch octob 2022 , playstat vr2 version februari 22 , 2023 .',
 'an io version releas octob 30 , 2023 .',
 'resid evil villag receiv general posit review critic , prais gameplay , set , varieti , critic puzzl , boss fight , perform issu window version ; increas focus action divid opinion .',
 'the game year-end accolad includ game year golden joystick award .',
 'it sold 10 milli

In [42]:
from nltk.stem import WordNetLemmatizer
lemma = WordNetLemmatizer()
sentences = sent_tokenize(paragraph)

In [43]:
#Apply stopwords and filter and the apply lemmatization
for i in range(len(sentences)):
   words = word_tokenize(sentences[i])
   words = [lemma.lemmatize(word,pos='v') for word in words if word not in set(stopwords.words('english'))]
   sentences[i] = ' '.join(words)

In [44]:
sentences

['Resident Evil Village [ ] 2021 survival horror game develop publish Capcom .',
 'It sequel Resident Evil 7 : Biohazard ( 2017 ) tenth main game Resident Evil series .',
 'Players control Ethan Winters , search kidnap daughter village fill mutant creatures .',
 'Village maintain survival horror elements previous game , players scavenge environments items manage resources add action-oriented gameplay , higher enemy count greater emphasis combat .',
 'Resident Evil Village announce PlayStation 5 reveal event June 2020 release PlayStation 4 , PlayStation 5 , Stadia , Windows , Xbox One , Xbox Series X/S May 7 , 2021 .',
 'This follow macOS version cloud version Nintendo Switch October 2022 , PlayStation VR2 version February 22 , 2023 .',
 'An iOS version release October 30 , 2023 .',
 'Resident Evil Village receive generally positive review critics , praise gameplay , set , variety , criticism puzzle , boss fight , performance issue Windows version ; increase focus action divide opinions