In [1]:
import nltk # natural language toolkit
import re # Regular expression
import numpy as np
import warnings
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer

warnings.filterwarnings("ignore")

In [2]:
# Sample text for the notebook: two paragraphs about samurai. It contains
# non-ASCII characters and bracketed footnote markers such as [1][2].
paragraph = """
Samurai (侍) or bushi (武士, [bɯ.ɕi]) were members of the warrior class in Japan. They were originally provincial warriors who served the Kuge and imperial court in the late 12th century. Samurai eventually came to play a major political role until their abolition in the late 1870s during the Meiji era.[1][2]

In the Heian period, powerful regional clans were relied on to put down rebellions. After power struggles, the Taira clan defeated the Minamoto clan in 1160.[3] After the Minamoto defeated the Taira in 1185, Minamoto no Yoritomo established the Kamakura shogunate, a parallel government that did not supplant the imperial court.[4][5] The warriors who served the Shogunate were called gokenin, landholding warriors whose retainers were called samurai.[6][7] Gokenin were regulated by the Samurai-dokoro.
"""

In [3]:
# Build the text-normalisation helpers once so later cells can reuse them.
# `ps` does the stemming in the preprocessing cell; `lemmatizer` is created
# alongside it but is not referenced by the cells visible in this notebook.

ps, lemmatizer = PorterStemmer(), WordNetLemmatizer()

In [4]:
# Split the paragraph into sentences.
# sent_tokenize relies on NLTK's Punkt tokenizer data. On a fresh environment
# that data may be missing, in which case NLTK raises LookupError. Fetch it on
# demand (the same download-then-retry approach the stopwords cell uses) so the
# notebook survives Restart & Run All.
try:
    sentences = sent_tokenize(paragraph)
except LookupError:
    # The resource name differs between NLTK releases ("punkt" in older ones,
    # "punkt_tab" in newer ones), so request both. A name the installed
    # version does not know is reported by nltk.download and skipped.
    nltk.download("punkt")
    nltk.download("punkt_tab")
    sentences = sent_tokenize(paragraph)

In [5]:
# Inspect the tokenised sentences. Note in the output that footnote markers
# such as "[1][2]" end up at the start of the *following* sentence; they are
# removed later by the non-alphabetic filter in the preprocessing cell.
sentences

['\nSamurai (侍) or bushi (武士, [bɯ.ɕi]) were members of the warrior class in Japan.',
 'They were originally provincial warriors who served the Kuge and imperial court in the late 12th century.',
 'Samurai eventually came to play a major political role until their abolition in the late 1870s during the Meiji era.',
 '[1][2]\n\nIn the Heian period, powerful regional clans were relied on to put down rebellions.',
 'After power struggles, the Taira clan defeated the Minamoto clan in 1160.',
 '[3] After the Minamoto defeated the Taira in 1185, Minamoto no Yoritomo established the Kamakura shogunate, a parallel government that did not supplant the imperial court.',
 '[4][5] The warriors who served the Shogunate were called gokenin, landholding warriors whose retainers were called samurai.',
 '[6][7] Gokenin were regulated by the Samurai-dokoro.']

In [6]:
# Stopwords handling
# Load the English stop-word list as a set for fast membership tests.
# NLTK raises LookupError when the "stopwords" corpus is not installed; catch
# only that, so unrelated failures (and KeyboardInterrupt) are not silently
# turned into a download attempt.

try:
    stop_words = set(stopwords.words("english"))
except LookupError:
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))

In [10]:
# Preprocessing text
# For every sentence: replace non-letters with spaces, lowercase, split on
# whitespace, drop stop words, stem what is left, and re-join into one string.

corpus = []

for sentence in sentences:
    letters_only = re.sub(r'[^a-zA-Z]', ' ', sentence)
    tokens = letters_only.lower().split()

    # Stem every token that is not a stop word
    stems = []
    for token in tokens:
        if token in stop_words:
            continue
        stems.append(ps.stem(token))

    # One space-separated string per sentence, as CountVectorizer expects
    corpus.append(' '.join(stems))

In [11]:
# Creating bag of words model
# Fit the vectoriser on the cleaned sentences and materialise the counts as a
# dense array with one row per entry of `corpus`.

cv = CountVectorizer(max_features=1500)
bow_sparse = cv.fit_transform(corpus)
X = bow_sparse.toarray()

In [12]:
# Model output
# Show only the first three rows of the count matrix rather than all of it.

bow_preview = X[:3]
print("Output from bag of words model is:\n ", bow_preview)

Output from bag of words model is:
  [[0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 1 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0
  0 0 0 0 0 1 0 0 0 0 1 1 0 0]
 [1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0
  0 0 0 1 1 0 0 0 0 0 0 0 0 0]]


In [9]:
def check_odd_even():
    """Prompt for an integer and report whether it is even or odd.

    Reads one line via ``input``, converts it with ``int`` and prints
    "<n> is an Even number." or "<n> is an Odd number.". If the text cannot
    be parsed as an integer, prints an error message instead. Returns None.
    """
    try:
        value = int(input("Enter a number: "))
        # A non-zero remainder means odd (Python's % is non-negative for a
        # positive divisor, so this also holds for negative inputs).
        parity = "Odd" if value % 2 else "Even"
        print(f"{value} is an {parity} number.")
    except ValueError:
        print("Invalid input! Please enter a valid integer.")
# Run the function. It calls input(), so this cell waits for the user to
# type a value before it finishes.
check_odd_even()

Enter a number:  asdfsafda


Invalid input! Please enter a valid integer.
