In [1]:
import re
from nltk.corpus import stopwords
import spacy
import pandas as pd

In [2]:
nlp = spacy.load("en_core_web_sm")

In [3]:
sentence = "In recent years, integrating technology into education has transformed how students learn and teachers instruct. Digital tools and resources have facilitated a more interactive learning environment, making education more accessible and personalized. One significant change is the rise of online learning platforms, allowing students to access a wide range of courses from anywhere. This flexibility benefits non-traditional students who may have jobs or family commitments, enabling them to learn at their own pace. Technology has also led to the development of educational tools like interactive simulations and gamified learning applications, which engage students in innovative ways. These tools not only make learning enjoyable but also encourage critical thinking. Teachers benefit from technology as well. Data analytics provide insights into student performance, allowing for targeted instruction that meets individual learning needs. However, challenges remain, such as the digital divide, which can exacerbate educational inequalities. Despite these issues, the potential of technology to enhance education is undeniable. Striking a balance between utilizing advancements and ensuring equitable access is crucial for creating an effective learning environment"

## Text Preprocessing

In [4]:
pattern_to_find = r"[^\w\s]"
no_punct_string = re.sub(pattern_to_find, " ", sentence)

In [5]:
en_stopwords = set(stopwords.words("english"))

In [6]:
sentence_no_stopwords = " ".join(word for word in no_punct_string.split() if word not in en_stopwords)
sentence_no_stopwords

'In recent years integrating technology education transformed students learn teachers instruct Digital tools resources facilitated interactive learning environment making education accessible personalized One significant change rise online learning platforms allowing students access wide range courses anywhere This flexibility benefits non traditional students may jobs family commitments enabling learn pace Technology also led development educational tools like interactive simulations gamified learning applications engage students innovative ways These tools make learning enjoyable also encourage critical thinking Teachers benefit technology well Data analytics provide insights student performance allowing targeted instruction meets individual learning needs However challenges remain digital divide exacerbate educational inequalities Despite issues potential technology enhance education undeniable Striking balance utilizing advancements ensuring equitable access crucial creating effect

In [7]:
# Now our text has been refined and ready to undergo the parts of speech tagging


## POS Tagging

In [8]:
spacy_doc = nlp(sentence_no_stopwords)

In [9]:
data = []

In [10]:
pos_df = pd.DataFrame(columns=["token", "pos_tag"])

In [11]:
for token in spacy_doc:
    data.append({"token": token.text,
                             "pos_tag": token.pos_})

In [12]:
pos_df = pd.DataFrame(data)
print(pos_df.head(15))

          token pos_tag
0            In     ADP
1        recent     ADJ
2         years    NOUN
3   integrating    VERB
4    technology    NOUN
5     education    NOUN
6   transformed    VERB
7      students    NOUN
8         learn    VERB
9      teachers    NOUN
10     instruct    VERB
11      Digital   PROPN
12        tools    NOUN
13    resources    NOUN
14  facilitated    VERB


In [13]:
pos_df_counts = pos_df.groupby(["token", "pos_tag"]).size().reset_index(name="counts").sort_values(by="counts", ascending=False)

In [18]:
pos_df_counts

Unnamed: 0,token,pos_tag,counts
60,learning,NOUN,5
85,students,NOUN,4
33,education,NOUN,3
88,technology,NOUN,3
90,tools,NOUN,3
...,...,...,...
31,digital,ADJ,1
30,development,NOUN,1
29,crucial,ADJ,1
28,critical,ADJ,1


In [15]:
pos_df_poscounts = pos_df_counts.groupby(["pos_tag"])["token"].count().sort_values(ascending= False)

In [16]:
pos_df_poscounts.head(10)

pos_tag
NOUN     35
VERB     28
ADJ      21
PROPN     4
ADV       3
ADP       2
DET       2
AUX       1
INTJ      1
NUM       1
Name: token, dtype: int64

In [17]:
nouns = pos_df_counts[pos_df_counts.pos_tag == "NOUN"][:10]
nouns

Unnamed: 0,token,pos_tag,counts
60,learning,NOUN,5
85,students,NOUN,4
33,education,NOUN,3
88,technology,NOUN,3
90,tools,NOUN,3
42,environment,NOUN,2
72,performance,NOUN,1
57,issues,NOUN,1
54,instruction,NOUN,1
71,pace,NOUN,1
