In [None]:
!pip install nltk



In [None]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, ne_chunk
from nltk.tokenize import word_tokenize, sent_tokenize

In [None]:
# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [None]:
# Load the dataset
file_path = '/content/NLPDATA1.csv'  # Replace with your file path
data = pd.read_csv(file_path)
data.head(), data.columns

(                                             Comment
 0  i seriously hate one subject to death but now ...
 1                 im so full of life i feel appalled
 2  i sit here to write i start to dig out my feel...
 3  ive been really angry with r and i feel like a...
 4  i feel suspicious if there is no one outside l...,
 Index(['Comment'], dtype='object'))

In [None]:
# Tokenization by sentences
def simple_sent_tokenize(text):
    return re.split(r'(?<=[.!?]) +', text)

data['Sentence_Tokens'] = data['Comment'].apply(simple_sent_tokenize)
data.head()

Unnamed: 0,Comment,Sentence_Tokens
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...
1,im so full of life i feel appalled,[im so full of life i feel appalled]
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.
Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


In [None]:
# Tokenization by words
def simple_word_tokenize(text):
    return re.findall(r'\b\w+\b', text.lower())

data['Word_Tokens'] = data['Comment'].apply(simple_word_tokenize)
print(data['Word_Tokens'])

0       [i, seriously, hate, one, subject, to, death, ...
1             [im, so, full, of, life, i, feel, appalled]
2       [i, sit, here, to, write, i, start, to, dig, o...
3       [ive, been, really, angry, with, r, and, i, fe...
4       [i, feel, suspicious, if, there, is, no, one, ...
                              ...                        
5932           [i, begun, to, feel, distressed, for, you]
5933    [i, left, feeling, annoyed, and, angry, thinki...
5934    [i, were, to, ever, get, married, i, d, have, ...
5935    [i, feel, reluctant, in, applying, there, beca...
5936    [i, just, wanted, to, apologize, to, you, beca...
Name: Word_Tokens, Length: 5937, dtype: object


In [None]:
# Filtering stop words
stop_words = set(stopwords.words('english'))
data['Filtered_Words'] = data['Word_Tokens'].apply(lambda tokens: [word for word in tokens if word not in stop_words])
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]"
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture..."


In [None]:
# Stemming
stemmer = PorterStemmer()
data['Stemmed_Words'] = data['Filtered_Words'].apply(lambda tokens: [stemmer.stem(word) for word in tokens])
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words,Stemmed_Words
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r...","[serious, hate, one, subject, death, feel, rel..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]","[im, full, life, feel, appal]"
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra...","[sit, write, start, dig, feel, think, afraid, ..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru...","[ive, realli, angri, r, feel, like, idiot, tru..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture...","[feel, suspici, one, outsid, like, raptur, hap..."


In [None]:
# POS Tagging
data['POS_Tags'] = data['Filtered_Words'].apply(pos_tag)
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words,Stemmed_Words,POS_Tags
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r...","[serious, hate, one, subject, death, feel, rel...","[(seriously, RB), (hate, VB), (one, CD), (subj..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]","[im, full, life, feel, appal]","[(im, NN), (full, JJ), (life, NN), (feel, NN),..."
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra...","[sit, write, start, dig, feel, think, afraid, ...","[(sit, NN), (write, JJ), (start, NN), (dig, NN..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru...","[ive, realli, angri, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture...","[feel, suspici, one, outsid, like, raptur, hap...","[(feel, RB), (suspicious, JJ), (one, CD), (out..."


In [None]:
# Lemmatization
lemmatizer = WordNetLemmatizer()
data['Lemmatized_Words'] = data['Filtered_Words'].apply(lambda tokens: [lemmatizer.lemmatize(word) for word in tokens])
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words,Stemmed_Words,POS_Tags,Lemmatized_Words
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r...","[serious, hate, one, subject, death, feel, rel...","[(seriously, RB), (hate, VB), (one, CD), (subj...","[seriously, hate, one, subject, death, feel, r..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]","[im, full, life, feel, appal]","[(im, NN), (full, JJ), (life, NN), (feel, NN),...","[im, full, life, feel, appalled]"
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra...","[sit, write, start, dig, feel, think, afraid, ...","[(sit, NN), (write, JJ), (start, NN), (dig, NN...","[sit, write, start, dig, feeling, think, afrai..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru...","[ive, realli, angri, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)...","[ive, really, angry, r, feel, like, idiot, tru..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture...","[feel, suspici, one, outsid, like, raptur, hap...","[(feel, RB), (suspicious, JJ), (one, CD), (out...","[feel, suspicious, one, outside, like, rapture..."


In [None]:
# Chunking (using POS tags)
data['Chunks'] = data['POS_Tags'].apply(lambda tags: ne_chunk(tags))
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words,Stemmed_Words,POS_Tags,Lemmatized_Words,Chunks
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r...","[serious, hate, one, subject, death, feel, rel...","[(seriously, RB), (hate, VB), (one, CD), (subj...","[seriously, hate, one, subject, death, feel, r...","[(seriously, RB), (hate, VB), (one, CD), (subj..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]","[im, full, life, feel, appal]","[(im, NN), (full, JJ), (life, NN), (feel, NN),...","[im, full, life, feel, appalled]","[(im, NN), (full, JJ), (life, NN), (feel, NN),..."
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra...","[sit, write, start, dig, feel, think, afraid, ...","[(sit, NN), (write, JJ), (start, NN), (dig, NN...","[sit, write, start, dig, feeling, think, afrai...","[(sit, NN), (write, JJ), (start, NN), (dig, NN..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru...","[ive, realli, angri, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)...","[ive, really, angry, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture...","[feel, suspici, one, outsid, like, raptur, hap...","[(feel, RB), (suspicious, JJ), (one, CD), (out...","[feel, suspicious, one, outside, like, rapture...","[(feel, RB), (suspicious, JJ), (one, CD), (out..."


In [None]:
# Named Entity Recognition (NER)
data['Named_Entities'] = data['POS_Tags'].apply(lambda tags: ne_chunk(tags))
data.head()

Unnamed: 0,Comment,Sentence_Tokens,Word_Tokens,Filtered_Words,Stemmed_Words,POS_Tags,Lemmatized_Words,Chunks,Named_Entities
0,i seriously hate one subject to death but now ...,[i seriously hate one subject to death but now...,"[i, seriously, hate, one, subject, to, death, ...","[seriously, hate, one, subject, death, feel, r...","[serious, hate, one, subject, death, feel, rel...","[(seriously, RB), (hate, VB), (one, CD), (subj...","[seriously, hate, one, subject, death, feel, r...","[(seriously, RB), (hate, VB), (one, CD), (subj...","[(seriously, RB), (hate, VB), (one, CD), (subj..."
1,im so full of life i feel appalled,[im so full of life i feel appalled],"[im, so, full, of, life, i, feel, appalled]","[im, full, life, feel, appalled]","[im, full, life, feel, appal]","[(im, NN), (full, JJ), (life, NN), (feel, NN),...","[im, full, life, feel, appalled]","[(im, NN), (full, JJ), (life, NN), (feel, NN),...","[(im, NN), (full, JJ), (life, NN), (feel, NN),..."
2,i sit here to write i start to dig out my feel...,[i sit here to write i start to dig out my fee...,"[i, sit, here, to, write, i, start, to, dig, o...","[sit, write, start, dig, feelings, think, afra...","[sit, write, start, dig, feel, think, afraid, ...","[(sit, NN), (write, JJ), (start, NN), (dig, NN...","[sit, write, start, dig, feeling, think, afrai...","[(sit, NN), (write, JJ), (start, NN), (dig, NN...","[(sit, NN), (write, JJ), (start, NN), (dig, NN..."
3,ive been really angry with r and i feel like a...,[ive been really angry with r and i feel like ...,"[ive, been, really, angry, with, r, and, i, fe...","[ive, really, angry, r, feel, like, idiot, tru...","[ive, realli, angri, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)...","[ive, really, angry, r, feel, like, idiot, tru...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)...","[(ive, JJ), (really, RB), (angry, JJ), (r, NN)..."
4,i feel suspicious if there is no one outside l...,[i feel suspicious if there is no one outside ...,"[i, feel, suspicious, if, there, is, no, one, ...","[feel, suspicious, one, outside, like, rapture...","[feel, suspici, one, outsid, like, raptur, hap...","[(feel, RB), (suspicious, JJ), (one, CD), (out...","[feel, suspicious, one, outside, like, rapture...","[(feel, RB), (suspicious, JJ), (one, CD), (out...","[(feel, RB), (suspicious, JJ), (one, CD), (out..."
