In [37]:
import pandas as pd

In [38]:
# Read the employee reviews CSV into a DataFrame and preview the first ten rows.
REVIEWS_CSV = "../data/employee_reviews.csv"
employee_reviews = pd.read_csv(REVIEWS_CSV)
employee_reviews.head(10)

Unnamed: 0,Employee Name,Review
0,Scarlett,So happy with the hybrid work policy! <u>under...
1,Chloe,<div>division content</div> The new intern is ...
2,Logan,<code>sample code</code> The new intern is doi...
3,Nina,Great onboarding process! <code>sample code</c...
4,Emily,Can someone check the broken AC in meeting roo...
5,Ava,Loving the <b>bold text</b> – makes everything...
6,Amelia,Loving the <code>sample code</code> – makes ev...
7,Harper,WiFi is unstable in some zones <i>italic text</i>
8,Daniel,Friday sessions are a great learning opportuni...
9,Mason,Too many overlapping tasks <u>underlined</u>


In [39]:
# Remove HTML tags and URLs from the review text
import re

# Markup to strip: HTML comments, plus opening/closing tags, declarations and
# processing instructions. A tag must have a letter right after "<" (optionally
# preceded by "/", "!" or "?"), so plain comparisons such as "x < 5 and y > 3"
# or an emoticon like "<3" are left alone. DOTALL lets comments span lines.
_HTML_TAG_RE = re.compile(r'<(?:!--.*?--|[/!?]?[A-Za-z][^<>]*)>', re.DOTALL)

# URLs: an explicit http:// or https:// scheme, or a leading "www.". The word
# boundary and the required "://" / "." keep ordinary words that merely contain
# "www" or "http" (e.g. "Awwww") from being deleted.
_URL_RE = re.compile(r'\b(?:https?://|www\.)\S+', re.IGNORECASE)


def clean_html_url(text):
    """Strip HTML markup and URLs from a piece of text.

    Only the tags themselves are removed; the text between an opening and a
    closing tag is kept. Whitespace around removed spans is left as-is.

    Args:
        text: The string to clean. Any non-string value (for example the
            NaN pandas uses for a missing cell) is treated as empty text.

    Returns:
        The cleaned string, or '' when ``text`` is not a string.
    """
    # Missing cells arrive as float NaN / None; re.sub would raise on them.
    if not isinstance(text, str):
        return ''

    # Remove HTML tags and comments first, then URLs.
    text = _HTML_TAG_RE.sub('', text)
    return _URL_RE.sub('', text)

# Write the cleaned text to a new column; the original 'Review' column is kept.
raw_reviews = employee_reviews['Review']
employee_reviews['ReviewClean'] = raw_reviews.map(clean_html_url)
employee_reviews['ReviewClean'].head()
    

0     So happy with the hybrid work policy! underlined
1    division content The new intern is doing great...
2      sample code The new intern is doing great work.
3              Great onboarding process! sample code 😤
4    Can someone check the broken AC in meeting roo...
Name: ReviewClean, dtype: object

In [42]:
import re

# Compiled once at definition time instead of on every call.
# Only emoji-related blocks are listed. A single catch-all range from U+24C2 to
# U+1F251 would also cover CJK ideographs, Kana, Hangul and many other scripts,
# deleting ordinary non-English text.
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs (incl. skin-tone modifiers)
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA00-\U0001FAFF"  # chess symbols, symbols & pictographs extended-A
    "\U0001F000-\U0001F251"  # tiles/cards, enclosed characters, regional indicators (flags)
    "\u25A0-\u25FF"          # geometric shapes
    "\u2600-\u26FF"          # miscellaneous symbols
    "\u2700-\u27BF"          # dingbats
    "\u2B00-\u2BFF"          # miscellaneous symbols and arrows
    "\u24C2\u3030\u303D\u3297\u3299"  # stand-alone symbols with emoji presentation
    "\uFE0F\u200D"           # variation selector-16 and zero-width joiner
    "]+"
)


def remove_emoji(text):
    """Delete emoji characters from ``text``.

    Every run of characters from the emoji-related Unicode blocks listed in
    ``_EMOJI_RE`` is removed, including the variation selector and zero-width
    joiner used inside emoji sequences. Surrounding whitespace is not
    collapsed, and text in other scripts is left untouched.

    Args:
        text: The string to process.

    Returns:
        ``text`` with all matched emoji characters removed.
    """
    return _EMOJI_RE.sub('', text)

# Emoji-free copy of the cleaned text, displayed next to its input for comparison.
cleaned_reviews = employee_reviews['ReviewClean']
employee_reviews['ReviewNoEmoji'] = cleaned_reviews.map(remove_emoji)
employee_reviews.loc[:, ['ReviewClean', 'ReviewNoEmoji']].head()

Unnamed: 0,ReviewClean,ReviewNoEmoji
0,So happy with the hybrid work policy! underlined,So happy with the hybrid work policy! underlined
1,division content The new intern is doing great...,division content The new intern is doing great...
2,sample code The new intern is doing great work.,sample code The new intern is doing great work.
3,Great onboarding process! sample code 😤,Great onboarding process! sample code
4,Can someone check the broken AC in meeting roo...,Can someone check the broken AC in meeting roo...


In [43]:
import nltk
from nltk.tokenize import word_tokenize

# Split each emoji-free review into a list of word and punctuation tokens.
# NOTE(review): word_tokenize needs NLTK's tokenizer data to be installed —
# confirm it is available in this environment before a fresh run.
emoji_free_reviews = employee_reviews['ReviewNoEmoji']
employee_reviews['Tokens'] = emoji_free_reviews.apply(word_tokenize)
employee_reviews['Tokens'].head()


0    [So, happy, with, the, hybrid, work, policy, !...
1    [division, content, The, new, intern, is, doin...
2    [sample, code, The, new, intern, is, doing, gr...
3        [Great, onboarding, process, !, sample, code]
4    [Can, someone, check, the, broken, AC, in, mee...
Name: Tokens, dtype: object

In [50]:
# Sanity check: show the token list produced for the first review.
first_review_tokens = employee_reviews['Tokens'].head(1)
first_review_tokens

0    [So, happy, with, the, hybrid, work, policy, !...
Name: Tokens, dtype: object

In [51]:
# Normalise case: lower-case each token of every review into a new column.
employee_reviews['TokensLower'] = employee_reviews['Tokens'].map(
    lambda tokens: [token.lower() for token in tokens]
)
employee_reviews.head()

Unnamed: 0,Employee Name,Review,ReviewClean,ReviewNoEmoji,Tokens,TokensLower
0,Scarlett,So happy with the hybrid work policy! <u>under...,So happy with the hybrid work policy! underlined,So happy with the hybrid work policy! underlined,"[So, happy, with, the, hybrid, work, policy, !...","[so, happy, with, the, hybrid, work, policy, !..."
1,Chloe,<div>division content</div> The new intern is ...,division content The new intern is doing great...,division content The new intern is doing great...,"[division, content, The, new, intern, is, doin...","[division, content, the, new, intern, is, doin..."
2,Logan,<code>sample code</code> The new intern is doi...,sample code The new intern is doing great work.,sample code The new intern is doing great work.,"[sample, code, The, new, intern, is, doing, gr...","[sample, code, the, new, intern, is, doing, gr..."
3,Nina,Great onboarding process! <code>sample code</c...,Great onboarding process! sample code 😤,Great onboarding process! sample code,"[Great, onboarding, process, !, sample, code]","[great, onboarding, process, !, sample, code]"
4,Emily,Can someone check the broken AC in meeting roo...,Can someone check the broken AC in meeting roo...,Can someone check the broken AC in meeting roo...,"[Can, someone, check, the, broken, AC, in, mee...","[can, someone, check, the, broken, ac, in, mee..."
