In [32]:
###Getting synonyms for words related to each category: work-life balance, culture and values, career opportunities, compensation and benefits, senior management
import nltk
from nltk.corpus import wordnet
from gensim.models import KeyedVectors

# Make sure to download NLTK resources if not already installed
nltk.download('wordnet')
nltk.download('omw-1.4')

# Define initial words for each factor
factor_words = {
    "work_life_balance": [
        "work life", "working hour", "overtime", "working on the weekend", "balance",
        "private life", "family", "maternity", "paternity", "vacation", "annual leave",
        "refresh", "monthly leave", "pto", "paid time", "holiday", "schedule"
    ],
    "culture_values": [
        "culture", "values", "organizational culture", "firm culture", "atmosphere",
        "oppression", "friend", "free food", "free lunch", "politics", "stress", "cowork"
    ],
    "career_opportunities": [
        "career opportunity", "advancement", "career growth", "promotion", "career development",
        "development program", "train", "skills"
    ],
    "compensation_benefits": [
        "compensation", "benefits", "pay", "remuneration", "wage", "money", "salary", "401k",
        "fund", "package", "reimbursement", "tuition", "insurance", "lay off", "fire"
    ],
    "senior_management": [
        "management", "leadership", "upper management", "team leader", "executives", "CEO"
    ]
}

!pip install gensim

import gensim.downloader as api

# Download the pre-trained model
word_vectors = api.load('word2vec-google-news-300')

# Save the model locally
word_vectors.save("word2vec.model")

word_vectors = KeyedVectors.load("word2vec.model")

# Function to get synonyms using WordNet
def get_wordnet_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())  # Add synonym lemma to set
    return synonyms

# Function to get similar words using Word2Vec
def get_word2vec_similar_words(word, topn=10):
    try:
        return word_vectors.most_similar(word, topn=topn)
    except KeyError:
        # If the word isn't in the Word2Vec vocabulary, return an empty list
        return []

# Function to expand factor keywords using both WordNet and Word2Vec
def expand_keywords(factor_words):
    expanded_words = {}
    for category, words in factor_words.items():
        expanded_words[category] = set()

        for word in words:
            # Get WordNet synonyms for the word
            wordnet_synonyms = get_wordnet_synonyms(word)
            expanded_words[category].update(wordnet_synonyms)

            # Get Word2Vec similar words for the word
            word2vec_similar_words = get_word2vec_similar_words(word)
            for similar_word, _ in word2vec_similar_words:
                expanded_words[category].add(similar_word)

        # Convert to list and remove duplicates (set already removes duplicates)
        expanded_words[category] = list(expanded_words[category])

    return expanded_words

# Expand the keywords for all factors
expanded_factor_keywords = expand_keywords(factor_words)

# Print the expanded keywords for each factor
for category, words in expanded_factor_keywords.items():
    print(f"{category}: {words[:10]}...")  # Printing the first 10 words for brevity


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


work_life_balance: ['home', 'siblings', 'holidays', 'Yuletide', 'residue', 'category', 'nonconference_schedule', 'Zyprexa_Cymbalta_Humalog', 'sept', 'Olshey_joked']...
culture_values: ['injustice', 'stress', 'cultural_diversity', 'anxiety', 'appreciate', 'oppressors', 'oppressions', 'subjugation', 'Pat_Tillman_personified', 'Quaker']...
career_opportunities: ['caravan', 'advancement', 'abilities', 'train', 'check', 'atomically_precise_manufacturing', 'gearing', 'acquirement', 'freight_train', 'promotional']...
compensation_benefits: ['do_good', 'force_out', 'blaze', 'firefighters', 'fervidness', 're_imbursement', 'cerebral_magnificence', 'fervour', 'reinsurance', 'tutorship']...
senior_management: ['NNSA_Bodman', 'CEOs', 'Chairman', 'direction', 'organizational_structure', 'leaders', 'COO', 'executive_director', 'leader', 'Management']...


In [33]:
for category, words in expanded_factor_keywords.items():
    print(f"{category}: {words}...")

work_life_balance: ['home', 'siblings', 'holidays', 'Yuletide', 'residue', 'category', 'nonconference_schedule', 'Zyprexa_Cymbalta_Humalog', 'sept', 'Olshey_joked', 'equipoise', 'Balance', 'mother', 'maternal_quality', 'romantic_getaway', 'neo_natal', 'nonconference_slate', 'aunt', 'fam_ily', 'motherliness', 'docket', 'unappropriated_fund', 'kinfolk', 'midwives', 'midwifery', 'equaliser', 'freshen', 'waning_seconds', 'Overtime', 'overtimes', 'relatives', 'maternity_ward', 'family', 'yuletide', 'remainder', 'Libra_the_Scales', 'counterweight', 'schedules', 'Garrett_Hartley_readies', 'Maternity', 'paternity_suit', 'class', 'Karen_Sala', 'itinerary', 'symmetry', 'balancing', 'daughter_Danielynn', 'parentage', 'Vacation', 'kin', 'refresh', 'crime_syndicate', 'reboot', 'kinsperson', 'spousal', 'variator', 'schedule', 'counterpoise', 'agenda', 'authorship', 'Smokey_Robinson_Bob_Dylan', 'friends', 'phratry', 'Libra', 'electro_hydraulic', 'vacation', 'fellowship', 'brush_up', 'pregnancy', 'pow

In [34]:
# Function to get synonyms for individual words
def get_wordnet_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())  # Add synonym lemma to set
    return synonyms

# Function to handle multi-word phrases
def get_phrase_synonyms(phrase):
    words = phrase.split()  # Split phrase into individual words
    phrase_synonyms = set()
    for word in words:
        phrase_synonyms.update(get_wordnet_synonyms(word))  # Get synonyms for each word
    return phrase_synonyms

for category, words in factor_words.items():
    print(f"Synonyms for category '{category}':")
    for word in words:
        synonyms = get_phrase_synonyms(word)
        print(f"  {word}: {synonyms}")
    print("\n")

Synonyms for category 'work_life_balance':
  work life: {'employment', 'life-time', 'work', 'oeuvre', 'life_history', 'life_story', 'mould', 'sour', 'process', 'animation', 'exploit', 'lick', 'do_work', 'lifespan', 'act', 'mold', 'workplace', 'make', 'living', 'ferment', 'shape', 'put_to_work', 'act_upon', 'cultivate', 'work_on', 'play', 'wreak', 'turn', 'life', 'forge', 'lifetime', 'puzzle_out', 'sprightliness', 'crop', 'operate', 'work_out', 'function', 'exercise', 'influence', 'liveliness', 'knead', 'body_of_work', 'piece_of_work', 'go', 'form', 'bring', 'life_sentence', 'make_for', 'aliveness', 'study', 'figure_out', 'spirit', 'run', 'solve', 'biography'}
  working hour: {'functional', 'work', 'mould', 'sour', 'process', 'exploit', 'lick', 'do_work', 'act', 'mold', 'workings', 'make', 'hr', 'ferment', 'shape', 'put_to_work', '60_minutes', 'act_upon', 'cultivate', 'work_on', 'on_the_job', 'play', 'wreak', 'turn', 'forge', 'puzzle_out', 'crop', 'running', 'operate', 'work_out', 'func

In [78]:

from collections import Counter

# Define your categories
categories = {
    "work_life_balance": [
        'work life', 'fellowship', 'holidays', 'overtime', 'schedule', 'honeymoon', 'family',
        'maternity', 'Christmas', 'home', 'paternity', 'vacationing', 'families', 'festive',
        'pregnancy', 'Maternity', 'motherhood', 'relatives', 'midwifery', 'Xmas', "father",
        'fatherhood', 'rest', 'vacay', 'famliy', 'midwives', 'power_takeoff', 'Thanksgiving',
        'Paternity', 'household', 'vacation', 'children', 'romantic_getaway', 'freshen_up',
        'family', 'getaways', 'friends', 'family_unit', 'aunt', 'private_life', 'weekend'
    ],
    "culture_values": [
        'culture', 'values', 'oppressors', 'anxiety', 'Stress', 'tenseness',
        'standard_atmosphere', 'strain', 'stress_hormones', 'assess', 'principles',
        'punctuate', 'stressful', 'poltics', 'oppression', 'ethos', 'civilisation',
        'atmospheres', 'subjugation', 'oppressions', 'oppressed', 'focus', 'Values',
        'politics', 'oppressiveness', 'relaxed_atmosphere', 'festive_atmosphere',
        'atmoshpere', 'ally',  'stressor', 'esteem', 'politcs',
        'stressful_situations', 'respect', 'stress', 'time_value', 'stressors',
        'cultural', 'vibe', 'indigenous_cultures', 'morals', 'tyranny', 'Politics',
        'political', 'stresses', 'traditions', 'partisan_politics', 'cultural_diversity',
        'ambiance', 'injustice', 'persecution', 'atmosphere', 'free_food', 'environment'
        'free_lunch', 'office_party', 'pizza_party', 'communication', 'mentor', 'training'
    ],
    "career_opportunities": [
        'career_opportunities', 'opportunities', 'groom',
        'interpersonal_skills', 'publicity', 'promoted', 'promotions', 'development',
        'educate', 'promotion', 'skillset', 'promoting', 'promotional_material',
        'development_program', 'technology','learn'
    ],
    "compensation_benefits": [
        'compensation', 'benefits', 'insurance', 'Undergraduate_tuition', 'wages',
        'tuition_fee', 'tuition', 'insured', 'Salary', 'stipend', 'signing_bonus',
        'payscale', 'pays', 'hourly_wages', 'salaries', 'payouts', 'repay', 'cash',
        'vast_sums', 'reinsurance', 'Monies', 'inferno', 'Minimum_wage', 'funds',
        'package', 'insurance_premiums', 'savings', 'variable_remuneration',
        'reimbursements', 'severance', 'reimburses', 'bonus', 'reimbursed',
        'pay_off', 'remunerations', 'Paying', 'Money', 'raise', 'give_the_axe',
        'Remuneration_Committee', 'hourly_wage', 'earnings', 'fire',
        'Medicaid_reimbursement', 'investment_trust', 'fuel', 'endowment', '401K'
    ],
    "senior_management": [
        'senior_management', 'senior_mgmt', 'exec', 'Chief_Executive_Officer',
        'Managing_Director', 'Chief_Executive', 'management', 'leaderships',
        'chairmanship', 'executive', 'CFO', 'administrator', 'chief_executive_officer',
        'chief_operating_officer', 'managers', 'mgmt', 'managing', 'COO', 'Execs',
        'organizational_structure', 'corporate_chieftains', 'exec', 'CEOs',
        'Chairman', 'executive_director', 'Management', 'Chief_Operating_Officer',
        'Executives', 'mentor'
    ]
}

# Convert all words to lowercase and deduplicate within each category
for category, words in categories.items():
    categories[category] = list(set([word.lower() for word in words]))

# Check for duplicates across categories
all_words = [word for words in categories.values() for word in words]
duplicates = [word for word, count in Counter(all_words).items() if count > 1]

# Print results
print("Updated Categories with Lowercase Words and Deduplication:")
for category, words in categories.items():
    print(f"{category}: {sorted(words)}\n")

print("Duplicates Across Categories:")
print(duplicates)

Updated Categories with Lowercase Words and Deduplication:
work_life_balance: ['aunt', 'children', 'christmas', 'families', 'family', 'family_unit', 'famliy', 'father', 'fatherhood', 'fellowship', 'festive', 'freshen_up', 'friends', 'getaways', 'holidays', 'home', 'honeymoon', 'household', 'maternity', 'midwifery', 'midwives', 'motherhood', 'overtime', 'paternity', 'power_takeoff', 'pregnancy', 'private_life', 'relatives', 'rest', 'romantic_getaway', 'schedule', 'thanksgiving', 'vacation', 'vacationing', 'vacay', 'weekend', 'work life', 'xmas']

culture_values: ['ally', 'ambiance', 'anxiety', 'assess', 'atmoshpere', 'atmosphere', 'atmospheres', 'civilisation', 'communication', 'cultural', 'cultural_diversity', 'culture', 'environmentfree_lunch', 'esteem', 'ethos', 'festive_atmosphere', 'focus', 'free_food', 'indigenous_cultures', 'injustice', 'mentor', 'morals', 'office_party', 'oppressed', 'oppression', 'oppressions', 'oppressiveness', 'oppressors', 'partisan_politics', 'persecution',

In [36]:
# prompt: read this file /content/glassdoor_reviews_COPY_FULL.xlsx

import pandas as pd
import nltk
from nltk.corpus import wordnet
from gensim.models import KeyedVectors
import gensim.downloader as api
from collections import Counter

# Download necessary NLTK data
nltk.download('wordnet')
nltk.download('omw-1.4')

df = pd.read_excel('/content/glassdoor_reviews_COPY_FULL.xlsx')
df.head()




[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Unnamed: 0,Review ID,Company Name,Overall Rating,Header,Job Title,Employment Status,Employment Tenure,Location,Recommend,CEO Approval,Business Outlook,Pros,Cons,Review Date
0,1,Deloitte,3,Good learning,Senior manager,Current employee,more than 10 years,Mumbai,Yes,Yes,Yes,Lot of learning and to work on latest technologies,Work life fit and bureaucracy,"Oct 28, 2024"
1,2,Deloitte,3,Work environment,Senior tax manager,Current employee,more than 10 years,Portugal,No,No Data,No,Acess to information and a lot of learning courses.,Environment very competitive between colleagues.,"Oct 28, 2024"
2,3,Deloitte,4,Good work culture,Analyst,Current employee,less than 1 year,Hyderābād,Yes,Yes,Yes,Flexi and nice people communication,Less salary and also growth is slow,"Oct 28, 2024"
3,4,Deloitte,4,Good place,Senior developer,Current employee,,Bengaluru,No Data,No Data,No Data,Good working culture in the office,Less salary and less increments,"Oct 28, 2024"
4,5,Deloitte,3,Life at deloitte,Tax consultant,Former employee,,Pune,No Data,No Data,No Data,"Great working environment, Good mentoring",Too many working hours and no extra payment,"Oct 28, 2024"


In [37]:
import pandas as pd



# Combine "Pros" and "Cons" with appropriate labels
df["review_text"] = (
    "pros: " + df["Pros"].fillna("").str.lower().str.replace("page", "", case=False).str.strip()# +
  #  " cons: " + df["Cons"].fillna("").str.lower().str.replace("page", "", case=False).str.strip()
)

# Display the result
print(df[["review_text"]])

                                                                                                                                     review_text
0                                                                                       pros: lot of learning and to work on latest technologies
1                                                                                      pros: acess to information and a lot of learning courses.
2                                                                                                      pros: flexi and nice people communication
3                                                                                                       pros: good working culture in the office
4                                                                                                pros: great working environment, good mentoring
...                                                                                                                               

In [38]:
# Ensure all rows of the column are fully visible
pd.set_option('display.max_colwidth', None)

# Print the first 5 rows of the "review_text" column
print(df["review_text"].head(5))


0     pros: lot of learning and to work on latest technologies
1    pros: acess to information and a lot of learning courses.
2                    pros: flexi and nice people communication
3                     pros: good working culture in the office
4              pros: great working environment, good mentoring
Name: review_text, dtype: object


In [39]:
df.head()

Unnamed: 0,Review ID,Company Name,Overall Rating,Header,Job Title,Employment Status,Employment Tenure,Location,Recommend,CEO Approval,Business Outlook,Pros,Cons,Review Date,review_text
0,1,Deloitte,3,Good learning,Senior manager,Current employee,more than 10 years,Mumbai,Yes,Yes,Yes,Lot of learning and to work on latest technologies,Work life fit and bureaucracy,"Oct 28, 2024",pros: lot of learning and to work on latest technologies
1,2,Deloitte,3,Work environment,Senior tax manager,Current employee,more than 10 years,Portugal,No,No Data,No,Acess to information and a lot of learning courses.,Environment very competitive between colleagues.,"Oct 28, 2024",pros: acess to information and a lot of learning courses.
2,3,Deloitte,4,Good work culture,Analyst,Current employee,less than 1 year,Hyderābād,Yes,Yes,Yes,Flexi and nice people communication,Less salary and also growth is slow,"Oct 28, 2024",pros: flexi and nice people communication
3,4,Deloitte,4,Good place,Senior developer,Current employee,,Bengaluru,No Data,No Data,No Data,Good working culture in the office,Less salary and less increments,"Oct 28, 2024",pros: good working culture in the office
4,5,Deloitte,3,Life at deloitte,Tax consultant,Former employee,,Pune,No Data,No Data,No Data,"Great working environment, Good mentoring",Too many working hours and no extra payment,"Oct 28, 2024","pros: great working environment, good mentoring"


In [40]:
print(categories)

{'work_life_balance': ['home', 'holidays', 'overtime', 'motherhood', 'vacationing', 'pregnancy', 'children', 'festive', 'household', 'romantic_getaway', 'honeymoon', 'aunt', 'private_life', 'getaways', 'schedule', 'father', 'midwives', 'midwifery', 'weekend', 'families', 'vacay', 'work life', 'thanksgiving', 'friends', 'xmas', 'maternity', 'fatherhood', 'famliy', 'relatives', 'vacation', 'rest', 'family_unit', 'christmas', 'fellowship', 'paternity', 'freshen_up', 'family', 'power_takeoff'], 'culture_values': ['punctuate', 'injustice', 'stress', 'cultural_diversity', 'anxiety', 'oppressors', 'oppressions', 'stressor', 'subjugation', 'training', 'standard_atmosphere', 'festive_atmosphere', 'mentor', 'stresses', 'tyranny', 'atmosphere', 'respect', 'vibe', 'politics', 'partisan_politics', 'free_food', 'principles', 'communication', 'stress_hormones', 'strain', 'morals', 'tenseness', 'pizza_party', 'persecution', 'time_value', 'free_lunch', 'oppressiveness', 'oppressed', 'focus', 'stressful

In [41]:
# Replace underscores with spaces in categories for matching
categories = {
    key: [phrase.replace("_", " ") for phrase in phrases]
    for key, phrases in categories.items()
}
print(categories)

{'work_life_balance': ['home', 'holidays', 'overtime', 'motherhood', 'vacationing', 'pregnancy', 'children', 'festive', 'household', 'romantic getaway', 'honeymoon', 'aunt', 'private life', 'getaways', 'schedule', 'father', 'midwives', 'midwifery', 'weekend', 'families', 'vacay', 'work life', 'thanksgiving', 'friends', 'xmas', 'maternity', 'fatherhood', 'famliy', 'relatives', 'vacation', 'rest', 'family unit', 'christmas', 'fellowship', 'paternity', 'freshen up', 'family', 'power takeoff'], 'culture_values': ['punctuate', 'injustice', 'stress', 'cultural diversity', 'anxiety', 'oppressors', 'oppressions', 'stressor', 'subjugation', 'training', 'standard atmosphere', 'festive atmosphere', 'mentor', 'stresses', 'tyranny', 'atmosphere', 'respect', 'vibe', 'politics', 'partisan politics', 'free food', 'principles', 'communication', 'stress hormones', 'strain', 'morals', 'tenseness', 'pizza party', 'persecution', 'time value', 'free lunch', 'oppressiveness', 'oppressed', 'focus', 'stressful

In [67]:
!pip install spacy
!python -m spacy download en_core_web_sm



Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m56.6 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [75]:
import spacy

# Load spaCy's English model
nlp = spacy.load("en_core_web_sm")

def check_phrase_in_sentence(sentence, phrase):
    # Process the sentence and the phrase
    doc_sentence = nlp(sentence.lower())
    doc_phrase = nlp(phrase.lower())

    # Lemmatize both sentence and phrase words and check for semantic similarity
    sentence_lemmas = [token.lemma_ for token in doc_sentence]
    phrase_lemmas = [token.lemma_ for token in doc_phrase]

    # Check if the entire phrase exists in the lemmatized sentence as a sequence
    sentence_text = " ".join(sentence_lemmas)
    phrase_text = " ".join(phrase_lemmas)

    return phrase_text in sentence_text
    # Check if the lemmatized phrase words exist in the lemmatized sentence
    for word in phrase_lemmas:
        if word not in sentence_lemmas:
            return False  # Word not found
    return True  # All words found (in some form)


In [None]:
!pip install rapidfuzz
import pandas as pd
from transformers import pipeline
from rapidfuzz import fuzz, process

In [81]:


# Load the RoBERTa sentiment model
sentiment_model = pipeline("sentiment-analysis")

# Function to compute sentiment scores
def calculate_category_sentiment(review, category_keywords):
    """
    Calculate sentiment scores for a review for specific category keywords.
    Uses fuzzy matching to find relevant text fragments and RoBERTa for sentiment scoring.
    """
    scores = []

    for keyword in category_keywords:
        # Fuzzy match the keyword in the review
        matches = process.extract(keyword, review.split(), scorer=fuzz.partial_ratio, limit=5)
        # Threshold for considering a match (e.g., >= 80)
        matched_phrases = [match[0] for match in matches if match[1] >= 95]


        result = check_phrase_in_sentence(review, keyword)


        if result:
            scores.append(1) #polarity)
            print(f"Phrase : {keyword}, sentence: {review}")
        else:
            scores.append(0) #polarity)

        if matched_phrases:
            # Join matched phrases into a text snippet for sentiment scoring
            snippet = " ".join(matched_phrases)

            sentiment = sentiment_model(snippet)[0]  # Get sentiment score
            # Convert sentiment to polarity: positive = +1, negative = -1, neutral = 0
            if sentiment["label"] == "POSITIVE":
              polarity = sentiment["score"]
            elif sentiment["label"] == "NEGATIVE":
              polarity = -sentiment["score"]
            else:
                 polarity = 0
            #scores.append(polarity)
    return sum(scores) if scores else 0  # Return the total polarity score


# Apply the function to the DataFrame
def score_reviews(df, categories):
    for category, keywords in categories.items():
        df[category + "_sentiment"] = df["review_text"][:1000].apply(
            lambda x: calculate_category_sentiment(x.lower(), keywords)
        )
    return df



# Calculate sentiment scores for each category
df = score_reviews(df, categories)

# Display the DataFrame with added sentiment columns
pd.set_option('display.max_columns', None)  # Ensure all columns are visible
#print(df)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Phrase : rest, sentence: pros: interesting clients, exposure to top field experts and partners.
Phrase : friends, sentence: pros: great culture, nice environment, friendly team
Phrase : rest, sentence: pros: well established organization prestigous work place
Phrase : home, sentence: pros: - flexible work from home - nice colleagues - growth opportunities
Phrase : holidays, sentence: pros: good well being benefits and smart holiday structure with decent hikes every year
Phrase : work life, sentence: pros: good work life balance. less work comparatively
Phrase : rest, sentence: pros: interesting work nice colleagues good benefits
Phrase : friends, sentence: pros: good benefits and friendly employees
Phrase : home, sentence: pros: employee friendly working from home flexibility
Phrase : friends, sentence: pros: employee friendly working from home flexibility
Phrase : home, sentence: pros: hike, benefits, flexible work from home
Phrase : rest, sentence: pros: great experience once you fin

KeyboardInterrupt: 

In [82]:
df.head()

Unnamed: 0,Review ID,Company Name,Overall Rating,Header,Job Title,Employment Status,Employment Tenure,Location,Recommend,CEO Approval,Business Outlook,Pros,Cons,Review Date,review_text,work_life_balance_sentiment,culture_values_sentiment,career_opportunities_sentiment,compensation_benefits_sentiment,senior_management_sentiment
0,1,Deloitte,3,Good learning,Senior manager,Current employee,more than 10 years,Mumbai,Yes,Yes,Yes,Lot of learning and to work on latest technologies,Work life fit and bureaucracy,"Oct 28, 2024",pros: lot of learning and to work on latest technologies,0.0,0.0,2.0,0.0,0.0
1,2,Deloitte,3,Work environment,Senior tax manager,Current employee,more than 10 years,Portugal,No,No Data,No,Acess to information and a lot of learning courses.,Environment very competitive between colleagues.,"Oct 28, 2024",pros: acess to information and a lot of learning courses.,0.0,0.0,1.0,0.0,0.0
2,3,Deloitte,4,Good work culture,Analyst,Current employee,less than 1 year,Hyderābād,Yes,Yes,Yes,Flexi and nice people communication,Less salary and also growth is slow,"Oct 28, 2024",pros: flexi and nice people communication,0.0,1.0,0.0,0.0,0.0
3,4,Deloitte,4,Good place,Senior developer,Current employee,,Bengaluru,No Data,No Data,No Data,Good working culture in the office,Less salary and less increments,"Oct 28, 2024",pros: good working culture in the office,0.0,1.0,0.0,0.0,0.0
4,5,Deloitte,3,Life at deloitte,Tax consultant,Former employee,,Pune,No Data,No Data,No Data,"Great working environment, Good mentoring",Too many working hours and no extra payment,"Oct 28, 2024","pros: great working environment, good mentoring",0.0,1.0,0.0,0.0,1.0


In [86]:
display(df[:20])

Unnamed: 0,Review ID,Company Name,Overall Rating,Header,Job Title,Employment Status,Employment Tenure,Location,Recommend,CEO Approval,Business Outlook,Pros,Cons,Review Date,review_text,work_life_balance_sentiment,culture_values_sentiment,career_opportunities_sentiment,compensation_benefits_sentiment,senior_management_sentiment
0,1,Deloitte,3,Good learning,Senior manager,Current employee,more than 10 years,Mumbai,Yes,Yes,Yes,Lot of learning and to work on latest technologies,Work life fit and bureaucracy,"Oct 28, 2024",pros: lot of learning and to work on latest technologies,0.0,0.0,2.0,0.0,0.0
1,2,Deloitte,3,Work environment,Senior tax manager,Current employee,more than 10 years,Portugal,No,No Data,No,Acess to information and a lot of learning courses.,Environment very competitive between colleagues.,"Oct 28, 2024",pros: acess to information and a lot of learning courses.,0.0,0.0,1.0,0.0,0.0
2,3,Deloitte,4,Good work culture,Analyst,Current employee,less than 1 year,Hyderābād,Yes,Yes,Yes,Flexi and nice people communication,Less salary and also growth is slow,"Oct 28, 2024",pros: flexi and nice people communication,0.0,1.0,0.0,0.0,0.0
3,4,Deloitte,4,Good place,Senior developer,Current employee,,Bengaluru,No Data,No Data,No Data,Good working culture in the office,Less salary and less increments,"Oct 28, 2024",pros: good working culture in the office,0.0,1.0,0.0,0.0,0.0
4,5,Deloitte,3,Life at deloitte,Tax consultant,Former employee,,Pune,No Data,No Data,No Data,"Great working environment, Good mentoring",Too many working hours and no extra payment,"Oct 28, 2024","pros: great working environment, good mentoring",0.0,1.0,0.0,0.0,1.0
5,6,Deloitte,4,Tough but interesting,Senior auditor,Current employee,more than 1 year,Sydney,Yes,Yes,Yes,"Interesting clients, exposure to top field experts and partners.",Results driven and high attrition rate. Long hours.,"Oct 28, 2024","pros: interesting clients, exposure to top field experts and partners.",1.0,,,,
6,7,Deloitte,4,Get your ticket punched,Advisory senior manager,Former employee,more than 5 years,"Miami, FL",Yes,Yes,Yes,Hard work pays off in industry,Always executing someone elses vision,"Oct 28, 2024",pros: hard work pays off in industry,0.0,,,,
7,8,Deloitte,4,Great place,Manager,Current employee,,"New York, NY",No Data,No Data,No Data,"Flexibility, challenging, compensation, autonomy, health and 401k benefits","Competitive, sales and development, not enough internal transfer support","Oct 28, 2024","pros: flexibility, challenging, compensation, autonomy, health and 401k benefits",0.0,,,,
8,9,Deloitte,4,Big Company,Consultant,Current employee,,"Washington, DC",No Data,No Data,No Data,Lots of opportunities and exposure to different projects,Can feel too large. Easy to get lost.,"Oct 28, 2024",pros: lots of opportunities and exposure to different projects,0.0,,,,
9,10,Deloitte,5,Great first company,Analyst,Former employee,more than 1 year,"Gurgaon, Haryana",Yes,Yes,No Data,Excellent exposure 50% travel (client visits) steep learning curve,No cons at all its an amazing company to work at. Page2,"Oct 28, 2024",pros: excellent exposure 50% travel (client visits) steep learning curve,0.0,,,,
