# Reviews Company

In [4]:
# !conda install -c conda-forge sentence-transformers
# !pip install scikit-learn
# !pip install numpy pandas
# !pip show sentence-transformers

In [5]:
# import required packages
import pandas as pd
import numpy as np
import spacy
import re
from spacy.matcher import PhraseMatcher
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings("ignore")

In [6]:
pd.set_option("display.max_colwidth",None)
# pd.reset_option("display.max_row",None)

In [7]:
data = pd.read_csv("Data_Merge_Reviews.csv")
data

Unnamed: 0,Job Title,Job Rating,Time,Job Status,Pros,Cons,Company_Name
0,Brand Ambassador,5.0,2025-08-08,"Former employee, more than 1 year","Flexible Hours, Supportive Team, Good Training, Fun Work Environment, Opportunity To Travel.","Inconsistent Hours, Limited Career Growth, Commission Structure Can Be Unclear, Standing For Long Hours, Sometimes Slow Communication.",Safaricom
1,Senior Mobile Developer,4.0,2025-07-28,"Current contractor, more than 1 year",Great For Gaining Experience On Large Scale Enterprise Applications,A Lot Of Shifting Priorities,Safaricom
2,Project Manager,4.0,2025-07-15,"Former employee, more than 3 years","Strong Support For Professional Growth Through Training Exposure To LargeScale, Impactful Projects Across Kenya Competitive Salary And PerformanceBased Bonuses",Occasional Tight Deadlines And HighPressure Timelines Bureaucracy Can Delay Project SignOffs,Safaricom
3,Ict Technician,5.0,2025-07-18,Former employee,The Working Experience Was Good,There Are No Major Drawbacks I Can Talk About,Safaricom
4,Intern,5.0,2025-07-15,"Former intern, less than 1 year",Great Work Environment. Regular Bonuses.,None I Can Think Of,Safaricom
...,...,...,...,...,...,...,...
25037,Planning officer,4.0,2022-03-31,Former employee,have good medical covers for employees,terms of employment are contract,Equity
25038,Senior business analyst,4.0,2022-05-14,Current employee,Has good Learning potential for staff,Working schedule is not flexible,Equity
25039,Senior program manager,3.0,2016-05-02,Former employee,Potential for great social impact,Can be slow at times,Equity
25040,Ban assurance officer,1.0,2022-10-17,"Former employee, more than 1 year",Medical insurance cover. Low interest loans for staff,Bad management Less than appropriate pay Extremely long work hours Extensive workload Toxic work culture,Equity


In [8]:
# using 
data2 = data

In [9]:
def split_info(df: pd.Series)-> pd.DataFrame:
    employment_type = []
    role_type = []
    duration = []
    for raw in df:
        data = raw.split()
        emp = rol = dur = "default"

        if len(data) >=2:
            emp = data[0]
            rol = data[1].replace(",", "")

        if "less than 1 year" in raw:
            dur = "<1 year"
        elif "more than 1 year" in raw:
            dur = "1 - 3 year"
        elif "more than 3 years" in raw:
            dur = ">3 year"
        employment_type.append(emp)
        role_type.append(rol)
        duration.append(dur)
         
    return pd.DataFrame({
       "Employment_Type":employment_type,
        "Role_Type":role_type,
        "Duration":duration
    })
new_cols = split_info(data2["Job Status"])

In [10]:
data2 = pd.concat([data2, new_cols], axis=1)
data2 = data2[
    ["Job Title", "Job Rating", "Time", "Job Status", 
     "Employment_Type", "Role_Type", "Duration",
     "Pros", "Cons", "Company_Name"]
]

In [11]:
# data2.drop(["Employment_Type", "Role_Type", "Duration"], axis=1, inplace=True)

In [12]:
data2

Unnamed: 0,Job Title,Job Rating,Time,Job Status,Employment_Type,Role_Type,Duration,Pros,Cons,Company_Name
0,Brand Ambassador,5.0,2025-08-08,"Former employee, more than 1 year",Former,employee,1 - 3 year,"Flexible Hours, Supportive Team, Good Training, Fun Work Environment, Opportunity To Travel.","Inconsistent Hours, Limited Career Growth, Commission Structure Can Be Unclear, Standing For Long Hours, Sometimes Slow Communication.",Safaricom
1,Senior Mobile Developer,4.0,2025-07-28,"Current contractor, more than 1 year",Current,contractor,1 - 3 year,Great For Gaining Experience On Large Scale Enterprise Applications,A Lot Of Shifting Priorities,Safaricom
2,Project Manager,4.0,2025-07-15,"Former employee, more than 3 years",Former,employee,>3 year,"Strong Support For Professional Growth Through Training Exposure To LargeScale, Impactful Projects Across Kenya Competitive Salary And PerformanceBased Bonuses",Occasional Tight Deadlines And HighPressure Timelines Bureaucracy Can Delay Project SignOffs,Safaricom
3,Ict Technician,5.0,2025-07-18,Former employee,Former,employee,default,The Working Experience Was Good,There Are No Major Drawbacks I Can Talk About,Safaricom
4,Intern,5.0,2025-07-15,"Former intern, less than 1 year",Former,intern,<1 year,Great Work Environment. Regular Bonuses.,None I Can Think Of,Safaricom
...,...,...,...,...,...,...,...,...,...,...
25037,Planning officer,4.0,2022-03-31,Former employee,Former,employee,default,have good medical covers for employees,terms of employment are contract,Equity
25038,Senior business analyst,4.0,2022-05-14,Current employee,Current,employee,default,Has good Learning potential for staff,Working schedule is not flexible,Equity
25039,Senior program manager,3.0,2016-05-02,Former employee,Former,employee,default,Potential for great social impact,Can be slow at times,Equity
25040,Ban assurance officer,1.0,2022-10-17,"Former employee, more than 1 year",Former,employee,1 - 3 year,Medical insurance cover. Low interest loans for staff,Bad management Less than appropriate pay Extremely long work hours Extensive workload Toxic work culture,Equity


In [13]:
data2["Employment_Type"].unique().tolist()

['Former', 'Current', 'KEY']

In [14]:
data2[data2["Employment_Type"] == "KEY"]

Unnamed: 0,Job Title,Job Rating,Time,Job Status,Employment_Type,Role_Type,Duration,Pros,Cons,Company_Name
467,Online Data Entry,5.0,2021-07-21,KEY NOT FOUND: jobLine.seasonal-current,KEY,NOT,default,The Company Pays Well And On Time,Commissions Were Very Few Also Allowances,Safaricom


In [15]:
data2.drop(467, axis=0, inplace=True)

In [16]:
data2.set_index("Job Title", inplace=True)

In [17]:
data2= data2.reset_index()

In [18]:
data2["Role_Type"].unique().tolist()

['employee', 'contractor', 'intern', 'temporary', 'freelancer']

In [19]:
# jobrole = {"intern":"Internship"}

In [20]:
data2["Role_Type"].replace("intern", "internship",inplace=True)

In [21]:
# Normalize text (lowercase, remove weird symbols, etc.).
data2["Pros"] = data2["Pros"].str.lower()

In [22]:
data2["Job Title"] = data2["Job Title"].str.lower()

In [23]:
# This command is used to change the it's to it is 
import contractions
data2["Cons"] = data2["Cons"].apply(lambda x: contractions.fix(x) if isinstance(x, str) else x)

In [24]:
data2[["Cons","Company_Name"]].head(50)

Unnamed: 0,Cons,Company_Name
0,"Inconsistent Hours, Limited Career Growth, Commission Structure Can Be Unclear, Standing For Long Hours, Sometimes Slow Communication.",Safaricom
1,A Lot Of Shifting Priorities,Safaricom
2,Occasional Tight Deadlines And HighPressure Timelines Bureaucracy Can Delay Project SignOffs,Safaricom
3,There Are No Major Drawbacks I Can Talk About,Safaricom
4,None I Can Think Of,Safaricom
5,Workload May Be Too Much Sometime,Safaricom
6,Tough Management Extreme Work Conditions Pressure Nepotism,Safaricom
7,Is Place That Offers Carrier Growth,Safaricom
8,No Growth Compensation For Customer Care Is Not Worth It,Safaricom
9,Have To Meet Strict Deadlines,Safaricom


In [25]:
from spellchecker import SpellChecker
spell = SpellChecker()

def correct_sentence(text):
    if not isinstance(text, str):
        text = ""
    words = text.split()
    correct_words = []

    for word in words:
        if word.lower() in spell:
            correct_words.append(word)
        else:
            correction = spell.correction(word)
            correct_words.append(correction if correction else word)
            
    return " ".join(correct_words)

In [26]:
# data2["Corrected_Cons"] = data2["Cons"].fillna("").apply(correct_sentence)
data2

Unnamed: 0,Job Title,Job Rating,Time,Job Status,Employment_Type,Role_Type,Duration,Pros,Cons,Company_Name
0,brand ambassador,5.0,2025-08-08,"Former employee, more than 1 year",Former,employee,1 - 3 year,"flexible hours, supportive team, good training, fun work environment, opportunity to travel.","Inconsistent Hours, Limited Career Growth, Commission Structure Can Be Unclear, Standing For Long Hours, Sometimes Slow Communication.",Safaricom
1,senior mobile developer,4.0,2025-07-28,"Current contractor, more than 1 year",Current,contractor,1 - 3 year,great for gaining experience on large scale enterprise applications,A Lot Of Shifting Priorities,Safaricom
2,project manager,4.0,2025-07-15,"Former employee, more than 3 years",Former,employee,>3 year,"strong support for professional growth through training exposure to largescale, impactful projects across kenya competitive salary and performancebased bonuses",Occasional Tight Deadlines And HighPressure Timelines Bureaucracy Can Delay Project SignOffs,Safaricom
3,ict technician,5.0,2025-07-18,Former employee,Former,employee,default,the working experience was good,There Are No Major Drawbacks I Can Talk About,Safaricom
4,intern,5.0,2025-07-15,"Former intern, less than 1 year",Former,internship,<1 year,great work environment. regular bonuses.,None I Can Think Of,Safaricom
...,...,...,...,...,...,...,...,...,...,...
25036,planning officer,4.0,2022-03-31,Former employee,Former,employee,default,have good medical covers for employees,terms of employment are contract,Equity
25037,senior business analyst,4.0,2022-05-14,Current employee,Current,employee,default,has good learning potential for staff,Working schedule is not flexible,Equity
25038,senior program manager,3.0,2016-05-02,Former employee,Former,employee,default,potential for great social impact,Can be slow at times,Equity
25039,ban assurance officer,1.0,2022-10-17,"Former employee, more than 1 year",Former,employee,1 - 3 year,medical insurance cover. low interest loans for staff,Bad management Less than appropriate pay Extremely long work hours Extensive workload Toxic work culture,Equity


In [27]:
# load spacy
nlp = spacy.load("en_core_web_lg")

# patterns = [
#            "ict technician", "ict officer", "ict manager", "ict consultant", "ict manager", "ict project manager",
#            "ict security analyst", "ict service desk analyst", "ict - software engineer", "ict applications support"]

# adding entity rule
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
unique_titles = data2["Job Title"].dropna().unique().tolist()
pattern_doc = [nlp.make_doc(title.lower()) for title in unique_titles]
matcher.add("JOB_ROLE", pattern_doc)

status_map = {
    "ft": "Full-time",
    "full-time": "Full-time",
    "full time": "Full-time",
    "intern": "Internship",
    "internship": "Internship",
    "intern (paid)": "Internship",
    "contract": "Contract",
    "contractor": "Contract"
}
mapping = {"one": 1.0, "two": 2.0,"three": 3.0, "four": 4.0,"five":5.0}

def clean_status(status):
    if pd.isna(status): return None
    if "intern" in status: return "Internship"
    if "contract" in status: return "Contract"
    if "full" in status: return "Full-time"
    status = status.lower().strip()
    return status_map.get(status, status.title())

# normalize the job rating
def clean_rating(rating):
    if pd.isna(rating): return None
    if isinstance(rating, str):
        rating = rating.lower().strip()
        if rating in rating_map:
            return rating_map(rating)
        match = re.search(r"[0-9.]+", rating)
        return float(match.group()) if match else None
    return float(rating)

# text cleaning with spacy
def spacy_clean(text):
    if not isinstance(text,str) or not text.strip():
        return "", []
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    matches = matcher(doc)
    job_roles = [doc[start:end].text for _, start, end in matches]
    return " ".join(tokens), {"NER": entities, "ROLES": job_roles}
        
def process_row(row):
    job_title_clean, job_title_entities = spacy_clean(row.get("Job Title", ""))
    pros_clean, pros_entities = spacy_clean(row.get("Pros", ""))
    cons_clean, cons_entities = spacy_clean(row.get("Cons", ""))

    return pd.Series({
        "Job Title Clean": job_title_clean,
        "Job Title Entities": job_title_entities,
        "Pros Clean": pros_clean,
        "Pros Entities": pros_entities,
        "Cons Clean": cons_clean,
        "Cons Entities": cons_entities,
        "Cleaned Status": clean_status(row.get("Job Status", "")),
        "Cleaned Rating": clean_rating(row.get("Job Rating", ""))
    })

data2_cleaned = data2.apply(process_row, axis=1)
print(data2_cleaned.head())

           Job Title Clean  \
0         brand ambassador   
1  senior mobile developer   
2          project manager   
3           ict technician   
4                   intern   

                                                                   Job Title Entities  \
0                   {'NER': [], 'ROLES': ['brand', 'brand ambassador', 'ambassador']}   
1  {'NER': [], 'ROLES': ['senior mobile developer', 'mobile developer', 'developer']}   
2                     {'NER': [], 'ROLES': ['project', 'project manager', 'manager']}   
3         {'NER': [('ict', 'ORG')], 'ROLES': ['ict', 'ict technician', 'technician']}   
4                                                    {'NER': [], 'ROLES': ['intern']}   

                                                                                                                          Pros Clean  \
0                                                flexible hour supportive team good training fun work environment opportunity travel   
1           

In [28]:
nlp("data scientist").similarity(nlp("machine learning engineer"))

0.5121792330607114

In [29]:
nlp("internship").similarity(nlp("full-time"))

0.22726951282021637

In [30]:
nlp("project manager").similarity(nlp("team lead"))

0.5455771796658736

In [31]:
data2.columns

Index(['Job Title', 'Job Rating', 'Time', 'Job Status', 'Employment_Type',
       'Role_Type', 'Duration', 'Pros', 'Cons', 'Company_Name'],
      dtype='object')

In [32]:
# apply the cleaning
data2["Employment_Type"] = data2["Employment_Type"].apply(clean_status)

In [33]:
data2["Job Rating"] = data2["Job Rating"].apply(clean_rating)

In [34]:
data2["Company_Name"] = data2["Company_Name"].str.lower().str.replace('r[^a-zA-Z]','', regex=True).str.strip()

In [None]:
data2[["Job Title Entities", "Job Title"]].head(50)

In [None]:
data2_cleaned

# Transformers implementation

In [40]:
data2_cleaned["Time"] = data2["Time"]
data2_cleaned["Employment_Type"] = data2["Employment_Type"]
data2_cleaned["Role_Type"] = data2["Role_Type"]
data2_cleaned["Duration"] = data2["Duration"]
data2_cleaned["Company_Name"] = data2["Company_Name"]

In [43]:
data2_cleaned['combined_text'] = (
    data2_cleaned['Job Title Clean'].fillna('') + ' ' +
    data2_cleaned['Pros Clean'].fillna('') + ' ' +
    data2_cleaned['Cons Clean'].fillna('') + ' ' +
    data2_cleaned['Role_Type'].fillna('')
)


Unnamed: 0,Job Title Clean,Job Title Entities,Pros Clean,Pros Entities,Cons Clean,Cons Entities,Cleaned Status,Cleaned Rating,Time,Employment_Type,Role_Type,Duration,Company_Name
0,brand ambassador,"{'NER': [], 'ROLES': ['brand', 'brand ambassador', 'ambassador']}",flexible hour supportive team good training fun work environment opportunity travel,"{'NER': [], 'ROLES': ['good', 'training']}",inconsistent hour limited career growth commission structure unclear stand long hour slow communication,"{'NER': [('hours', 'TIME'), ('long hours', 'TIME')], 'ROLES': []}","Former Employee, More Than 1 Year",5.0,2025-08-08,Former,employee,1 - 3 year,safaricom
1,senior mobile developer,"{'NER': [], 'ROLES': ['senior mobile developer', 'mobile developer', 'developer']}",great gain experience large scale enterprise application,"{'NER': [], 'ROLES': ['great']}",lot shift priority,"{'NER': [], 'ROLES': []}",Contract,4.0,2025-07-28,Current,contractor,1 - 3 year,safaricom
2,project manager,"{'NER': [], 'ROLES': ['project', 'project manager', 'manager']}",strong support professional growth training exposure largescale impactful project kenya competitive salary performancebased bonus,"{'NER': [('kenya', 'GPE')], 'ROLES': ['support', 'professional', 'training', 'projects']}",occasional tight deadline highpressure timeline bureaucracy delay project signoff,"{'NER': [], 'ROLES': ['project']}","Former Employee, More Than 3 Years",4.0,2025-07-15,Former,employee,>3 year,safaricom
3,ict technician,"{'NER': [('ict', 'ORG')], 'ROLES': ['ict', 'ict technician', 'technician']}",working experience good,"{'NER': [], 'ROLES': ['good']}",major drawback talk,"{'NER': [], 'ROLES': []}",Former Employee,5.0,2025-07-18,Former,employee,default,safaricom
4,intern,"{'NER': [], 'ROLES': ['intern']}",great work environment regular bonus,"{'NER': [], 'ROLES': ['great']}",think,"{'NER': [], 'ROLES': []}",Internship,5.0,2025-07-15,Former,internship,<1 year,safaricom
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25036,planning officer,"{'NER': [], 'ROLES': ['planning officer', 'officer']}",good medical cover employee,"{'NER': [], 'ROLES': ['good']}",term employment contract,"{'NER': [], 'ROLES': []}",Former Employee,4.0,2022-03-31,Former,employee,default,equity
25037,senior business analyst,"{'NER': [], 'ROLES': ['senior business analyst', 'business analyst', 'analyst']}",good learning potential staff,"{'NER': [], 'ROLES': ['good']}",work schedule flexible,"{'NER': [], 'ROLES': []}",Current Employee,4.0,2022-05-14,Current,employee,default,equity
25038,senior program manager,"{'NER': [], 'ROLES': ['senior program', 'senior program manager', 'program', 'program manager', 'manager']}",potential great social impact,"{'NER': [], 'ROLES': ['great']}",slow time,"{'NER': [], 'ROLES': []}",Former Employee,3.0,2016-05-02,Former,employee,default,equity
25039,ban assurance officer,"{'NER': [], 'ROLES': ['ban assurance officer', 'assurance', 'officer']}",medical insurance cover low interest loan staff,"{'NER': [], 'ROLES': []}",bad management appropriate pay extremely long work hour extensive workload toxic work culture,"{'NER': [('hours', 'TIME')], 'ROLES': ['management']}","Former Employee, More Than 1 Year",1.0,2022-10-17,Former,employee,1 - 3 year,equity


In [58]:
data2_cleaned

In [80]:
# load the transformer model
model = SentenceTransformer('all-MiniLM-L12-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [81]:
# generating job Embeddings
job_embeddings = model.encode(data2_cleaned['combined_text'].tolist(), show_progress_bar=True)

Batches:   0%|          | 0/783 [00:00<?, ?it/s]

In [82]:
# save Embeddings for later use
np.save("job_embeddings.npy", job_embeddings)

In [83]:
# load embedding
job_embeddings = np.load("job_embeddings.npy")

In [84]:
# handle user query
user_query = "I want a remote internship that pays well"

In [85]:
# convert to embedding
user_embedding = model.encode([user_query])

In [88]:
# compute similarity score
similarity_scores = cosine_similarity(user_embedding, job_embeddings)[0]
similarity_scores

array([0.32478243, 0.328838  , 0.22624011, ..., 0.20577152, 0.2428226 ,
       0.22375105], dtype=float32)

In [87]:
# rank and display top matches
data2_cleaned['similarity'] = similarity_scores
top_matches = data2_cleaned.sort_values(by='similarity', ascending=False).head(10)
top_matches[['Company_Name', 'Job Title Clean', 'similarity', 'Cleaned Rating', 'Role_Type']]

Unnamed: 0,Company_Name,Job Title Clean,similarity,Cleaned Rating,Role_Type
406,safaricom,remote support engineer,0.66934,5.0,internship
3564,google,software engineering intern ii,0.604513,5.0,employee
92,safaricom,technology intern,0.6024,4.0,contractor
19292,ibm,specialist infrastructure architecture,0.589775,5.0,employee
831,google,research intern,0.579647,5.0,employee
22205,cisco,digital channel intern,0.575103,5.0,employee
15663,ibm,cloud consulting intern,0.572754,4.0,employee
10707,ibm,software developer intern,0.569988,3.0,employee
24412,cisco,senior principal software engineer,0.567336,4.0,employee
13386,ibm,security intern,0.56494,4.0,employee


In [76]:
data2_cleaned.columns

Index(['Job Title Clean', 'Job Title Entities', 'Pros Clean', 'Pros Entities',
       'Cons Clean', 'Cons Entities', 'Cleaned Status', 'Cleaned Rating',
       'Time', 'Employment_Type', 'Role_Type', 'Duration', 'Company_Name',
       'combined_text', 'similarity'],
      dtype='object')

In [77]:
top_matches

Unnamed: 0,Job Title Clean,Job Title Entities,Pros Clean,Pros Entities,Cons Clean,Cons Entities,Cleaned Status,Cleaned Rating,Time,Employment_Type,Role_Type,Duration,Company_Name,combined_text,similarity
406,remote support engineer,"{'NER': [], 'ROLES': ['remote support', 'remote support engineer', 'support', 'support engineer', 'engineer']}",good environment training workmanship,"{'NER': [], 'ROLES': ['good', 'training']}",long sit hour relaxation hour,"{'NER': [('long sitting hours', 'TIME'), ('few relaxation hours', 'TIME')], 'ROLES': []}",Internship,5.0,2017-03-09,Former,internship,<1 year,safaricom,remote support engineer good environment training workmanship long sit hour relaxation hour internship,0.702584
11239,data engineer intern,"{'NER': [], 'ROLES': ['data engineer', 'data engineer intern', 'engineer', 'engineer intern', 'intern']}",team nice willing help ask,"{'NER': [], 'ROLES': []}",personally like fully remote internship,"{'NER': [], 'ROLES': ['internship']}","Former Employee, More Than 1 Year",4.0,2024-08-05,Former,employee,1 - 3 year,ibm,data engineer intern team nice willing help ask personally like fully remote internship employee,0.662719
3564,software engineering intern ii,"{'NER': [], 'ROLES': ['software', 'software engineering', 'software engineering intern', 'software engineering intern ii', 'engineering', 'engineering intern', 'intern']}",largely relate remote working experience summer intern entirely remote work clear focus maintain highlevel communication likely support google crosscampus nature google manage remote work people receptive communication outside team take ping big deal,"{'NER': [('summer 2020', 'DATE'), ('google', 'ORG'), ('google', 'ORG')], 'ROLES': ['interns', 'it', 'it']}",free food coffee remote working,"{'NER': [], 'ROLES': []}","Former Employee, Less Than 1 Year",5.0,2021-02-01,Former,employee,<1 year,google,software engineering intern ii largely relate remote working experience summer intern entirely remote work clear focus maintain highlevel communication likely support google crosscampus nature google manage remote work people receptive communication outside team take ping big deal free food coffee remote working employee,0.63695
13386,security intern,"{'NER': [], 'ROLES': ['security', 'security intern', 'intern']}",position pay get opportunity work high level enterprise security project complete degree,"{'NER': [], 'ROLES': ['security', 'projects']}",work remotely hard connect day assignment work,"{'NER': [], 'ROLES': ['it', 'many']}","Former Employee, More Than 1 Year",4.0,2020-07-29,Former,employee,1 - 3 year,ibm,security intern position pay get opportunity work high level enterprise security project complete degree work remotely hard connect day assignment work employee,0.604098
13694,intern marketing,"{'NER': [], 'ROLES': ['intern', 'intern marketing', 'marketing']}",ibm new york good internship experience want pretty interesting witness company transform,"{'NER': [('ibm', 'ORG'), ('new york', 'GPE')], 'ROLES': ['internship', 'it', 'it']}",hq armonk especially millennial,"{'NER': [('armonk', 'GPE')], 'ROLES': []}","Former Employee, More Than 1 Year",5.0,2017-11-05,Former,employee,1 - 3 year,ibm,intern marketing ibm new york good internship experience want pretty interesting witness company transform hq armonk especially millennial employee,0.602584
239,network intern,"{'NER': [], 'ROLES': ['network', 'network intern', 'intern']}",training certification course provide,"{'NER': [], 'ROLES': ['training']}",long hour work,"{'NER': [], 'ROLES': []}",Internship,4.0,2023-04-21,Former,internship,default,safaricom,network intern training certification course provide long hour work internship,0.588756
9818,law internship,"{'NER': [], 'ROLES': ['law internship', 'internship']}",employer friendly kind person good place start internship law student,"{'NER': [], 'ROLES': ['good', 'internship', 'student']}",salary internee,"{'NER': [], 'ROLES': []}","Current Employee, Less Than 1 Year",3.0,2012-06-07,Current,employee,<1 year,microsoft,law internship employer friendly kind person good place start internship law student salary internee employee,0.58327
103,software,"{'NER': [], 'ROLES': ['software', 'software engineer(internship)']}",good opportunity maybe retain,"{'NER': [], 'ROLES': ['good']}",pay low accord industry standard,"{'NER': [], 'ROLES': []}",Internship,4.0,2024-10-13,Former,internship,<1 year,safaricom,software good opportunity maybe retain pay low accord industry standard internship,0.578041
6433,software engineer apprentice,"{'NER': [], 'ROLES': ['software', 'software engineer', 'software engineer apprentice', 'engineer', 'apprentice']}",remote equipment setup pay weekly hand code experience learn typescript software development experience valuable day day life engineer project life cycle,"{'NER': [('weekly', 'DATE'), ('the day to day', 'DATE')], 'ROLES': ['software', 'software development', 'engineer', 'project']}",see intern role experience role state apprentice background build big barrier good network maybe consider go office team hybrid,"{'NER': [], 'ROLES': ['intern', 'apprentice', 'network']}","Former Employee, Less Than 1 Year",4.0,2023-10-22,Former,employee,<1 year,microsoft,software engineer apprentice remote equipment setup pay weekly hand code experience learn typescript software development experience valuable day day life engineer project life cycle see intern role experience role state apprentice background build big barrier good network maybe consider go office team hybrid employee,0.573422
15987,senior edi consultant,"{'NER': [('edi', 'ORG')], 'ROLES': ['senior edi consultant', 'edi consultant', 'consultant']}",work flexibilty learn opportunity remote work great place,"{'NER': [], 'ROLES': ['great']}",great place,"{'NER': [], 'ROLES': ['its', 'great']}","Current Employee, More Than 5 Years",5.0,2024-06-17,Current,employee,default,ibm,senior edi consultant work flexibilty learn opportunity remote work great place great place employee,0.570347
