# Example of Our Product In Action

In [0]:
%pip install google.generativeai
%pip install transformers
%pip install torch

Python interpreter will be restarted.
Python interpreter will be restarted.
Python interpreter will be restarted.
Collecting transformers
  Downloading transformers-4.39.3-py3-none-any.whl (8.8 MB)
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (738 kB)
Collecting huggingface-hub<1.0,>=0.19.3
  Downloading huggingface_hub-0.22.2-py3-none-any.whl (388 kB)
Collecting tokenizers<0.19,>=0.14
  Downloading tokenizers-0.15.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Collecting safetensors>=0.4.1
  Downloading safetensors-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting fsspec>=2023.5.0
  Downloading fsspec-2024.3.1-py3-none-any.whl (171 kB)
Installing collected packages: pyyaml, fsspec, huggingface-hub, tokenizers, safetensors, transformers
Successfully installed fsspec-2024.3.1 huggingface-hub-0.22.2 pyyaml-6.0.1 safetensors-0.4.2 tokenizers-0.15.2 transformers-4.39.3
Python

In [0]:
from pyspark.sql.types import *
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
import pandas as pd
from pyspark.sql.functions import rand
import math
import numpy as np
from IPython.display import Markdown


pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

spark = SparkSession.builder.getOrCreate()



### Load Necessary DataFrames

In [0]:
# Load companies' features & ranked similarities 
company_features = spark.read.parquet('/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/company_features_similarities_small.parquet', header=True)

# Load user features
user_feature_vectors = spark.read.parquet('/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/user_feature_vectors.parquet', header=True)

# Load companies with features and jobs
company_features_jobs = spark.read.parquet("/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/company_with_job_features.parquet", header=True)

In [0]:
 ordered_features = ['years of education',
 'years of experience',
 'recommendations',
 'volunteer experience',
 'english',
 'spanish',
 'chinese',
 'tagalog',
 'vietnamese',
 'french',
 'korean',
 'german',
 'arabic',
 'russian',
 'italian',
 'portuguese',
 'polish',
 'hindi',
 'japanese',
 'urdu',
 'gujarati',
 'persian',
 'telugu',
 'tamil',
 'greek',
 'armenian',
 'haitian creole',
 'filipino',
 'bengali',
 'panjabi',
 'kannada',
 'malayalam',
 'marathi',
 'odia',
 'sindhi',
 'sinhala',
 'assamese',
 'nepali',
 'haryanvi',
 'rajasthani',
 'chhattisgarhi',
 'maithili',
 'bhojpuri',
 'magahi',
 'kashmiri',
 'hebrew',
 'high school diploma',
 'associate\'s degree',
 'bachelor\'s degree',
 'master\'s degree',
 'doctorate\'s degree']

### Randomly Select a Job and an Interested User


In [0]:
job_posting = company_features_jobs.orderBy(rand(seed=2)).limit(1)
company_name = job_posting.select('name').collect()[0][0]
job_name = job_posting.select('job').collect()[0][0]

print("company:", company_name)
print("job:", job_name)
print("Number of employees in the company:", job_posting.select('num_emp').collect()[0][0])

company: northwestern mutual
job: Recruiter
Number of employees in the company: 576


In [0]:
user = user_feature_vectors.orderBy(rand(seed=22)).limit(1)
user_features = user.select('features').collect()[0][0].toArray().tolist()
user_id = user.select('id').collect()[0][0]

print("User ID:", user_id)
print("User features:")
for i in range(len(user_features)):
    print(f"{ordered_features[i]}: {user_features[i]}")

User ID: susan-chon-b32638123
User features:
years of education: 11.0
years of experience: 14.666666666666666
recommendations: 0.0
volunteer experience: 0.0
english: 1.0
spanish: 0.0
chinese: 0.0
tagalog: 0.0
vietnamese: 0.0
french: 0.0
korean: 1.0
german: 0.0
arabic: 0.0
russian: 0.0
italian: 0.0
portuguese: 0.0
polish: 0.0
hindi: 0.0
japanese: 0.0
urdu: 0.0
gujarati: 0.0
persian: 0.0
telugu: 0.0
tamil: 0.0
greek: 0.0
armenian: 0.0
haitian creole: 0.0
filipino: 0.0
bengali: 0.0
panjabi: 0.0
kannada: 0.0
malayalam: 0.0
marathi: 0.0
odia: 0.0
sindhi: 0.0
sinhala: 0.0
assamese: 0.0
nepali: 0.0
haryanvi: 0.0
rajasthani: 0.0
chhattisgarhi: 0.0
maithili: 0.0
bhojpuri: 0.0
magahi: 0.0
kashmiri: 0.0
hebrew: 0.0
high school diploma: 1.0
associate's degree: 1.0
bachelor's degree: 1.0
master's degree: 1.0
doctorate's degree: 1.0


### Give Initial Feedback to User

In [0]:
# Define cosine similarity function
def cosine_similarity(vector1, vector2):
    dot_product = sum(x * y for x, y in zip(vector1, vector2))
    magnitude_vector1 = math.sqrt(sum(x ** 2 for x in vector1))
    magnitude_vector2 = math.sqrt(sum(y ** 2 for y in vector2))
    return 0 if magnitude_vector1 == 0 or magnitude_vector2 == 0 else dot_product / (magnitude_vector1 * magnitude_vector2)

def find_insert_position(sorted_list, new_number):
    # Iterate through the sorted list
    for i, num in enumerate(sorted_list):
        # If the new number is smaller or equal to the current number, return the index
        if new_number <= num:
            return i
    # If the new number is greater than all numbers in the list, return the length of the list
    return len(sorted_list)

def get_rank(user_features, user_company):
    user_similarity = cosine_similarity(user_features, user_company[MEAN_VEC])
    percentage_rank = find_insert_position(user_company[SORTED_SIMILARITIES], user_similarity) / len(user_company[SORTED_SIMILARITIES])
    return percentage_rank

In [0]:
NUM_EMP = 2
THRESHOLD = 0 # Change as desired
MEAN_VEC = 1
SORTED_SIMILARITIES = 3

def calc_similarity_score(user_company, user_features):

    user_company = company_features.filter(F.col('current_company_name') == user_company).first()

    # Assume we have in our data all companies we allow access to this feature from.  
    rank = get_rank(user_features, user_company)
    mean_vec = user_company[MEAN_VEC]

### Since we are currently working with select companies, we will not use the similarity algorithm we developed in this example. However, we assume when our feature is incorporated in LinkedIn we will have the necessary additional resources to amend this. We can find a similar company like we do later to find questions, but we prefer not to duplicate the code as it's function is identical. 

    if user_company[NUM_EMP] < THRESHOLD:

        # Among companies with at least THRESHOLD employees?
        similar_company, comp_similarity_score = None 
        sim_comp_rank = get_rank(user_features, similar_company)
        weight = (similar_company[NUM_EMP] / (similar_company[NUM_EMP] + user_company[NUM_EMP]))*comp_similarity_score
        rank = weight*sim_comp_rank + (1 - weight)*rank

        mean_vec = weight * similar_company[MEAN_VEC] + (1 - weight)*mean_vec

    return rank*100, mean_vec

def prepare_message(rank, mean_vec, user_features):

    msg = f'Your perliminary calculated chances of suitability for this job are {rank:.1f}%. \n\n'

    good_msg = ''
    bad_msg = ''

    good_feats = []
    bad_feats = []    
    for i in range(3):
        feat = user_features[i]
        mean_feat = mean_vec[i]
        if feat >= mean_feat:
            good_feats.append(ordered_features[i])
        else:
            bad_feats.append((ordered_features[i], math.ceil(mean_feat - feat)))

    if len(good_feats):
        good_msg += f'You have matched or exceeded the recommended amount of {", ".join(good_feats)} for this job. '

    if len (bad_feats):
        bad_msg += f'You have {", ".join([str(f[1]) + " less " + f[0] for f in bad_feats])} than recommended for this job. '

    if mean_vec[3] > 0.2 and not user_features[3]:
        bad_msg += 'You have not reported any volunteer experience, and it is recommended to have for this job. '

    elif mean_vec[3] > 0.2 and user_features[3]:
        good_msg += 'You have reported having volunteer experience, which is recommended to have for this job. '

    langs = np.where(np.array(mean_vec)[4:-5] >= 0.2)[0]
    good_langs = []
    bad_langs = []

    for lang in langs:
        if user_features[4:-5][lang]:
            good_langs.append(ordered_features[4:-5][lang])
        else:
            bad_langs.append(ordered_features[4:-5][lang])

    if good_langs:
        good_msg += f'You have proficiency in the recommended languages: {", ".join(good_langs)}. '

    if bad_langs:
        bad_msg += f'You lack proficiency in the recommended languages: {", ".join(bad_langs)}. '

    for mean_deg in [-1, -2, -3, -4, -5]:
        if mean_vec[mean_deg] >= 0.5:
            break

    for user_deg in [-1, -2, -3, -4, -5]:
        if mean_vec[user_deg] >= 0.5:
            break

    if user_deg >= mean_deg:
        good_msg += f'The recommended education level is a {ordered_features[mean_deg]}, which you achieved. '

    else:
        bad_msg += f'The recommended education level is a {ordered_features[mean_deg]}, and you have a {ordered_features[user_deg]}. '

    msg += good_msg + '\n\n' + bad_msg 
    msg_dict = {'good': good_msg,
                'bad': bad_msg}

    return msg, msg_dict

In [0]:
def get_info(user_features, user_company):

    user_score, mean_vec = calc_similarity_score(user_company, user_features)
    msg, msg_dict = prepare_message(user_score, mean_vec, user_features)

    return msg, msg_dict

In [0]:
msg, msg_dict = get_info(user_features, company_name)
Markdown(msg)

Your perliminary calculated chances of suitability for this job are 63.2%. 

You have matched or exceeded the recommended amount of years of education, years of experience for this job. The recommended education level is a bachelor's degree, which you achieved. 

You have 1 less recommendations than recommended for this job. 

### Feed Data To Generative Model to Begin Interview Simulation

#### Configure Gemini Model

In [0]:
import pathlib
import textwrap
from IPython.display import display
from IPython.display import Markdown
import google.generativeai as genai


In [0]:
def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))
  
api_key = 'AIzaSyAED20eopE-GdR02Sn0rnPTWootmKilOpM'
genai.configure(api_key=api_key)

In [0]:
gemini_model = genai.GenerativeModel('gemini-pro')
INPUT_TOKEN_LIMIT = 30720

#### Find Relevant Questions

In [0]:
# Scraped question data
questions_df = spark.read \
    .csv("/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/questions.csv", header=True)
questions_df.show(10)

+--------------+--------+--------------------+
|     data_name|category|            question|
+--------------+--------+--------------------+
|American Water|    null|1. The utilities ...|
|American Water|    null|2. Our industry i...|
|American Water|    null|3. Have you perso...|
|American Water|    null|4. American Water...|
|American Water|    null|5. Why do you wan...|
|American Water|    null|6. What are your ...|
|American Water|    null|7. At American Wa...|
|American Water|    null|8. Why do you wan...|
|American Water|    null|9. Tell me about ...|
|American Water|    null|10. If we hire yo...|
+--------------+--------+--------------------+
only showing top 10 rows



In [0]:
# import re
# # Define data preprocessing functions
# def clean_text(text):
#     # Convert text to lowercase
#     text = text.lower()
#     # Remove punctuation
#     text = re.sub(r'[^\w\s]', '', text)
#     return text

# # Register UDF for data preprocessing
# clean_text_udf = udf(clean_text, StringType())

In [0]:
# # Load companies df
# companies = spark.read.parquet('/linkedin/companies')

# # find records for informative companies:
# informative_companies_names = questions_df.select(clean_text_udf(F.col('data_name')).alias('name')).distinct()
# informative_companies_df = companies.withColumn('clean_name', clean_text_udf(F.col('name'))) \
#     .join(informative_companies_names, F.col('clean_name') == informative_companies_names.name, "semi")

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-3791678015591082>[0m in [0;36m<cell line: 5>[0;34m()[0m
[1;32m      3[0m [0;34m[0m[0m
[1;32m      4[0m [0;31m# find records for informative companies:[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0;32m----> 5[0;31m [0minformative_companies_names[0m [0;34m=[0m [0mquestions_df[0m[0;34m.[0m[0mselect[0m[0;34m([0m[0mclean_text_udf[0m[0;34m([0m[0mcol[0m[0;34m([0m[0;34m'data_name'[0m[0;34m)[0m[0;34m)[0m[0;34m.[0m[0malias[0m[0;34m([0m[0;34m'name'[0m[0;34m)[0m[0;34m)[0m[0;34m.[0m[0mdistinct[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m      6[0m [0minformative_companies_df[0m [0;34m=[0m [0mcompanies[0m[0;34m.[0m[0mwithColumn[0m[0;34m([0m[0;34m'clean_name'[0m[0;34m,[0m [0mclean_text_udf[0m[0;34m([0m[0mcol[0m[0;34m(

In [0]:
from pyspark.sql.functions import col, lower

informative_companies_df = spark.read.parquet("/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/companies_have_qs.parquet")

# find record for current company:
my_company = company_features_jobs.filter(col("name") == company_name).limit(1)
print(my_company.count())
my_company.select('name').show()

1
+-------------------+
|               name|
+-------------------+
|northwestern mutual|
+-------------------+



In [0]:
from pyspark.sql.functions import lit

similarity_features_cols_short = ['name', 'about', 'company_size','slogan']
similarity_features_cols = ['company_A_name', 'company_B_name', 'company_A_about', 'company_B_about', 'company_A_company_size', 'company_B_company_size', 'company_A_slogan', 'company_B_slogan']

df_alias = informative_companies_df.alias("df")
single_row_alias = my_company.alias("single_row")

for col_name in similarity_features_cols_short:
    single_row_alias = single_row_alias.withColumnRenamed(col_name, "company_A_" + col_name)
    df_alias = df_alias.withColumnRenamed(col_name, "company_B_" + col_name)
df_alias = df_alias.withColumnRenamed('url', "company_B_url")

# Cross join the single row with the entire DataFrame
combined_df = single_row_alias.crossJoin(df_alias)
combined_df = combined_df.select(*similarity_features_cols, 'company_B_url')

# Show the result
combined_df.count()

Out[40]: 28

In [0]:
# NLP imports:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Pyspark imports
from pyspark.sql.functions import udf, abs, size, col, when, concat, array, lit, rand, count, explode, regexp_replace, expr
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType, IntegerType

# download NLTK stopwords:
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

nltk.download('punkt')
stemmer = PorterStemmer()

# define a function for stemming:
def stem_text(text):
    """
    Tokenize text and remove stopwords.
    input: text to tokenize
    output: preprocessed text
    """
    tokens = [word for word in word_tokenize(text.lower()) if word.isalpha() and word not in stop_words]
    stemmed_tokens = [stemmer.stem(token) for token in tokens]
    stemmed_text = ' '.join(stemmed_tokens)
    return stemmed_text

# define a function to calculate similarity between 2 text blocks:
def calculate_similarity(text1, text2, stemming=True):
    """
    Find cosine similarity between tf-idf vectors of the input texts.
    ouput: single similarity score
    """
    # if one or more of the texts are empty, define as 0 similarity:
    if text1 is None or text2 is None:
        return 0.0
    
    # preprocess the texts if neccesary: (not good for short imputs like names)
    if stemming:
        text1 = stem_text(text1)
        text2 = stem_text(text2)

    # create TF-IDF vectors:
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])

    # calculate cosine similarity:
    similarity_score = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
    return float(similarity_score)

# udf for text similarity:
similarity_udf = udf(lambda text1, text2, stemming: calculate_similarity(text1, text2, stemming), DoubleType())

### Create general features:

def extract_small_features_from_data(data, train):
    """
    Assume data contains the columns in similarity_features_cols.
    Create a features df for random forest model
    """

    # about (with stemming):
    data = data.withColumn('about_similarity_score', similarity_udf(col('company_A_about'), col('company_B_about'), lit(True)))

    # name (without stemming):
    data = data.withColumn('name_similarity_score', similarity_udf(data['company_A_name'], data['company_B_name'], lit(False)))

    # slogan (without stemming):
    data = data.withColumn('slogan_similarity_score', similarity_udf(data['company_A_slogan'], data['company_B_slogan'], lit(False)))

    # interval numerical variable for compay size:
    comp_size_dict = {
        '': 0,
        'None': 0,
        '1 employee': 1,
        '2-10 employees': 2,
        '11-50 employees': 3,
        '51-200 employees': 4,
        '201-500 employees': 5,
        '501-1,000 employees': 6,
        '1,001-5,000 employees': 7,
        '5,001-10,000 employees': 8,
        '10,001+ employees': 9
    }
    comp_size_udf = udf(lambda x: comp_size_dict.get(x, 0), IntegerType())

    data = data.withColumn('company_A_numerical_size',
                            when(col('company_A_company_size').isNull() | 
                                    (col('company_A_company_size') == 'None') | 
                                    (col('company_A_company_size') == ''), 0)
                            .otherwise(comp_size_udf(col('company_A_company_size'))))
    data = data.withColumn('company_B_numerical_size',
                            when(col('company_B_company_size').isNull() | 
                                    (col('company_B_company_size') == 'None') | 
                                    (col('company_B_company_size') == ''), 0)
                            .otherwise(comp_size_udf(col('company_B_company_size'))))

    ## Interaction features between same variables (for better importance comparison):
    # sizes of companies interaction:
    data = data \
        .withColumn("size_interaction", col("company_A_numerical_size") * col("company_B_numerical_size"))


    # final features df:
    if train:
        final_data_features = data.select('name_similarity_score', 'about_similarity_score', 'slogan_similarity_score', 'company_A_numerical_size', 'company_B_numerical_size', 'size_interaction', 'similarity_label')
    else:
        final_data_features = data.select('name_similarity_score', 'about_similarity_score', 'slogan_similarity_score', 'company_A_numerical_size', 'company_B_numerical_size', 'size_interaction', 'company_B_url')

    return final_data_features

# UDF to extract the probability of class 1 from the vector
def extract_class_1_probability(probability_vector):
    return float(probability_vector[1])

# Register UDF
extract_prob = udf(extract_class_1_probability, FloatType())

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [0]:
from pyspark.ml.classification import RandomForestClassificationModel
from pyspark.ml import PipelineModel
from pyspark.sql.functions import col, udf
from pyspark.sql.types import FloatType

## Load your trained Random Forest model
model_path = "/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/companies_model/rf_companies_similarity"
model = PipelineModel.load(model_path)

# Extract features and include identifiers for prediction
df = extract_small_features_from_data(combined_df, train=False)

# Make predictions as before
predictions = model.transform(df)

# Extract the probability of class 1 and include identifiers
predictions_with_prob = predictions.withColumn("probability_class_1", extract_prob("probability"))

# Order by probability and retain the company identifier
max_prob_row = predictions_with_prob.orderBy(col("probability_class_1").desc()).select("company_B_url", "probability_class_1").first()

print(max_prob_row)

Row(company_B_url='https://www.linkedin.com/company/honeywell2022', probability_class_1=1.0)


In [0]:
url_of_best_informative_company = max_prob_row['company_B_url']

best_company_record = companies.filter(col("url") == url_of_best_informative_company)
best_company_name = best_company_record.first()['name']
print(best_company_name)

Honeywell


### Prompt

In [0]:
if informative_companies_df.filter(lower(col('name')) == company_name.lower()).count() > 0:
    similar_questions_df = questions_df.filter(lower(col('data_name')) == company_name.lower()) \
    .select('question')
else:
    similar_questions_df = questions_df.filter(lower(col('data_name')) == best_company_name.lower()) \
        .select('question')

similar_questions = [row['question'] for row in similar_questions_df.collect()]
similar_questions = "\n".join(q for q in similar_questions)
print(similar_questions)

1. Northwestern Mutual is client-centric. How do you continually strengthen relationships with your exciting client base?
2. Wealth management is an all encompassing term. How would you explain what we do at Northwestern Mutual to someone unfamiliar with our company?
3. As a wealth management professional, have you ever been asked to act unethically? If so, what did you do?
4. In an initial client meeting, how do you first assess a client's financial position?
5. How do you stay current on developments and trends in wealth management?
6. Walk me through your education and how it relates to a career in wealth management.
7. How would you explain mutual funds to someone unfamiliar with the concept?
8. This role with Northwestern Mutual is very independent. How do you prospect for new customers?
9. Tell me about the systems and tools that you lean on to do your job.
10. Do you prefer to work independently, or with partners?
11. Do you have your Certified Financial Planner (CFP) designatio

In [0]:
### example user analysis (format can be changed to anything as long as it is also changed in the prompt - next cell)

user_analysis = msg_dict

In [0]:
# Create the prompt:

initial_prompt = f"Pretend you are an interviewer at {company_name}. Ask me some questions in a job interview for a {job_name} position. Output the questions as a list."

if user_analysis is not None:
    initial_prompt += f"\nAsk me some questions specifically about my strengths: {user_analysis['good']}, and weaknesess: {user_analysis['bad']}"

if len(similar_questions) > 0:
    initial_prompt += f"\nHere are some example questions from similar companies:\n{similar_questions}"
else:
    initial_prompt += f"\nHere are some examples for interview questions:\n{example_questions}"

print(initial_prompt)

Pretend you are an interviewer at northwestern mutual. Ask me some questions in a job interview for a Recruiter position. Output the questions as a list.
Ask me some questions specifically about my strengths: You have matched or exceeded the recommended amount of years of education, years of experience for this job. The recommended education level is a bachelor's degree, which you achieved. , and weaknesess: You have 1 less recommendations than recommended for this job. 
Here are some example questions from similar companies:
1. Northwestern Mutual is client-centric. How do you continually strengthen relationships with your exciting client base?
2. Wealth management is an all encompassing term. How would you explain what we do at Northwestern Mutual to someone unfamiliar with our company?
3. As a wealth management professional, have you ever been asked to act unethically? If so, what did you do?
4. In an initial client meeting, how do you first assess a client's financial position?
5. 

In [0]:
# initial_prompt:
response = gemini_model.generate_content(initial_prompt)

# generate answers:
to_markdown(response.text)

> **Strengths Questions:**
> 
> * Describe a time when you successfully exceeded expectations in a recruiting role and the impact it had on your organization.
> * How have your previous experiences prepared you for the high-volume recruiting environment at Northwestern Mutual?
> * What are your strengths that you believe would make you a valuable asset to our team?
> 
> **Weakness Questions:**
> 
> * You have one less recommendation than recommended for this job. How do you plan to address this potential shortcoming?
> * What specific areas do you believe you need to improve in order to meet the expectations of this role?
> 
> **Additional Questions:**
> 
> * How do you stay up-to-date on industry best practices and trends in recruiting?
> * What strategies do you use to identify and attract top-tier candidates?
> * How do you handle the challenges of managing multiple recruiting pipelines simultaneously?
> * How do you assess the fit between a candidate and a specific position?
> * What is your approach to building and maintaining relationships with hiring managers and stakeholders?
> * How do you evaluate the success of your recruiting efforts?
> * What is your understanding of Northwestern Mutual's culture and values?
> * Why are you interested in working for Northwestern Mutual specifically?
> * What are your long-term career aspirations and how does this role align with them?

**now we pretend that the user pressed some button and wants to simulate an interview.**

In [0]:
# assume that the user clicked a question to answer:
question_picked = "Describe a time when you successfully exceeded expectations in a recruiting role and the impact it had on your organization."

interview_prompt_instruction = f"Pretend you are an interviewer, and you asked me the question: {question_picked}. Respond to my answer and give me an evaluation about my answer: "

In [0]:
### generate a theoretical user response with chatGPT4 as an example

# good example:
good_user_input = "In my previous role as a recruiting manager, I encountered a situation where our company urgently needed to fill several key positions within a short timeframe due to an unexpected surge in project demand. Recognizing the criticality of the situation, I immediately devised a strategic recruitment plan that involved leveraging multiple channels, including job boards, social media platforms, and professional networks, to source top-tier candidates. Additionally, I implemented streamlined screening and interview processes to expedite the hiring process without compromising quality. Through meticulous candidate assessment and proactive engagement, I successfully identified and recruited a highly talented pool of candidates within the specified timeframe, exceeding the hiring targets by 20%. This influx of skilled professionals significantly bolstered our team's capabilities, enabling us to deliver projects ahead of schedule and surpass client expectations. Moreover, the positive impact of this recruitment initiative extended beyond immediate project success, as it strengthened our organization's reputation as an employer of choice in the industry, attracting top talent and fostering long-term growth and success."

response = gemini_model.generate_content(interview_prompt_instruction + user_input)
to_markdown(response.text)

> **Interviewer's Response:**
> 
> Thank you for your thoughtful answer. I appreciate your enthusiasm and alignment with Amazon's culture of innovation. Your passion for pushing boundaries and driving change is evident in your response.
> 
> Specifically, your mention of leading cross-functional teams in developing and implementing cutting-edge technologies demonstrates your ability to collaborate effectively and drive results. Your commitment to operational efficiency and enhancing customer experiences also aligns with Amazon's priorities.
> 
> **Evaluation:**
> 
> Overall, your answer successfully exceeded expectations. You showcased:
> 
> * A clear understanding of Amazon's values and culture
> * A track record of innovation and driving change
> * A commitment to continuous learning and growth
> * A proactive approach to technology advancements
> * A strong alignment between your skills and Amazon's business objectives
> 
> Your response is well-structured and provides specific examples that demonstrate your capabilities. You have effectively highlighted how your experience and mindset would contribute to Amazon's culture of innovation and transformative impact.
> 
> **Next Steps:**
> 
> I would like to explore further with you how you have quantified the impact of your innovation efforts in previous roles. Can you provide specific metrics or case studies that demonstrate the tangible benefits and outcomes you have achieved?

In [0]:
# bad example:
bad_user_input = "Yeah, so there was this one time when I was doing recruiting stuff, and, like, we needed to hire some people real quick because, you know, we had a lot of work piling up and not enough hands to do it. So, I kinda just did what I usually do, like posting job ads online and stuff. I didn't really have a plan or anything, just kinda winged it. Anyway, we managed to hire a few folks eventually, but it took longer than expected, and we had to settle for some candidates who weren't exactly what we were looking for. It was kind of a mess, to be honest, and it didn't really have much of an impact on the organization. We got the work done eventually, but it wasn't anything to write home about."

response = gemini_model.generate_content(interview_prompt_instruction + user_input)
to_markdown(response.text)

> **Evaluation:**
> 
> Your answer effectively addresses the question by highlighting your commitment to innovation and forward-thinking. You provide specific examples of how you have exceeded expectations in your recruiting role and the impact it has had on your organization.
> 
> **Response:**
> 
> "Thank you for sharing your experience of successfully exceeding expectations in your recruiting role. Your commitment to innovation and forward-thinking is evident in your accomplishments, and I believe you would be a valuable asset to our team at Amazon.
> 
> Your ability to identify and recruit top talent, coupled with your understanding of emerging trends and technologies, aligns perfectly with our company's ethos as pioneers. Your proactive approach and dedication to driving transformative impact would make you an exceptional addition to our team.
> 
> I am particularly impressed with your leadership in developing and implementing cutting-edge technologies and spearheading initiatives to optimize operational efficiency. Your ability to think strategically and drive change aligns with our company's culture of innovation and growth.
> 
> I am confident that your skills and experience would enable you to make a significant contribution to Amazon. Your passion for exploration and dedication to excellence are qualities that we highly value in our team members.
> 
> Thank you again for sharing your experience. We would like to invite you to the next round of interviews to further explore your qualifications and how you can bring your innovative mindset to our organization."

#### Sentiment Analysis Inference

In [0]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch.nn.functional as F
import torch

In [0]:
model_weights_path = '/dbfs/FileStore/shared_uploads/naomi.derel@campus.technion.ac.il/sent_analys_model/model_weights.pth'

loaded_model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2).to(torch.device('cpu'))
loaded_model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu')))

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Out[67]: <All keys matched successfully>

In [0]:
def inference_of_answer(model, text):
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    if torch.backends.mps.is_available():
        model = model.to(torch.device('mps'))
        input_ids = to_cuda(input_ids)
        attention_mask = to_cuda(attention_mask)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    logits = outputs.logits
    probs = F.softmax(logits, dim=1)
    return [probs[0][0].item(), probs[0][1].item()]

In [0]:
# bad example:
prob_bad, prob_good = inference_of_answer(loaded_model, bad_user_input)

print(f"Good: {prob_good}, Bad: {prob_bad}")
print("Decision: " + "Good" if prob_good > prob_bad else "Decision: " + "Bad")

Good: 6.348652095766738e-05, Bad: 0.9999364614486694
Decision: Bad


In [0]:
# good example:
prob_bad, prob_good = inference_of_answer(loaded_model, good_user_input)

print(f"Good: {prob_good}, Bad: {prob_bad}")
print("Decision: " + "Good" if prob_good > prob_bad else "Decision: " + "Bad")

Good: 0.9999399185180664, Bad: 6.013096935930662e-05
Decision: Good
