In [50]:
import numpy as np
import pandas as pd
import re
import nltk

# Load the interview-answer dataset and pull out, by position, the two columns
# the rest of the notebook works with: column 2 (answer text) and column 1 (label).
# NOTE(review): 'unicode_escape' leaves bytes such as \x92 / \x85 / \xa0 in the
# text (visible in the output below) — confirm the file's real encoding.
TEXT_COLUMN = 2
LABEL_COLUMN = 1

behavioral_qs = pd.read_csv('Behavioral_questions.csv', encoding= 'unicode_escape')
labels = behavioral_qs.iloc[:, LABEL_COLUMN].values
features = behavioral_qs.iloc[:, TEXT_COLUMN].values
features

array(["I cannot say I've received much criticism.  I'm a honest employee and value my work.",
       "I have a proven track record as an achiever\x85many of your achievements match\xa0up with the employer's greatest wants and needs.\xa0",
       'You\xa0 know, I really can\x92t think of anything. I would add that as a\xa0 general management principle, I\x92ve found that the best way to avoid regrets is to avoid\xa0 causing them in the first place.',
       "I don't have a specific role model, but I did admire my high school basketball coach.",
       'I practice one habit that helps me a great deal in this\xa0 regard. At the end of each day, I mentally review the day\x92s events and conversations to\xa0 take a second look at the people and developments I\x92m involved with',
       'No way. I need weekends to myself as a personal boundary.',
       'From my side, there are strong benefits, as well. Right now, I am unemployed. I want\xa0 to work, very much, and the position you have he

In [51]:
# Patterns applied, in this order, to every raw answer.
_NON_WORD = re.compile(r'\W')                    # any special (non-word) character
_WHITESPACE_RUN = re.compile(r'\s+', flags=re.I)  # re.I has no effect on \s+; kept as in the original
_LONE_CHAR = re.compile(r'(?:^| )\w(?:$| )')      # a one-character token with its surrounding spaces


def _clean_answer(raw):
    """Return ``raw`` as lowercase text with special characters and one-letter tokens removed."""
    # Remove all the special characters
    text = _NON_WORD.sub(' ', str(raw))

    # Substituting multiple spaces with single space
    text = _WHITESPACE_RUN.sub(' ', text)

    # Remove all single characters. Two passes are required: neighbouring
    # one-letter tokens share a separating space, so a single non-overlapping
    # substitution can only catch every other one.
    for _ in range(2):
        text = _LONE_CHAR.sub(' ', text)

    # Converting to Lowercase
    return text.lower()


processed_features = [_clean_answer(answer) for answer in features]

processed_features

[' cannot say ve received much criticism honest employee and value my work ',
 ' have proven track record as an achiever many of your achievements match up with the employer greatest wants and needs ',
 'you know really can think of anything would add that as general management principle ve found that the best way to avoid regrets is to avoid causing them in the first place ',
 ' don have specific role model but did admire my high school basketball coach ',
 ' practice one habit that helps me great deal in this regard at the end of each day mentally review the day events and conversations to take second look at the people and developments involved with',
 'no way need weekends to myself as personal boundary ',
 'from my side there are strong benefits as well right now am unemployed want to work very much and the position you have here is exactly what love to do and am best at ll be happy doing this work and that what matters most to me lot more that money or title ',
 'in my experience

In [52]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

# Convert the cleaned answers into a dense TF-IDF matrix.
english_stop_words = stopwords.words('english')
vectorizer = TfidfVectorizer(
    max_features=70,
    min_df=0.05,
    stop_words=english_stop_words,
)

# NOTE: `processed_features` is rebound here from a list of strings to a numeric
# array, so this cell cannot be re-run on its own — re-run the cleaning cell first.
tfidf_sparse = vectorizer.fit_transform(processed_features)
processed_features = tfidf_sparse.toarray()
processed_features

array([[0.        , 0.        , 0.        , ..., 0.        , 0.48227107,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.59333137, 0.21435683, ..., 0.        , 0.        ,
        0.18413209],
       ...,
       [0.        , 0.        , 0.21442293, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.52201548, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.4331065 ]])

In [53]:
# Show the term -> column-index mapping learned by the fitted vectorizer.
learned_vocabulary = vectorizer.vocabulary_
print('vocabulary: ', learned_vocabulary)

vocabulary:  {'say': 55, 'received': 50, 'much': 17, 'work': 68, 'proven': 47, 'track': 62, 'record': 51, 'employer': 6, 'needs': 21, 'really': 49, 'think': 61, 'would': 69, 'principle': 44, 'found': 9, 'best': 2, 'way': 65, 'avoid': 1, 'regrets': 53, 'first': 8, 'place': 36, 'specific': 58, 'school': 56, 'practice': 41, 'one': 29, 'regard': 52, 'day': 4, 'take': 59, 'people': 34, 'involved': 11, 'need': 19, 'personal': 35, 'want': 64, 'position': 38, 'plenty': 37, 'let': 13, 'answer': 0, 'like': 14, 'positive': 39, 'question': 48, 'identify': 10, 'profession': 46, 'next': 25, 'months': 15, 'problem': 45, 'overlooking': 33, 'precedent': 42, 'try': 63, 'whatever': 66, 'could': 3, 'wider': 67, 'offending': 27, 'necessary': 18, 'non': 26, 'science': 57, 'never': 23, 'pressed': 43, 'enjoyed': 7, 'others': 32, 'tasks': 60, 'open': 30, 'relocate': 54, 'job': 12, 'moving': 16, 'new': 24, 'potential': 40, 'different': 5, 'negative': 22, 'needed': 20, 'offer': 28, 'opportunity': 31}


In [54]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Hold out 30% of the vectorised answers for evaluation; the fixed seed keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    processed_features,
    labels,
    test_size=0.3,
    random_state=0,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(max_depth=8, random_state=0).fit(X_train, y_train)

# Score the trained forest on the held-out split.
score = model.score(X_test, y_test)
print(score)


0.8333333333333334


In [55]:
# User test 1 - good response
good_response = 'My whole management approach is to hire the best people I can find, train them thoroughly and well, get them excited and proud to be part of our team, and then work with them to achieve our goals together. If you do all of that right, especially hiring the right people, I’ve found you don’t have to fire very often. So with me, firing is a last resort. But when it’s got to be done, it’s got to be done, and the faster and cleaner, the better.'

# Vectorise with the already-fitted vectorizer so the columns line up with training.
user_test = vectorizer.transform([good_response]).toarray()
model.predict(user_test)

array([1], dtype=int64)

In [56]:
# User test 2 - bad response
bad_response = 'I\'m unsure on what I want to do yet in the industry, but I\'m open to trying new things.'

# Vectorise with the already-fitted vectorizer so the columns line up with training.
user_test2 = vectorizer.transform([bad_response]).toarray()
model.predict(user_test2)

array([0], dtype=int64)

In [57]:
# Full user interaction simulation:
# 1. ask one interview question and read the typed answer,
# 2. award 5 points if the trained model rates the answer as class 1,
# 3. award 1 bonus point for every occurrence of a favoured keyword.

# keywords selected from generated vocabulary above
BONUS_KEYWORDS = {
    "received", "practice", "enjoyed", "positive", "necessary", "potential",
    "opportunity", "offer", "tasks", "profession", "people", "involved",
    "principle", "position", "found",
}

points = 0
user_input = input("Interview Question: What would you say to your boss if he/she is passionate about an idea, but you think it is a bad one?\n\n")
input_vectors = vectorizer.transform([user_input]).toarray()
rating = model.predict(input_vectors)

if rating[0] == 1:
    points += 5
    print("\nGreat response! You've earned 5 behavioral points!")
else:
    print("\nThere's some room for improvement.")

# Iterating over the string itself yields single characters, which can never
# equal a keyword. Split the answer into lowercase word tokens first so that
# punctuation and capitalisation do not hide a match.
for word in re.findall(r"\w+", user_input.lower()):
    if word in BONUS_KEYWORDS:
        # bonus points
        points += 1
print("After considering your word choice, you gained a total of", points, "points.")

# Example of a good response: My goal in this case would be to see if my boss and I could make his idea even stronger 
# and more appealing, so that it effectively overcomes any initial reservation I or others may have about it.

Interview Question: What would you say to your boss if he/she is passionate about an idea, but you think it is a bad one?

My goal in this case would be to see if my boss and I could make his idea even stronger and more appealing, so that it effectively overcomes any initial reservation I or others may have about it.

Great response! You've earned 5 behavioral points!
After considering your word choice, you gained a total of 5 points.
