In [1]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from textblob import TextBlob
import textstat

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the combined dataset from a CSV file given by a relative path.
data = pd.read_csv(filepath_or_buffer='new_combined_data.csv')

In [3]:
# Instantiate the sentence-embedding model once; every embedding cell below reuses it.
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

In [4]:
def get_sentence_embeddings_batch(texts, model, batch_size=32, verbose=True):
    """
    Generate sentence embeddings in batches using SentenceTransformer.

    The texts are sliced into consecutive chunks of ``batch_size`` items, each
    chunk is passed to ``model.encode``, and the per-chunk results are
    collected in input order.

    Args:
        texts (list): List of input texts.
        model (SentenceTransformer): Preloaded SentenceTransformer model. Only
            its ``encode(batch, batch_size=..., show_progress_bar=...)`` method
            is used.
        batch_size (int): Batch size for processing texts. Must be >= 1.
        verbose (bool): When True (the default), print the total number of
            batches and one progress line per batch.

    Returns:
        np.ndarray: Array of embeddings, one entry per input text. An empty
        ``texts`` yields an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A size of 0 would divide by zero below, and a negative size produces a
    # non-positive batch count or wrong slices, silently returning no or
    # partial embeddings. Fail loudly instead.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    embeddings = []
    # Ceiling division: a final partial batch still counts as one batch.
    total_batches = (len(texts) + batch_size - 1) // batch_size
    if verbose:
        print(f"Total Batches: {total_batches}")

    for i in range(total_batches):
        if verbose:
            print(f"Processing batch {i + 1}/{total_batches}...")

        # Slice out the current chunk of texts.
        batch = texts[i * batch_size:(i + 1) * batch_size]

        # Encode the chunk; the model's own progress bar is disabled because
        # progress is reported by the prints above.
        batch_embeddings = model.encode(batch, batch_size=batch_size, show_progress_bar=False)
        embeddings.extend(batch_embeddings)

    return np.array(embeddings)


In [5]:
# Embed the 'Transcript' column and store one vector per row.
texts = data['Transcript'].tolist()

# Encode in batches of 32, then convert the returned array to nested Python
# lists so every DataFrame cell holds a plain list.
batch_embeddings = get_sentence_embeddings_batch(texts, model, batch_size=32).tolist()

data['bert_embeddings_trans'] = batch_embeddings

Total Batches: 100
Processing batch 1/100...
Processing batch 2/100...
Processing batch 3/100...
Processing batch 4/100...
Processing batch 5/100...
Processing batch 6/100...
Processing batch 7/100...
Processing batch 8/100...
Processing batch 9/100...
Processing batch 10/100...
Processing batch 11/100...
Processing batch 12/100...
Processing batch 13/100...
Processing batch 14/100...
Processing batch 15/100...
Processing batch 16/100...
Processing batch 17/100...
Processing batch 18/100...
Processing batch 19/100...
Processing batch 20/100...
Processing batch 21/100...
Processing batch 22/100...
Processing batch 23/100...
Processing batch 24/100...
Processing batch 25/100...
Processing batch 26/100...
Processing batch 27/100...
Processing batch 28/100...
Processing batch 29/100...
Processing batch 30/100...
Processing batch 31/100...
Processing batch 32/100...
Processing batch 33/100...
Processing batch 34/100...
Processing batch 35/100...
Processing batch 36/100...
Processing batch 3

In [6]:
# Embed the 'Resume' column and store one vector per row.
texts = data['Resume'].tolist()

# Encode in batches of 32, then convert the returned array to nested Python
# lists so every DataFrame cell holds a plain list.
batch_embeddings = get_sentence_embeddings_batch(texts, model, batch_size=32).tolist()

data['bert_embeddings_resume'] = batch_embeddings

Total Batches: 100
Processing batch 1/100...
Processing batch 2/100...
Processing batch 3/100...
Processing batch 4/100...
Processing batch 5/100...
Processing batch 6/100...
Processing batch 7/100...
Processing batch 8/100...
Processing batch 9/100...
Processing batch 10/100...
Processing batch 11/100...
Processing batch 12/100...
Processing batch 13/100...
Processing batch 14/100...
Processing batch 15/100...
Processing batch 16/100...
Processing batch 17/100...
Processing batch 18/100...
Processing batch 19/100...
Processing batch 20/100...
Processing batch 21/100...
Processing batch 22/100...
Processing batch 23/100...
Processing batch 24/100...
Processing batch 25/100...
Processing batch 26/100...
Processing batch 27/100...
Processing batch 28/100...
Processing batch 29/100...
Processing batch 30/100...
Processing batch 31/100...
Processing batch 32/100...
Processing batch 33/100...
Processing batch 34/100...
Processing batch 35/100...
Processing batch 36/100...
Processing batch 3

In [7]:
# Embed the 'Job Description' column and store one vector per row.
texts = data['Job Description'].tolist()

# Encode in batches of 32, then convert the returned array to nested Python
# lists so every DataFrame cell holds a plain list.
batch_embeddings = get_sentence_embeddings_batch(texts, model, batch_size=32).tolist()

data['bert_embeddings_jd'] = batch_embeddings

Total Batches: 100
Processing batch 1/100...
Processing batch 2/100...
Processing batch 3/100...
Processing batch 4/100...
Processing batch 5/100...
Processing batch 6/100...
Processing batch 7/100...
Processing batch 8/100...
Processing batch 9/100...
Processing batch 10/100...
Processing batch 11/100...
Processing batch 12/100...
Processing batch 13/100...
Processing batch 14/100...
Processing batch 15/100...
Processing batch 16/100...
Processing batch 17/100...
Processing batch 18/100...
Processing batch 19/100...
Processing batch 20/100...
Processing batch 21/100...
Processing batch 22/100...
Processing batch 23/100...
Processing batch 24/100...
Processing batch 25/100...
Processing batch 26/100...
Processing batch 27/100...
Processing batch 28/100...
Processing batch 29/100...
Processing batch 30/100...
Processing batch 31/100...
Processing batch 32/100...
Processing batch 33/100...
Processing batch 34/100...
Processing batch 35/100...
Processing batch 36/100...
Processing batch 3

In [8]:
# Embed the 'Reason for decision' column and store one vector per row.
# NOTE(review): this text describes the hiring decision itself; if `decision`
# is later used as a prediction target, these vectors may leak it - confirm
# how this column is meant to be used.
texts = data['Reason for decision'].tolist()

# Encode in batches of 32, then convert the returned array to nested Python
# lists so every DataFrame cell holds a plain list.
batch_embeddings = get_sentence_embeddings_batch(texts, model, batch_size=32).tolist()

data['bert_embeddings_reason'] = batch_embeddings

Total Batches: 100
Processing batch 1/100...
Processing batch 2/100...
Processing batch 3/100...
Processing batch 4/100...
Processing batch 5/100...
Processing batch 6/100...
Processing batch 7/100...
Processing batch 8/100...
Processing batch 9/100...
Processing batch 10/100...
Processing batch 11/100...
Processing batch 12/100...
Processing batch 13/100...
Processing batch 14/100...
Processing batch 15/100...
Processing batch 16/100...
Processing batch 17/100...
Processing batch 18/100...
Processing batch 19/100...
Processing batch 20/100...
Processing batch 21/100...
Processing batch 22/100...
Processing batch 23/100...
Processing batch 24/100...
Processing batch 25/100...
Processing batch 26/100...
Processing batch 27/100...
Processing batch 28/100...
Processing batch 29/100...
Processing batch 30/100...
Processing batch 31/100...
Processing batch 32/100...
Processing batch 33/100...
Processing batch 34/100...
Processing batch 35/100...
Processing batch 36/100...
Processing batch 3

In [9]:
# Embed the 'polarity' column and store one vector per row.
# NOTE(review): in the DataFrame preview shown further down, every displayed
# row carries practically the same vector for this column, which suggests
# 'polarity' varies little between rows - confirm that embedding it is useful.
texts = data['polarity'].tolist()

# Encode in batches of 32, then convert the returned array to nested Python
# lists so every DataFrame cell holds a plain list.
batch_embeddings = get_sentence_embeddings_batch(texts, model, batch_size=32).tolist()

data['bert_embeddings_polarity'] = batch_embeddings

Total Batches: 100
Processing batch 1/100...
Processing batch 2/100...
Processing batch 3/100...
Processing batch 4/100...
Processing batch 5/100...
Processing batch 6/100...
Processing batch 7/100...
Processing batch 8/100...
Processing batch 9/100...
Processing batch 10/100...
Processing batch 11/100...
Processing batch 12/100...
Processing batch 13/100...
Processing batch 14/100...
Processing batch 15/100...
Processing batch 16/100...
Processing batch 17/100...
Processing batch 18/100...
Processing batch 19/100...
Processing batch 20/100...
Processing batch 21/100...
Processing batch 22/100...
Processing batch 23/100...
Processing batch 24/100...
Processing batch 25/100...
Processing batch 26/100...
Processing batch 27/100...
Processing batch 28/100...
Processing batch 29/100...
Processing batch 30/100...
Processing batch 31/100...
Processing batch 32/100...
Processing batch 33/100...
Processing batch 34/100...
Processing batch 35/100...
Processing batch 36/100...
Processing batch 3

In [10]:
# Spread each transcript embedding over one column per dimension, keeping the
# row index aligned with `data`; the positional column labels 0..n-1 become
# 'trans_emb_0' .. 'trans_emb_{n-1}'.
trans_expanded = pd.DataFrame(
    data['bert_embeddings_trans'].tolist(),
    index=data.index,
).add_prefix('trans_emb_')

In [11]:
# Spread each resume embedding over one column per dimension, keeping the row
# index aligned with `data`; the positional column labels 0..n-1 become
# 'resume_emb_0' .. 'resume_emb_{n-1}'.
resume_expanded = pd.DataFrame(
    data['bert_embeddings_resume'].tolist(),
    index=data.index,
).add_prefix('resume_emb_')

In [12]:
# Spread each job-description embedding over one column per dimension,
# keeping the row index aligned with `data`.
jd_expanded = pd.DataFrame(data['bert_embeddings_jd'].tolist(), index=data.index)
# Size the column names from this frame's own width rather than from
# `resume_expanded`, so the cell does not depend on another cell's frame
# having been built with the same dimensionality.
jd_expanded.columns = [f'jd_emb_{i}' for i in range(jd_expanded.shape[1])]

In [13]:
# Spread each polarity embedding over one column per dimension, keeping the
# row index aligned with `data`.
polarity_expanded = pd.DataFrame(data['bert_embeddings_polarity'].tolist(), index=data.index)
# Size the column names from this frame's own width rather than from
# `resume_expanded`, so the cell does not depend on another cell's frame
# having been built with the same dimensionality.
polarity_expanded.columns = [f'polarity_emb_{i}' for i in range(polarity_expanded.shape[1])]

In [14]:
# Display the DataFrame as the cell output to inspect the added embedding columns.
data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,ID,Name,Role,Transcript,Resume,decision,Reason for decision,Job Description,...,job_desc_complexity,interaction_quality,clarity_score,text_complexity_transcript,text_complexity_resume,bert_embeddings_trans,bert_embeddings_resume,bert_embeddings_jd,bert_embeddings_reason,bert_embeddings_polarity
0,0,0,uppaup1,alice smith,Software Engineer,here ' s a simulated interview for a software ...,here ' s a sample resume for alice smith : * *...,0,unsatisfactory references or background check .,here is a comprehensive job description for a ...,...,-23.75,585.0038,76.52,3.431535,2.246154,"[-0.07817234843969345, 0.049079809337854385, 0...","[-0.06964962184429169, -0.020188292488455772, ...","[-0.06100887805223465, 0.00048259348841384053,...","[-0.040162164717912674, -0.017107825726270676,...","[-0.0616629421710968, 0.04127373546361923, -0...."
1,1,1,uppaup2,hank brown,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a resume for hank brown , a selected ...",1,growth mindset and adaptability .,here is a job description for a software engin...,...,30.20,852.6588,53.00,3.270270,2.445736,"[-0.10405901074409485, 0.0319494865834713, 0.0...","[-0.1327906996011734, -0.01849408447742462, 0....","[-0.05222397297620773, -0.006503011099994183, ...","[0.06883401423692703, 0.011093874461948872, -0...","[-0.0616629421710968, 0.04127373546361923, -0...."
2,2,2,uppaup3,bob jones,Data Scientist,here ' s a simulated interview for a data scie...,"here ' s a sample resume for bob jones , who a...",0,inadequate communication or interpersonal skil...,here is a comprehensive job description for a ...,...,15.24,532.2378,75.50,2.843373,2.580392,"[-0.05218375474214554, -0.001612947671674192, ...","[-0.05162812024354935, -0.01782943308353424, -...","[-0.019691936671733856, -0.03378289192914963, ...","[0.10207246243953705, -0.001394086517393589, 0...","[-0.0616629421710968, 0.04127373546361923, -0...."
3,3,3,uppaup4,bob miller,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a sample resume for bob miller , a so...",1,strong cultural fit .,here is a sample job description for a softwar...,...,-49.73,713.8560,65.12,3.334532,2.626087,"[-0.07509399950504303, 0.018732476979494095, 0...","[-0.06595809012651443, -0.04517507180571556, -...","[-0.0209719929844141, 0.007755712140351534, 0....","[0.037205472588539124, 0.08258531987667084, -0...","[-0.0616629421710968, 0.04127373546361923, -0...."
4,4,4,uppaup5,ivy jones,Data Engineer,here ' s a simulated interview for a data engi...,here ' s a sample resume for ivy jones : * * i...,0,lack of relevant skills or experience .,here is a sample job description for a data en...,...,-34.51,618.0160,64.61,3.456067,3.044248,"[-0.06408646702766418, -0.014449547976255417, ...","[-0.03930989280343056, -0.09125347435474396, 0...","[-0.01902560144662857, -0.000738167145755142, ...","[0.06362446397542953, -0.021584298461675644, 0...","[-0.0616629421710968, 0.04127373546361923, -0...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3169,3169,3169,ananba44,ananya bansal,Data Engineer,here ' s a realistic interview transcript for ...,ananya bansal contact information : * phone : ...,1,experience gap,we are seeking a qualified data engineer with ...,...,28.33,784.8996,65.73,3.262500,2.821429,"[-0.07181523740291595, 0.0107415821403265, 0.0...","[-0.03627518191933632, 0.014616789296269417, 0...","[-0.014506162144243717, -0.03948299586772919, ...","[-0.0044815125875175, 0.00637664832174778, -0....","[-0.061662930995225906, 0.04127373918890953, -..."
3170,3170,3170,diyasi576,diya singh,Product Manager,interview transcript for product manager role ...,diya singh contact information : * email : [ d...,0,business acumen,we are seeking a qualified product manager wit...,...,26.81,734.4120,53.21,2.845659,2.798817,"[-0.053816378116607666, 0.03267369791865349, 0...","[-0.011702685616910458, -0.009703042916953564,...","[0.019577285274863243, -0.1129370629787445, 0....","[-0.041130270808935165, 0.03958068788051605, -...","[-0.061662930995225906, 0.04127373918890953, -..."
3171,3171,3171,harska507,harshitha kapoor,UI Designer,"interviewer : hi harshitha , thanks for coming...",harshitha kapoor ui engineer contact informati...,1,cultural fit,we are seeking a qualified ui engineer with de...,...,26.30,912.6348,64.61,3.032432,2.704301,"[-0.014528804458677769, 0.03496222943067551, 0...","[-0.11005644500255585, -0.027327178046107292, ...","[-0.03972817584872246, -0.08336398750543594, 0...","[0.05076644569635391, 0.09426332265138626, -0....","[-0.061662930995225906, 0.04127373918890953, -..."
3172,3172,3172,kabich225,kabir chopra,Software Engineer,here ' s a realistic interview transcript for ...,kabir chopra contact information : * email : [...,0,technical knowledge,we are seeking a qualified software engineer w...,...,27.83,804.9522,55.64,3.000000,2.353293,"[-0.13709937036037445, 0.059465426951646805, -...","[-0.08144161105155945, -0.008070056326687336, ...","[-0.015942247584462166, -0.039233069866895676,...","[-0.002540207700803876, 0.007100531365722418, ...","[-0.061662930995225906, 0.04127373918890953, -..."


Similarity

In [15]:
# Short aliases for the per-row embedding columns stored on `data`
# (each cell of these columns holds one embedding as a list).
resume_embeddings, job_description_embeddings = (
    data['bert_embeddings_resume'],
    data['bert_embeddings_jd'],
)

In [16]:
# Bin edges and the label attached to each of the five equal-width bins.
bins = [0, 0.2, 0.4, 0.6, 0.8, 1]
bin_labels = ["0-0.2", "0.2-0.4", "0.4-0.6", "0.6-0.8", "0.8-1.0"]

# Cosine similarity between each row's resume embedding and the job-description
# embedding of the same row (rows are paired positionally by zip).
similarities = [
    cosine_similarity([resume], [job_description])[0][0]
    for resume, job_description in zip(resume_embeddings, job_description_embeddings)
]

# Keep the raw, unclipped similarity on the DataFrame.
data['similarity'] = similarities

# Cosine similarity ranges over [-1, 1], and floating-point rounding can push
# a value fractionally above 1.0. pd.cut labels anything outside the outer
# edges as NaN, which would then drop those rows from any groupby on
# 'similarity_bin'. Clip for binning only, so out-of-range values land in the
# nearest edge bin while the stored 'similarity' stays untouched.
similarity_bins = pd.cut(
    np.clip(similarities, bins[0], bins[-1]),
    bins=bins,
    labels=bin_labels,
    include_lowest=True,
)

data['similarity_bin'] = similarity_bins

# Display the DataFrame with the two new columns.
data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,ID,Name,Role,Transcript,Resume,decision,Reason for decision,Job Description,...,clarity_score,text_complexity_transcript,text_complexity_resume,bert_embeddings_trans,bert_embeddings_resume,bert_embeddings_jd,bert_embeddings_reason,bert_embeddings_polarity,similarity,similarity_bin
0,0,0,uppaup1,alice smith,Software Engineer,here ' s a simulated interview for a software ...,here ' s a sample resume for alice smith : * *...,0,unsatisfactory references or background check .,here is a comprehensive job description for a ...,...,76.52,3.431535,2.246154,"[-0.07817234843969345, 0.049079809337854385, 0...","[-0.06964962184429169, -0.020188292488455772, ...","[-0.06100887805223465, 0.00048259348841384053,...","[-0.040162164717912674, -0.017107825726270676,...","[-0.0616629421710968, 0.04127373546361923, -0....",0.595726,0.4-0.6
1,1,1,uppaup2,hank brown,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a resume for hank brown , a selected ...",1,growth mindset and adaptability .,here is a job description for a software engin...,...,53.00,3.270270,2.445736,"[-0.10405901074409485, 0.0319494865834713, 0.0...","[-0.1327906996011734, -0.01849408447742462, 0....","[-0.05222397297620773, -0.006503011099994183, ...","[0.06883401423692703, 0.011093874461948872, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.690269,0.6-0.8
2,2,2,uppaup3,bob jones,Data Scientist,here ' s a simulated interview for a data scie...,"here ' s a sample resume for bob jones , who a...",0,inadequate communication or interpersonal skil...,here is a comprehensive job description for a ...,...,75.50,2.843373,2.580392,"[-0.05218375474214554, -0.001612947671674192, ...","[-0.05162812024354935, -0.01782943308353424, -...","[-0.019691936671733856, -0.03378289192914963, ...","[0.10207246243953705, -0.001394086517393589, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.706612,0.6-0.8
3,3,3,uppaup4,bob miller,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a sample resume for bob miller , a so...",1,strong cultural fit .,here is a sample job description for a softwar...,...,65.12,3.334532,2.626087,"[-0.07509399950504303, 0.018732476979494095, 0...","[-0.06595809012651443, -0.04517507180571556, -...","[-0.0209719929844141, 0.007755712140351534, 0....","[0.037205472588539124, 0.08258531987667084, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.578580,0.4-0.6
4,4,4,uppaup5,ivy jones,Data Engineer,here ' s a simulated interview for a data engi...,here ' s a sample resume for ivy jones : * * i...,0,lack of relevant skills or experience .,here is a sample job description for a data en...,...,64.61,3.456067,3.044248,"[-0.06408646702766418, -0.014449547976255417, ...","[-0.03930989280343056, -0.09125347435474396, 0...","[-0.01902560144662857, -0.000738167145755142, ...","[0.06362446397542953, -0.021584298461675644, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.603155,0.6-0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3169,3169,3169,ananba44,ananya bansal,Data Engineer,here ' s a realistic interview transcript for ...,ananya bansal contact information : * phone : ...,1,experience gap,we are seeking a qualified data engineer with ...,...,65.73,3.262500,2.821429,"[-0.07181523740291595, 0.0107415821403265, 0.0...","[-0.03627518191933632, 0.014616789296269417, 0...","[-0.014506162144243717, -0.03948299586772919, ...","[-0.0044815125875175, 0.00637664832174778, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.638696,0.6-0.8
3170,3170,3170,diyasi576,diya singh,Product Manager,interview transcript for product manager role ...,diya singh contact information : * email : [ d...,0,business acumen,we are seeking a qualified product manager wit...,...,53.21,2.845659,2.798817,"[-0.053816378116607666, 0.03267369791865349, 0...","[-0.011702685616910458, -0.009703042916953564,...","[0.019577285274863243, -0.1129370629787445, 0....","[-0.041130270808935165, 0.03958068788051605, -...","[-0.061662930995225906, 0.04127373918890953, -...",0.552780,0.4-0.6
3171,3171,3171,harska507,harshitha kapoor,UI Designer,"interviewer : hi harshitha , thanks for coming...",harshitha kapoor ui engineer contact informati...,1,cultural fit,we are seeking a qualified ui engineer with de...,...,64.61,3.032432,2.704301,"[-0.014528804458677769, 0.03496222943067551, 0...","[-0.11005644500255585, -0.027327178046107292, ...","[-0.03972817584872246, -0.08336398750543594, 0...","[0.05076644569635391, 0.09426332265138626, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.605839,0.6-0.8
3172,3172,3172,kabich225,kabir chopra,Software Engineer,here ' s a realistic interview transcript for ...,kabir chopra contact information : * email : [...,0,technical knowledge,we are seeking a qualified software engineer w...,...,55.64,3.000000,2.353293,"[-0.13709937036037445, 0.059465426951646805, -...","[-0.08144161105155945, -0.008070056326687336, ...","[-0.015942247584462166, -0.039233069866895676,...","[-0.002540207700803876, 0.007100531365722418, ...","[-0.061662930995225906, 0.04127373918890953, -...",0.552459,0.4-0.6


Acceptance Rate

In [17]:
# Mean of `decision` within each (similarity_bin, Role) group, broadcast back
# onto every row of that group. observed=True restricts the grouping to bin
# categories that actually occur.
# NOTE(review): this column is computed from `decision` itself; if `decision`
# is later a prediction target, using this as a feature would leak it -
# confirm intended use.
data['acceptance_rate'] = (
    data
    .groupby(['similarity_bin', 'Role'], observed=True)['decision']
    .transform('mean')
)

Resume Screening Score

In [18]:
# Weighted resume-screening score: 0.5 x resume/JD similarity, 0.3 x
# cultural-fit sentiment, 0.2 x clarity score.
# NOTE(review): the DataFrame previews show clarity_score values in the tens
# (e.g. 76.52) and resulting scores of roughly 10-15, so the clarity term
# appears to dominate the other two - confirm whether the inputs should be
# brought onto a common scale before weighting.
data['resume_screening_score'] = (
    data['resume_jd_similarity'].mul(0.5)
    .add(data['cultural_fit_sentiment'].mul(0.3))
    .add(data['clarity_score'].mul(0.2))
)

Interview Performance Score

In [19]:
# Weighted interview-performance score: 0.3 x sentiment, 0.3 x cultural-fit
# sentiment, 0.2 x soft-skills sentiment, 0.2 x clarity score.
# NOTE(review): the DataFrame previews show clarity_score values in the tens
# (e.g. 76.52) and resulting scores of roughly 10-15, so the clarity term
# appears to dominate the others - confirm whether the inputs should be
# brought onto a common scale before weighting.
data['interview_performance_score'] = (
    data['sentiment'].mul(0.3)
    .add(data['cultural_fit_sentiment'].mul(0.3))
    .add(data['soft_skills_sentiment'].mul(0.2))
    .add(data['clarity_score'].mul(0.2))
)

In [20]:
# Keep only the rows whose resume-screening score reaches the cut-off.
# NOTE(review): the displayed scores are around 10-15, far above 0.7, so this
# filter appears to retain every displayed row - confirm the threshold matches
# the scale of 'resume_screening_score'.
threshold = 0.7
passes_screening = data['resume_screening_score'].ge(threshold)
filtered_data = data.loc[passes_screening]

In [21]:
# Display the rows that passed the resume-screening threshold.
filtered_data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,ID,Name,Role,Transcript,Resume,decision,Reason for decision,Job Description,...,bert_embeddings_trans,bert_embeddings_resume,bert_embeddings_jd,bert_embeddings_reason,bert_embeddings_polarity,similarity,similarity_bin,acceptance_rate,resume_screening_score,interview_performance_score
0,0,0,uppaup1,alice smith,Software Engineer,here ' s a simulated interview for a software ...,here ' s a sample resume for alice smith : * *...,0,unsatisfactory references or background check .,here is a comprehensive job description for a ...,...,"[-0.07817234843969345, 0.049079809337854385, 0...","[-0.06964962184429169, -0.020188292488455772, ...","[-0.06100887805223465, 0.00048259348841384053,...","[-0.040162164717912674, -0.017107825726270676,...","[-0.0616629421710968, 0.04127373546361923, -0....",0.595726,0.4-0.6,0.463068,15.580658,15.650691
1,1,1,uppaup2,hank brown,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a resume for hank brown , a selected ...",1,growth mindset and adaptability .,here is a job description for a software engin...,...,"[-0.10405901074409485, 0.0319494865834713, 0.0...","[-0.1327906996011734, -0.01849408447742462, 0....","[-0.05222397297620773, -0.006503011099994183, ...","[0.06883401423692703, 0.011093874461948872, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.690269,0.6-0.8,0.596273,10.772642,10.959208
2,2,2,uppaup3,bob jones,Data Scientist,here ' s a simulated interview for a data scie...,"here ' s a sample resume for bob jones , who a...",0,inadequate communication or interpersonal skil...,here is a comprehensive job description for a ...,...,"[-0.05218375474214554, -0.001612947671674192, ...","[-0.05162812024354935, -0.01782943308353424, -...","[-0.019691936671733856, -0.03378289192914963, ...","[0.10207246243953705, -0.001394086517393589, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.706612,0.6-0.8,0.520619,15.433420,15.420855
3,3,3,uppaup4,bob miller,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a sample resume for bob miller , a so...",1,strong cultural fit .,here is a sample job description for a softwar...,...,"[-0.07509399950504303, 0.018732476979494095, 0...","[-0.06595809012651443, -0.04517507180571556, -...","[-0.0209719929844141, 0.007755712140351534, 0....","[0.037205472588539124, 0.08258531987667084, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.578580,0.4-0.6,0.463068,13.383894,13.468694
4,4,4,uppaup5,ivy jones,Data Engineer,here ' s a simulated interview for a data engi...,here ' s a sample resume for ivy jones : * * i...,0,lack of relevant skills or experience .,here is a sample job description for a data en...,...,"[-0.06408646702766418, -0.014449547976255417, ...","[-0.03930989280343056, -0.09125347435474396, 0...","[-0.01902560144662857, -0.000738167145755142, ...","[0.06362446397542953, -0.021584298461675644, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.603155,0.6-0.8,0.507407,13.340768,13.377836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3169,3169,3169,ananba44,ananya bansal,Data Engineer,here ' s a realistic interview transcript for ...,ananya bansal contact information : * phone : ...,1,experience gap,we are seeking a qualified data engineer with ...,...,"[-0.07181523740291595, 0.0107415821403265, 0.0...","[-0.03627518191933632, 0.014616789296269417, 0...","[-0.014506162144243717, -0.03948299586772919, ...","[-0.0044815125875175, 0.00637664832174778, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.638696,0.6-0.8,0.507407,13.314680,13.468707
3170,3170,3170,diyasi576,diya singh,Product Manager,interview transcript for product manager role ...,diya singh contact information : * email : [ d...,0,business acumen,we are seeking a qualified product manager wit...,...,"[-0.053816378116607666, 0.03267369791865349, 0...","[-0.011702685616910458, -0.009703042916953564,...","[0.019577285274863243, -0.1129370629787445, 0....","[-0.041130270808935165, 0.03958068788051605, -...","[-0.061662930995225906, 0.04127373918890953, -...",0.552780,0.4-0.6,0.461864,10.804167,10.973265
3171,3171,3171,harska507,harshitha kapoor,UI Designer,"interviewer : hi harshitha , thanks for coming...",harshitha kapoor ui engineer contact informati...,1,cultural fit,we are seeking a qualified ui engineer with de...,...,"[-0.014528804458677769, 0.03496222943067551, 0...","[-0.11005644500255585, -0.027327178046107292, ...","[-0.03972817584872246, -0.08336398750543594, 0...","[0.05076644569635391, 0.09426332265138626, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.605839,0.6-0.8,0.512903,13.172881,13.348467
3172,3172,3172,kabich225,kabir chopra,Software Engineer,here ' s a realistic interview transcript for ...,kabir chopra contact information : * email : [...,0,technical knowledge,we are seeking a qualified software engineer w...,...,"[-0.13709937036037445, 0.059465426951646805, -...","[-0.08144161105155945, -0.008070056326687336, ...","[-0.015942247584462166, -0.039233069866895676,...","[-0.002540207700803876, 0.007100531365722418, ...","[-0.061662930995225906, 0.04127373918890953, -...",0.552459,0.4-0.6,0.463068,11.255196,11.462417


In [22]:
# Final score: 0.6 x resume-screening score plus 0.4 x interview-performance score.
data['final_score'] = (
    data['resume_screening_score'].mul(0.6)
    .add(data['interview_performance_score'].mul(0.4))
)

In [23]:
# Display the DataFrame as the cell output to inspect the new 'final_score' column.
data

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,ID,Name,Role,Transcript,Resume,decision,Reason for decision,Job Description,...,bert_embeddings_resume,bert_embeddings_jd,bert_embeddings_reason,bert_embeddings_polarity,similarity,similarity_bin,acceptance_rate,resume_screening_score,interview_performance_score,final_score
0,0,0,uppaup1,alice smith,Software Engineer,here ' s a simulated interview for a software ...,here ' s a sample resume for alice smith : * *...,0,unsatisfactory references or background check .,here is a comprehensive job description for a ...,...,"[-0.06964962184429169, -0.020188292488455772, ...","[-0.06100887805223465, 0.00048259348841384053,...","[-0.040162164717912674, -0.017107825726270676,...","[-0.0616629421710968, 0.04127373546361923, -0....",0.595726,0.4-0.6,0.463068,15.580658,15.650691,15.608671
1,1,1,uppaup2,hank brown,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a resume for hank brown , a selected ...",1,growth mindset and adaptability .,here is a job description for a software engin...,...,"[-0.1327906996011734, -0.01849408447742462, 0....","[-0.05222397297620773, -0.006503011099994183, ...","[0.06883401423692703, 0.011093874461948872, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.690269,0.6-0.8,0.596273,10.772642,10.959208,10.847268
2,2,2,uppaup3,bob jones,Data Scientist,here ' s a simulated interview for a data scie...,"here ' s a sample resume for bob jones , who a...",0,inadequate communication or interpersonal skil...,here is a comprehensive job description for a ...,...,"[-0.05162812024354935, -0.01782943308353424, -...","[-0.019691936671733856, -0.03378289192914963, ...","[0.10207246243953705, -0.001394086517393589, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.706612,0.6-0.8,0.520619,15.433420,15.420855,15.428394
3,3,3,uppaup4,bob miller,Software Engineer,here ' s a simulated interview for a software ...,"here ' s a sample resume for bob miller , a so...",1,strong cultural fit .,here is a sample job description for a softwar...,...,"[-0.06595809012651443, -0.04517507180571556, -...","[-0.0209719929844141, 0.007755712140351534, 0....","[0.037205472588539124, 0.08258531987667084, -0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.578580,0.4-0.6,0.463068,13.383894,13.468694,13.417814
4,4,4,uppaup5,ivy jones,Data Engineer,here ' s a simulated interview for a data engi...,here ' s a sample resume for ivy jones : * * i...,0,lack of relevant skills or experience .,here is a sample job description for a data en...,...,"[-0.03930989280343056, -0.09125347435474396, 0...","[-0.01902560144662857, -0.000738167145755142, ...","[0.06362446397542953, -0.021584298461675644, 0...","[-0.0616629421710968, 0.04127373546361923, -0....",0.603155,0.6-0.8,0.507407,13.340768,13.377836,13.355595
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3169,3169,3169,ananba44,ananya bansal,Data Engineer,here ' s a realistic interview transcript for ...,ananya bansal contact information : * phone : ...,1,experience gap,we are seeking a qualified data engineer with ...,...,"[-0.03627518191933632, 0.014616789296269417, 0...","[-0.014506162144243717, -0.03948299586772919, ...","[-0.0044815125875175, 0.00637664832174778, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.638696,0.6-0.8,0.507407,13.314680,13.468707,13.376291
3170,3170,3170,diyasi576,diya singh,Product Manager,interview transcript for product manager role ...,diya singh contact information : * email : [ d...,0,business acumen,we are seeking a qualified product manager wit...,...,"[-0.011702685616910458, -0.009703042916953564,...","[0.019577285274863243, -0.1129370629787445, 0....","[-0.041130270808935165, 0.03958068788051605, -...","[-0.061662930995225906, 0.04127373918890953, -...",0.552780,0.4-0.6,0.461864,10.804167,10.973265,10.871806
3171,3171,3171,harska507,harshitha kapoor,UI Designer,"interviewer : hi harshitha , thanks for coming...",harshitha kapoor ui engineer contact informati...,1,cultural fit,we are seeking a qualified ui engineer with de...,...,"[-0.11005644500255585, -0.027327178046107292, ...","[-0.03972817584872246, -0.08336398750543594, 0...","[0.05076644569635391, 0.09426332265138626, -0....","[-0.061662930995225906, 0.04127373918890953, -...",0.605839,0.6-0.8,0.512903,13.172881,13.348467,13.243116
3172,3172,3172,kabich225,kabir chopra,Software Engineer,here ' s a realistic interview transcript for ...,kabir chopra contact information : * email : [...,0,technical knowledge,we are seeking a qualified software engineer w...,...,"[-0.08144161105155945, -0.008070056326687336, ...","[-0.015942247584462166, -0.039233069866895676,...","[-0.002540207700803876, 0.007100531365722418, ...","[-0.061662930995225906, 0.04127373918890953, -...",0.552459,0.4-0.6,0.463068,11.255196,11.462417,11.338085


In [24]:
# Persist the enriched DataFrame.
# index=False stops the row index from being written as an extra unnamed
# column; the loaded data already carries 'Unnamed: 0', 'Unnamed: 0.1' and
# 'Unnamed: 0.2' from earlier save/load round trips, and writing the index
# again would add another.
# NOTE(review): the list-valued embedding columns are written to CSV as text,
# so a reader will get strings back and must parse them - confirm downstream
# code handles that.
data.to_csv('similarity.csv', index=False)