In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Torch 
import torch
import torch.nn as nn
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader

# Scikit-Learn
# Data Encoding and Scaling
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [3]:
df = pd.read_csv('IntervieweeDataset.csv')

In [4]:
df.head(2)

Unnamed: 0,Name,Age,Gender,Type of Graduation/Post Graduation,Marital status,Mode of interview given by candidate?,Pre Interview Check,Fluency in English based on introduction,Confidence based on Introduction (English),Confidence based on the topic given,...,Structured Thinking Based on the PPT Question.1,Structured Thinking( Call pitch).1,Regional fluency based on the topic given .1,Regional fluency Based on the PPT Question.1,Regional fluency based on the sales scenario.1,Confidence Score,Structured Thinking Score,Regional Fluency Score,Total Score,Whether joined the company or not\n
0,parida,25,Female,Masters in data science,Unmarried,Mobile,Proceed with the Interview,Able to speak sentences in a clear/coherent wa...,Impactful - Good confidence throughout the Int...,Guarded Confidence - Confident in some areas a...,...,3.0,2.0,1.0,1.0,1.0,11.0,7.0,3.0,42.0,No
1,shreej,29,Female,BSc or MSc,Unmarried,Mobile,Proceed with the Interview,Able to speak sentences in a clear/coherent wa...,Impactful - Good confidence throughout the Int...,Impactful - Good confidence throughout the Int...,...,3.0,3.0,3.0,3.0,3.0,12.0,9.0,9.0,60.0,No


In [5]:
df.dropna(inplace=True)

In [6]:
df.drop(df.iloc[:,8:18], axis=1, inplace=True)

In [7]:
df.drop(['Does the candidate has mother tongue influence while speaking english.', 'How many slides candidate have submitted in PPT?'], axis=1, inplace=True)

In [8]:
df.shape

(538, 40)

In [9]:
df['Type of Graduation/Post Graduation'].unique()

array(['Masters in data science', 'BSc or MSc', 'B.E / B-Tech', 'BA/MA',
       'B.ed(Teaching)', 'M.E / M-Tech', 'B.com (Bachelor of commerce)',
       'MED', 'M.com', 'MBA', 'BCA/MCA',
       'B.comm (Bachelor of mass comm.)', 'BBA or BBM',
       'Hotel Management', 'PG diploma ', 'B.pharma/M.pharma', 'PGDM',
       'Bachelor in dental', 'pgdm', 'Bachelor in medical lab', 'Pgdm',
       'master maas comm', 'BE - CSe and CDAC '], dtype=object)

In [10]:
df.rename(columns={'Type of Graduation/Post Graduation':'Education', 'Mode of interview given by candidate?':'Mode of Interview', 'Has acquaintance in Company and has spoken to him/her before applying?':'Acquaintance and Referral'}, inplace=True)

In [11]:
df.rename(columns={'Whether joined the company or not\n': 'Whether joined the company or not', 'What was the type of Role?\t': 'What was the type of Role?'}, inplace=True)

In [12]:
df.rename(columns={"But, my child's exam are going on now, so we will keep the counselling session after the exams get over.(Time: Favourable pitch: Counsellor hype)":"But, my child's exam are going on now, so we will keep the counselling session after the exams get over"}, inplace=True)

In [13]:
df.head(1)

Unnamed: 0,Name,Age,Gender,Education,Marital status,Mode of Interview,Pre Interview Check,Fluency in English based on introduction,Acquaintance and Referral,Candidate Status,...,Structured Thinking Based on the PPT Question.1,Structured Thinking( Call pitch).1,Regional fluency based on the topic given .1,Regional fluency Based on the PPT Question.1,Regional fluency based on the sales scenario.1,Confidence Score,Structured Thinking Score,Regional Fluency Score,Total Score,Whether joined the company or not
0,parida,25,Female,Masters in data science,Unmarried,Mobile,Proceed with the Interview,Able to speak sentences in a clear/coherent wa...,No,Experienced in non client facing(equal to or m...,...,3.0,2.0,1.0,1.0,1.0,11.0,7.0,3.0,42.0,No


In [14]:
df.columns

Index(['Name', 'Age', 'Gender', 'Education', 'Marital status',
       'Mode of Interview', 'Pre Interview Check',
       'Fluency in English based on introduction', 'Acquaintance and Referral',
       'Candidate Status', 'Last Fixed CTC (lakhs) ', 'Currently Employed',
       'Experienced candidate - (Experience in months)',
       'Experienced Candidate (Nature of work)', 'What was the type of Role?',
       'Call-pitch Elements used during the call Sales Scenario',
       'But, my child's exam are going on now, so we will keep the counselling session after the exams get over',
       'Let me discuss it with my child',
       'Sir being in education industry I know this is a marketing gimmick and at the end of the day you'll be selling the app.',
       'Role acceptance', 'Interview Verdict',
       'Candidate is willing to relocate',
       'Role Location to be given to the candidate', 'Comments',
       'RedFlags Comments in Interview',
       'Confidence based on Introduction (Engl

In [15]:
df['Whether joined the company or not'] = df['Whether joined the company or not'].replace({'Joined':'Yes', 'Not Joined':'No'})
df['Whether joined the company or not'] = df['Whether joined the company or not'].map({'Yes': 1, 'No': 0}).astype(int)

In [16]:
numerical_df = df.copy()
numerical_df.drop(['Name',
                   'Comments',
                   'RedFlags Comments in Interview',
                   'Whether joined the company or not',
                  'Call-pitch Elements used during the call Sales Scenario',
                  "But, my child's exam are going on now, so we will keep the counselling session after the exams get over",
                  'Let me discuss it with my child',
                  "Sir being in education industry I know this is a marketing gimmick and at the end of the day you'll be selling the app.",], axis=1, inplace=True)

In [17]:
df = df[['Name',
         'Comments',
         'RedFlags Comments in Interview',
         'Whether joined the company or not',
        'Call-pitch Elements used during the call Sales Scenario',
        "But, my child's exam are going on now, so we will keep the counselling session after the exams get over",
        'Let me discuss it with my child',
        "Sir being in education industry I know this is a marketing gimmick and at the end of the day you'll be selling the app.",
        ]]

In [18]:
le = LabelEncoder()
le

In [19]:
# Define a mapping dictionary
degree_mapping = { 'B.E / B-Tech': 'Engineering', 'M.E / M-Tech': 'Engineering', 'BSc or MSc': 'Science', 'BCA/MCA': 'Science', 'BCS': 'Science', 'B.com (Bachelor of commerce)': 'Commerce & Management', 'M.com': 'Commerce & Management', 'BBA or BBM': 'Commerce & Management', 'MBA': 'Commerce & Management', 'PGDM': 'Commerce & Management', 'PGDCA': 'Commerce & Management', 'BMS': 'Commerce & Management', 'BA/MA': 'Arts & Humanities', 'MSW': 'Arts & Humanities', 'B.pharma/M.pharma': 'Pharmacy & Medical', 'B.ed(Teaching)': 'Teaching & Education', 'Hotel Management': 'Hotel Management', 'B.comm (Bachelor of mass comm.)': 'Mass Communication', 'BVOC': 'Vocational Studies' } 

In [20]:
#mapping & "Other" to unknown categories 
numerical_df['Education'] = numerical_df['Education'].map(degree_mapping).fillna('Other') 

In [21]:
numerical_df['Education'].value_counts()

Education
Engineering              166
Commerce & Management    148
Science                  123
Arts & Humanities         72
Teaching & Education      10
Other                      9
Hotel Management           4
Pharmacy & Medical         4
Mass Communication         2
Name: count, dtype: int64

In [22]:
job_category_mapping = {
    'tech': 'Technology', 'tech job ': 'Technology', 'in  tech job ': 'Technology',
    'software ': 'Technology', 'software testor': 'Technology', 'IT RELATED WORK': 'Technology',
    'application support ': 'Technology', 'tech support ': 'Technology',
    'Junior software engineer - Providing a task.': 'Technology', 'business analyst': 'Technology',
    'BACKEND ': 'Technology',

    'Sales numbers': 'Sales & Marketing', 'Call time, Sales numbers': 'Sales & Marketing',
    'Sales numbers, Customer visits': 'Sales & Marketing',
    'Call time, Sales numbers, Customer visits': 'Sales & Marketing',
    'Call time, Sales numbers, Customer visits, working hours': 'Sales & Marketing',
    'marketing ': 'Sales & Marketing', 'Lead generation ': 'Sales & Marketing',
    'sales ': 'Sales & Marketing', 'CSAT Traget': 'Sales & Marketing',
    'Marketing and HR': 'Sales & Marketing', 'Call time': 'Sales & Marketing',
    'Call time, Customer visits': 'Sales & Marketing', 
    'Call time, Customer visits, working hours': 'Sales & Marketing',

    'working hours': 'Operations & Management', 'Operations': 'Operations & Management',
    'opertaional': 'Operations & Management', 'operation': 'Operations & Management',
    'data and operations ': 'Operations & Management',
    'maintaining data and client meeting ': 'Operations & Management',
    'targets regarding joinees ': 'Operations & Management', 
    'manpower managmeny': 'Operations & Management',
    'data entry': 'Operations & Management', 'Customer visits, working hours': 'Operations & Management',

    'finance': 'Finance & Accounting', 'finance ': 'Finance & Accounting',
    'finance of tv and mobiles, ': 'Finance & Accounting', 'Accounts': 'Finance & Accounting',
    'complete and file itr under timeline': 'Finance & Accounting',
    'Details of stocks': 'Finance & Accounting',

    'Teaching': 'Teaching & Education', 'teaching': 'Teaching & Education',
    'TEACHING': 'Teaching & Education', 'TEACHING ': 'Teaching & Education',
    'tEACHER': 'Teaching & Education', 'teaching roles': 'Teaching & Education',

    'quality check': 'Manufacturing & Engineering', 'quality checking': 'Manufacturing & Engineering',
    'trade fitter 2 years': 'Manufacturing & Engineering', 'mech field trainee and had a team': 'Manufacturing & Engineering',
    'civil engineering': 'Manufacturing & Engineering', 'civil engg cdt, some client handling ': 'Manufacturing & Engineering',
    'engg': 'Manufacturing & Engineering', 'geolist engineering ': 'Manufacturing & Engineering',

    'customer feeback': 'Customer Support & Service', 'customer service ': 'Customer Support & Service',
    'resolve queries': 'Customer Support & Service', "SOLVING CANDIDATE'S ISSUES": 'Customer Support & Service',

    'Market research': 'Research & Analysis', 'how he is gathering information': 'Research & Analysis',
    'analyzing work': 'Research & Analysis', 'validation work': 'Research & Analysis',

    'CLIENTS BROUGHT TO THE CO': 'Human Resources',

    'Fresher(<6 months)': 'Other', 'Fresher': 'Other', 'FRESHER': 'Other',
    'Fresher(<6 months), Operation': 'Other', 'MeetINGs': 'Other', 'Meetings': 'Other',
    'other': 'Other', 'others': 'Other', 'email': 'Other', 'na': 'Other', 'Na': 'Other', '.': 'Other',
    'No targets': 'Other', 'no targets ': 'Other', 'no target': 'Other', 'No targets ': 'Other',
    'No Targets': 'Other', 'Non Target Oriented Role': 'Other', 'no  ': 'Other'
}


In [23]:
#mapping & "Other" to unknown categories 
numerical_df['Experienced Candidate (Nature of work)'] = numerical_df['Experienced Candidate (Nature of work)'].map(job_category_mapping).fillna('Other') 

In [24]:
numerical_df['Experienced Candidate (Nature of work)'].value_counts()

Experienced Candidate (Nature of work)
Other                          382
Sales & Marketing               84
Operations & Management         28
Technology                      11
Teaching & Education            10
Manufacturing & Engineering      8
Finance & Accounting             6
Research & Analysis              4
Customer Support & Service       4
Human Resources                  1
Name: count, dtype: int64

In [118]:
call_pitch_mapping = {
    'Purpose of Call (Book a Counselling Session)': 'Purpose & Need Generation',

    'Introduction (Self Intro,Company Name)': 'Introduction',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session)': 'Purpose & Need Generation',

    'Introduction (Self Intro,Company Name), Need Generation - By Asking ques like student class, performance etc..': 'Purpose & Need Generation',

    'Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc.': 'Rapport Building',

    'Need Generation - By Asking ques like student class, performance etc..': 'Purpose & Need Generation',

    'Closing - Taking address / date / time from the customer to book a session': 'Closing',

    'Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.': 'Urgency Creation',

    'None of the Above': 'Unknown/Unclear',
    "None of the Above / CDT Don't Know the Call Pitch": 'Unknown/Unclear',
    "Introduction (Self Intro,Company Name), None of the Above / CDT Don't Know the Call Pitch": 'Unknown/Unclear',
    "Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., None of the Above / CDT Don't Know the Call Pitch": 'Unknown/Unclear',

    # Multi-component calls categorized under 'Multiple Components'
    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc..': 'Multiple Components',
    
    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc.': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc.., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc.., Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc.., Urgency of the session  - Scholarship, CDT shortlisted from your area and so on., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Need Generation - By Asking ques like student class, performance etc..': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Need Generation - By Asking ques like student class, performance etc.., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Need Generation - By Asking ques like student class, performance etc..': 'Multiple Components',

    'Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc.., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.': 'Multiple Components',

    'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Urgency of the session  - Scholarship, CDT shortlisted from your area and so on., Closing - Taking address / date / time from the customer to book a session': 'Multiple Components',

    "Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc.., None of the Above / CDT Don't Know the Call Pitch": 'Multiple Components',

    'Introduction (Self Intro,Company Name), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.': 'Multiple Components',
}

In [None]:
df['Call-pitch Elements used during the call Sales Scenario'] = df['Call-pitch Elements used during the call Sales Scenario'].map(call_pitch_mapping).fillna('Unknown/Unclear')

In [25]:
numerical_df['Age'] = numerical_df['Age'].astype(str).str.replace('+', '', regex=False).astype(int)

In [26]:
bins = [18, 22, 25, 28, 32, 35, float('inf')]
labels = ['18-22', '23-25', '26-28', '29-32', '33-35', '35+']

In [27]:
numerical_df['Age'] = pd.cut(numerical_df['Age'], bins=bins, labels=labels, right=True)

In [28]:
numerical_df['Age'].unique()

['23-25', '29-32', '26-28', '18-22']
Categories (6, object): ['18-22' < '23-25' < '26-28' < '29-32' < '33-35' < '35+']

In [29]:
numerical_df['Gender'] = le.fit_transform(numerical_df['Gender'])

In [30]:
numerical_df['Education'] = le.fit_transform(numerical_df['Education'])

In [31]:
numerical_df['Experienced Candidate (Nature of work)'] = le.fit_transform(numerical_df['Experienced Candidate (Nature of work)'])

In [32]:
numerical_df['Mode of Interview'] = le.fit_transform(numerical_df['Mode of Interview'])

In [33]:
numerical_df['Marital status'] = le.fit_transform(numerical_df['Marital status'])

In [34]:
numerical_df['Acquaintance and Referral'] = le.fit_transform(numerical_df['Acquaintance and Referral'])

In [35]:
numerical_df['Candidate Status'] = le.fit_transform(numerical_df['Candidate Status'])

In [36]:
numerical_df['Currently Employed'] = le.fit_transform(numerical_df['Currently Employed'])

In [37]:
numerical_df['What was the type of Role?'] = le.fit_transform(numerical_df['What was the type of Role?'])

In [38]:
numerical_df['Role Location to be given to the candidate'] = le.fit_transform(numerical_df['Role Location to be given to the candidate'])

In [39]:
numerical_df['Fluency in English based on introduction'] = le.fit_transform(numerical_df['Fluency in English based on introduction'])

In [40]:
numerical_df['Last Fixed CTC (lakhs) '].unique()

array(['5-5.99', 'Fresher', '2-2.99', '3-3.99', '4-4.99', '0-1.99', '7+',
       '6-6.99'], dtype=object)

In [41]:
transformer = ColumnTransformer(transformers = [
    ('t1', OrdinalEncoder(categories=[['No - Want Specific Centre Location Only','Yes - Anywhere Within a City','Yes - Anywhere Within a State','Yes - Anywhere in PAN India']]), ['Candidate is willing to relocate']),
    ('t2', OrdinalEncoder(categories=[['No','Yes : Think and says yes.(Shows some hesitation)','Emphatic Yes']]), ['Role acceptance']),
    ('t3', OrdinalEncoder(categories=[['Fresher(<6 months)','6-11.99 Months','12-17.99 Months','18-23.99 Months','24-29.99 Months','30-35.99 Months','36-47.99 Months','48+ Months']]), ['Experienced candidate - (Experience in months)']),
    ('t4', OrdinalEncoder(categories=[['Reject','Borderline Reject','Borderline Select','Select','Premium Select']]), ['Interview Verdict']),
    ('t5', OrdinalEncoder(categories=[['18-22','23-25','26-28','29-32']]), ['Age']),
    ('t6', OrdinalEncoder(categories=[['Fresher','0-1.99','2-2.99','3-3.99','4-4.99','5-5.99','6-6.99','7+']]), ['Last Fixed CTC (lakhs) '])
], remainder='passthrough')

In [42]:
numerical_df = transformer.fit_transform(numerical_df)
features_names = transformer.get_feature_names_out()
numerical_df = pd.DataFrame(numerical_df, columns=features_names)

In [43]:
numerical_df.head(5)

Unnamed: 0,t1__Candidate is willing to relocate,t2__Role acceptance,t3__Experienced candidate - (Experience in months),t4__Interview Verdict,t5__Age,t6__Last Fixed CTC (lakhs),remainder__Gender,remainder__Education,remainder__Marital status,remainder__Mode of Interview,...,remainder__Structured Thinking (In regional only).1,remainder__Structured Thinking Based on the PPT Question.1,remainder__Structured Thinking( Call pitch).1,remainder__Regional fluency based on the topic given .1,remainder__Regional fluency Based on the PPT Question.1,remainder__Regional fluency based on the sales scenario.1,remainder__Confidence Score,remainder__Structured Thinking Score,remainder__Regional Fluency Score,remainder__Total Score
0,1.0,1.0,1.0,0.0,1.0,5.0,0.0,5.0,2.0,1.0,...,2.0,3.0,2.0,1.0,1.0,1.0,11.0,7.0,3.0,42.0
1,3.0,2.0,0.0,0.0,3.0,0.0,0.0,7.0,2.0,1.0,...,3.0,3.0,3.0,3.0,3.0,3.0,12.0,9.0,9.0,60.0
2,1.0,2.0,0.0,4.0,2.0,0.0,0.0,2.0,2.0,1.0,...,3.0,3.0,3.0,3.0,1.0,3.0,10.0,9.0,7.0,52.0
3,3.0,2.0,0.0,4.0,0.0,0.0,1.0,2.0,2.0,0.0,...,3.0,3.0,3.0,3.0,3.0,3.0,12.0,9.0,9.0,60.0
4,0.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,1.0,...,2.0,2.0,1.0,3.0,1.0,3.0,10.0,5.0,7.0,44.0


In [44]:
numerical_df.shape

(538, 31)

In [45]:
numerical_df.columns

Index(['t1__Candidate is willing to relocate', 't2__Role acceptance',
       't3__Experienced candidate - (Experience in months)',
       't4__Interview Verdict', 't5__Age', 't6__Last Fixed CTC (lakhs) ',
       'remainder__Gender', 'remainder__Education',
       'remainder__Marital status', 'remainder__Mode of Interview',
       'remainder__Fluency in English based on introduction',
       'remainder__Acquaintance and Referral', 'remainder__Candidate Status',
       'remainder__Currently Employed',
       'remainder__Experienced Candidate (Nature of work)',
       'remainder__What was the type of Role?',
       'remainder__Role Location to be given to the candidate',
       'remainder__Confidence based on Introduction (English).1',
       'remainder__Confidence based on the topic given  .1',
       'remainder__Confidence Based on the PPT Question.1',
       'remainder__Confidence based on the sales scenario.1',
       'remainder__Structured Thinking (In regional only).1',
       'rema

In [46]:
numerical_df.columns = numerical_df.columns.str.replace(r'^remainder__', '', regex=True)

In [47]:
numerical_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 538 entries, 0 to 537
Data columns (total 31 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   t1__Candidate is willing to relocate                538 non-null    float64
 1   t2__Role acceptance                                 538 non-null    float64
 2   t3__Experienced candidate - (Experience in months)  538 non-null    float64
 3   t4__Interview Verdict                               538 non-null    float64
 4   t5__Age                                             538 non-null    float64
 5   t6__Last Fixed CTC (lakhs)                          538 non-null    float64
 6   Gender                                              538 non-null    float64
 7   Education                                           538 non-null    float64
 8   Marital status                                      538 non-null    float64
 9  

## Feature Scaling

In [49]:
scaler = StandardScaler()
scaler

In [50]:
scaled_data = scaler.fit_transform(numerical_df)
numerical_df = pd.DataFrame(scaled_data, columns=numerical_df.columns)

In [51]:
numerical_df.head()

Unnamed: 0,t1__Candidate is willing to relocate,t2__Role acceptance,t3__Experienced candidate - (Experience in months),t4__Interview Verdict,t5__Age,t6__Last Fixed CTC (lakhs),Gender,Education,Marital status,Mode of Interview,...,Structured Thinking (In regional only).1,Structured Thinking Based on the PPT Question.1,Structured Thinking( Call pitch).1,Regional fluency based on the topic given .1,Regional fluency Based on the PPT Question.1,Regional fluency based on the sales scenario.1,Confidence Score,Structured Thinking Score,Regional Fluency Score,Total Score
0,-0.555944,-1.115859,-0.14555,-1.379464,-0.092023,2.816563,-1.543907,0.838863,0.330815,0.7329,...,-0.487552,1.025396,-0.391771,-1.083502,-0.989076,-1.014829,0.974052,0.042661,-1.288389,-0.122521
1,1.138662,0.579995,-0.608899,-1.379464,1.970825,-0.673328,-1.543907,1.603793,0.330815,0.7329,...,1.046383,1.025396,0.985829,1.032375,1.105896,1.143187,1.417027,1.317761,1.369067,1.592062
2,-0.555944,0.579995,-0.608899,1.477712,0.939401,-0.673328,-1.543907,-0.308531,0.330815,0.7329,...,1.046383,1.025396,0.985829,1.032375,-0.989076,1.143187,0.531076,1.317761,0.483249,0.830025
3,1.138662,0.579995,-0.608899,1.477712,-1.123447,-0.673328,0.647708,-0.308531,0.330815,-1.364442,...,1.046383,1.025396,0.985829,1.032375,1.105896,1.143187,1.417027,1.317761,1.369067,1.592062
4,-1.403247,-1.115859,-0.608899,0.049124,-1.123447,-0.673328,0.647708,-1.073461,0.330815,0.7329,...,-0.487552,-0.507001,-1.769371,1.032375,-0.989076,1.143187,0.531076,-1.232439,0.483249,0.067988


In [52]:
numerical_df.shape

(538, 31)

In [53]:
df.head(2)

Unnamed: 0,Name,Comments,RedFlags Comments in Interview,Whether joined the company or not,Call-pitch Elements used during the call Sales Scenario,"But, my child's exam are going on now, so we will keep the counselling session after the exams get over",Let me discuss it with my child,Sir being in education industry I know this is a marketing gimmick and at the end of the day you'll be selling the app.,Pre Interview Check
0,parida,"Lipsa is 25 female from Orissa, Family BG - Fa...",At least Graduated ( not 12th Pass or diploma ...,0,Purpose of Call (Book a Counselling Session),Urgency using Time,None of the above,None of the above,Proceed with the Interview
1,shreej,29 yo / female / unmarried / MSc Finance UK 20...,Not Rehire (CDT have not joined byjus in sales...,0,"Introduction (Self Intro,Company Name), Purpos...",Asking Questions,None of the above,Non chargeable session,Proceed with the Interview


In [54]:
df.columns

Index(['Name', 'Comments', 'RedFlags Comments in Interview',
       'Whether joined the company or not',
       'Call-pitch Elements used during the call Sales Scenario',
       'But, my child's exam are going on now, so we will keep the counselling session after the exams get over',
       'Let me discuss it with my child',
       'Sir being in education industry I know this is a marketing gimmick and at the end of the day you'll be selling the app.',
       'Pre Interview Check'],
      dtype='object')

In [110]:
df['Call-pitch Elements used during the call Sales Scenario'].unique()

array(['Purpose of Call (Book a Counselling Session)',
       'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Need Generation - By Asking ques like student class, performance etc..',
       'Introduction (Self Intro,Company Name)',
       'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session), Rapport Building - Connect with Customer by asking ques like customer background, location, language, education etc., Need Generation - By Asking ques like student class, performance etc..',
       'None of the Above',
       'Introduction (Self Intro,Company Name), Urgency of the session  - Scholarship, CDT shortlisted from your area and so on.',
       'Introduction (Self Intro,Company Name), Purpose of Call (Book a Counselling Session)',
       'Introduction (Self Intro,Company Name), Need Generation - By Asking ques like student class, performance etc..',
       'Purpose of Call (Book a Counselling Session), Rapport Buildin

In [114]:
df['Call-pitch Elements used during the call Sales Scenario'].value_counts()

Call-pitch Elements used during the call Sales Scenario
Unknown/Unclear              157
Purpose & Need Generation    149
Introduction                 126
Multiple Components           98
Rapport Building               4
Closing                        2
Urgency Creation               2
Name: count, dtype: int64