In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.initializers import Constant
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from keras.layers import Dense,SpatialDropout1D
import contractions
import re
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
# initializing Stop words libraries
nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
from nltk.tokenize import word_tokenize
from concurrent.futures import ThreadPoolExecutor
import time

[nltk_data] Downloading package punkt to /Users/rishi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/rishi/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /Users/rishi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [9]:
#results for job data - keywords and their scores
result_df2 = pd.read_excel("job_lvl_keywords.xlsx")
result_df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12930 entries, 0 to 12929
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   job_title         12930 non-null  object 
 1   cleaned_keywords  12930 non-null  object 
 2   total_score       12930 non-null  float64
 3   Total_jobs        12930 non-null  int64  
 4   avg_score         12930 non-null  float64
dtypes: float64(2), int64(1), object(2)
memory usage: 505.2+ KB


In [10]:
#data processing and key word extraction functions - takes the resume content as input string
def tokenize_and_tag(desc):
    tokens = nltk.word_tokenize(desc.lower())
    filtered_tokens = [w for w in tokens if not w in stop_words]
    tagged = nltk.pos_tag(filtered_tokens)
    return tagged

def extract_POS(tagged):
    #pattern 1
    grammar1 = ('''Noun Phrases: {<DT>?<JJ>*<NN|NNS|NNP>+}''')
    chunkParser = nltk.RegexpParser(grammar1)
    tree1 = chunkParser.parse(tagged)

    # typical noun phrase pattern appending to be concatted later
    g1_chunks = []
    for subtree in tree1.subtrees(filter=lambda t: t.label() == 'Noun Phrases'):
        g1_chunks.append(subtree)

    #pattern 2
    grammar2 = ('''NP2: {<IN>?<JJ|NN>*<NNS|NN>} ''')
    chunkParser = nltk.RegexpParser(grammar2)
    tree2 = chunkParser.parse(tagged)

    # variation of a noun phrase pattern to be pickled for later analyses
    g2_chunks = []
    for subtree in tree2.subtrees(filter=lambda t: t.label() == 'NP2'):
        g2_chunks.append(subtree)

    #pattern 3
    grammar3 = (''' VS: {<VBG|VBZ|VBP|VBD|VB|VBN><NNS|NN>*}''')
    chunkParser = nltk.RegexpParser(grammar3)
    tree3 = chunkParser.parse(tagged)

    # verb-noun pattern appending to be concatted later
    g3_chunks = []
    for subtree in tree3.subtrees(filter=lambda t: t.label() == 'VS'):
        g3_chunks.append(subtree)


    # pattern 4
    # any number of a singular or plural noun followed by a comma followed by the same noun, noun, noun pattern
    grammar4 = ('''Commas: {<NN|NNS>*<,><NN|NNS>*<,><NN|NNS>*} ''')
    chunkParser = nltk.RegexpParser(grammar4)
    tree4 = chunkParser.parse(tagged)

    # common pattern of listing skills appending to be concatted later
    g4_chunks = []
    for subtree in tree4.subtrees(filter=lambda t: t.label() == 'Commas'):
        g4_chunks.append(subtree)

    return g1_chunks, g2_chunks, g3_chunks, g4_chunks

def training_set(chunks):
    '''creates a dataframe that easily parsed with the chunks data '''
    df = pd.DataFrame(chunks)
    df.fillna('X', inplace = True)

    train = []
    for row in df.values:
        phrase = ''
        for tup in row:
            # needs a space at the end for seperation
            phrase += tup[0] + ' '
        phrase = ''.join(phrase)
        # could use padding tages but encoder method will provide during
        # tokenizing/embeddings; X can replace paddding for now
        train.append( phrase.replace('X', '').strip())

    df['phrase'] = train

    #returns 50% of each dataframe to be used if you want to improve execution time
    # return df.phrase.sample(frac = 0.5)
    # Update: only do 50% if running on excel
    return df.phrase

def strip_commas(df):
    '''create new series of individual n-grams'''
    grams = []
    for sen in df:
        sent = sen.split(',')
        for word in sent:
            grams.append(word)
    return pd.Series(grams)

def generate_phrases(desc):
    tagged = tokenize_and_tag(desc)
    g1_chunks, g2_chunks, g3_chunks, g4_chunks = extract_POS(tagged)
    c = training_set(g4_chunks)
    separated_chunks4 = strip_commas(c)
    phrases = pd.concat([training_set(g1_chunks),
                          training_set(g2_chunks),
                          training_set(g3_chunks),
                          separated_chunks4],
                            ignore_index = True )
    return phrases

"""Creates corpus from feature column, which is a pandas series"""
def create_corpus(df):
    corpus=[]
    for phrase in tqdm(df):
        words=[word.lower() for word in word_tokenize(phrase) if(word.isalpha()==1)]
        corpus.append(words)
    return corpus

"""Create padded sequences of equal lenght as input to LSTM"""
def create_padded_inputs(corpus):
    MAX_LEN=20
    tokenizer_obj=Tokenizer()
    tokenizer_obj.fit_on_texts(corpus)
    sequences=tokenizer_obj.texts_to_sequences(corpus)

    phrase_pad=pad_sequences(sequences,maxlen=MAX_LEN,truncating='post',padding='post')
    return phrase_pad

def clean(desc):
    desc = contractions.fix(desc)
    desc = re.sub("[!@.$\'\'':()]", "", desc)
    return desc

def get_predictions(desc):
    #clean
    desc = clean(desc)
    #load model
    model = tf.keras.models.load_model('/Users/rishi/Documents/CSE 6242/group project/main/lstm_skill_extractor.h5')
    #tokenize and convert to phrases
    phrases = generate_phrases(desc)
    #preprocess unseen data
    corpus=create_corpus(phrases)
    corpus_pad = create_padded_inputs(corpus)
    #get predicted classes
    predictions = (model.predict(corpus_pad) >0.4).astype('int32')
    #return predicted skills as list
    out = pd.DataFrame({'Phrase':phrases, 'Class':predictions.ravel(), 'Scores': model.predict(corpus_pad).ravel()})
    skills = out.loc[out['Class'] == 1].sort_values(by='Scores',ascending=False)
    return  skills[['Phrase','Scores']]

In [11]:
#this input would actually come from an API request based on user input
#this is a placeholder as we currently dont have the feature implemented
#generated from keyword_extraction_model-main.ipynb
resume_df = pd.read_csv("resume_dataset.csv", delimiter=",", encoding='utf-8')
resume = resume_df['Resume'].iloc[0]

In [12]:
#calls the above functions and generates a df with key phrases and their scores
# we need to further extract keywords from the phrases.
columns = ['Phrase', 'Scores']
resume_keywords = pd.DataFrame(columns=columns)


    #if(i>100):
        #break
    #print(type(r['Translated_Desc']))
t = get_predictions(resume)
    #print(t)
t['Phrase'] = t['Phrase'].apply(lambda x: x.strip())
t.drop_duplicates(subset='Phrase',inplace=True)
resume_keywords = pd.concat([resume_keywords, t], ignore_index=True)
#resume_keywords.head()

100%|██████████████████████████████████████| 535/535 [00:00<00:00, 50566.81it/s]




Unnamed: 0,Phrase,Scores
0,deep learningeducation details data science as...,0.865631
1,fraud analytic platform fraud analytics,0.837631
2,customer feedback survey data,0.833121
3,received customer feedback survey data,0.829445
4,multiple data science,0.81837


In [13]:
#to account for any issues with spaces between words after keyword extraction - 
#we added a list of common skills for sub string matching.
# eg 'pythonjavac...' the model wont recognize them as seperate words.
#This was mainly an issue with webscraped data with parsing issues.
skills_list = [
    'IDE','CMS','CRM','ERP','VCS','CI/CD','API','SDK','CMS',
    'DBMS',
    'RDBMS',
    'NoSQL',
    'SQL',
    'HTTP',
    'HTTPS',
    'FTP',
    'SSH',
    'TCPIP',
    'DNS',
    'VPN',
    'IoT',
    'JSON',
    'XML',
    'REST',
    'SOAP',
    'GraphQL',
    'SaaS',
    'PaaS',
    'IaaS',
    'DaaS',
    'MLaaS',
    'NLP',
    'VR',
    'Augmentedreality',
    'UI/UX',
    'UX','uxui','uiux',
    'API Gateway',
    'LoadBalancer',
    'Firewall',
    'ReverseProxy',
    'Containerization',
    'Orchestration',
    'ServerlessComputing',
    'MicroservicesArchitecture',
    'CDN',
    'FaaS',
    'CIAM',
    'SIEM',
    'EDM',
    'EDA',
    'CICD',
    'SSO',
    'JWT',
    'OAuth',
    'SSL',
    'TLS',
    'Docker',
    'Kubernetes',
    'Ansible',
    'Jenkins',
    'Git',
    'GitHub',
    'GitLab',
    'Bitbucket',
    'Jira',
    'Trello',
    'Confluence',
    'Slack',
    'Zoom',
    'MicrosoftTeams',
    'GoogleWorkspace',
    'Office 365',
    'AWS',
    'Azure',
    'GCP',
    'Cloud',
    'Heroku',
    'Firebase',
    'Netlify',
    'Vercel',
    'NetBeans',
    'Eclipse',
    'Visual Studio',
    'SublimeText',
    'Atom',
    'PyCharm',
    'IntelliJ', 'IDEA',
    'VS',
    'Postman',
    'Swagger',
    'Insomnia',
    'Wireshark',
    'PostgreSQL',
    'MySQL',
    'SQLite',
    'MongoDB',
    'Cassandra',
    'Redis',
    'Elasticsearch',
    'Oracle',
    'Firebase', 'Firestore',
    'Neo4j',
    'RabbitMQ',
    'Kafka',
    'Hadoop',
    'Spark',
    'TensorFlow',
    'PyTorch',
    'Scikitlearn',
    'Pandas',
    'NumPy',
    'Matplotlib',
    'Seaborn',
    'Bokeh',
    'Plotly',
    'Tableau',
    'PowerBI',
    'Excel',
    'GoogleSheets',
    'Airflow',
    'Luigi',
    'Glue',
    'Talend',
    'NiFi',
    'PowerShell',
    'Bash',
    'Python',
    'JavaScript',
    'Java',
    'C++',
    'C#',
    'Ruby',
    'Swift',
    'Kotlin',
    'TypeScript',
    'HTML',
    'CSS',
    'Sass',
    'React',
    'Angular',
    'Vuejs',
    'Nodejs',
    'Expressjs',
    'Django',
    'Flask',
    'SpringBoot',
    'Laravel',
    'Symfony',
    'Ruby',
    'ASP.NET',
    'Bootstrap',
    'TailwindCSS',
    'MaterialUI',
    'Ant Design',
    'D3js',
    'Threejs',
    'Unity',
    'UnrealEngine',
    'AndroidStudio',
    'Xcode',
    'Flutter',
    'react',
    'ReactNative',
    'Ionic',
    'Cordova',
    'Xamarin',
    'Redux',
    'MobX',
    'Vuex',
    'GraphQL',
    'ApolloClient',
    'ReduxSaga',
    'RxJS',
    'Jest',
    'Mocha',
    'Chai',
    'Cypress',
    'Selenium',
    'JUnit',
    'TestNG',
    'Appium',
    'Detox',
    'JUnit',
    'TestNG',
    'RobotFramework',
    'SoapUI',
    'Jira',
    'TestRail',
    'Confluence',
    'Zephyr',
    'Gatling',
    'Locust',
    'ApacheJMeter',
    'LoadRunner',
    'Nessus',
    'Wireshark',
    'BurpSuite',
    'Metasploit',
    'Nmap',
    'Splunk',
    'Logstash',
    'Kibana',
    'ELKStack',
    'ELK',
    'QRadar',
    'ArcSight',
    'AzureSentinel',
    'Graylog',
    'Loggly',
    'Auth0',
    'Okta',
    'PingIdentity',
    'Keycloak',
    'Cognito',
    'AzureAD',
    'OneLogin',
    'ForgeRock',
    'GoogleCloud',
    'JWTio',
    'OAuthio',
    'SSLMate',
    'Digicert',
    'LetsEncrypt',
    'HashiCorpVault',
    'LastPass',
    '1Password',
    'KeePass',
    'Dashlane',
    'BitLocker',
    'VeraCrypt',
    'Norton',
    'McAfee',
    'Avast',
    'Kaspersky',
    'Sophos',
    'ClamAV',
    'Malwarebytes',
    'TrendMicro',
    'Bitdefender',
    'FireEye',
    'CrowdStrike',
    'Symantec',
    'Fortinet',
    'Cisco',
    'Zscaler',
    'Akamai',
    'Cloudflare',
    'Imperva',
    'F5Networks',
    'Barracuda',
    'DL',
    'LLM',
    'GPT',
    'Analysis',
    'patterns',
    'visual',
    'query',
    'rstudio',
    'angular',
    'algorithm',
    'nueralnetworks',
    'engineer',
    'customer',
    'testing',
    'model',
    'analytic',
    'process',
    'function',
    'consult',
    'service',
    'project',
    'database',
    'analyze',
    'artificialintelligence',
    'kpi',
    'nlp',
    'naturallanguage',
    'sql',
    'dbms','data','database','code','scrape',
    'pca','nosql','olap','api','sdk','saas','uml','ebitda','manage','design','stakeholder','hadoop','spark',
    'pyspark','athena','s3','gcc','sage','mapreduce','regression','classification','randomforest','xgboost','gradient',
    'cluster','svm','bayes',"statistic",'project','product','scala','visualization','d3','golang','stack','php',
    'clean','django','analyst',"flask"
]

In [14]:
#assign some generic abbrev in tech for overwriting parts of speech conditions.
all_abbreviations = [
    # Data Science
    "EDA", "ML", "AI", "DS", "NLP", "CV", "PCA", "OLS", "ANOVA", "ROC", "AUC", "RMSE", "KPI", "ETL", "BI", "SQL","R",
    "AWS",'RF',
    
    # Database
    "DBMS", "SQL", "NoSQL", "RDBMS", "DDL", "DML", "ACID", "CAP", "OLAP", "OLTP", "MDM", "ETL","SAS",
    
    # Software
    "IDE", "API", "SDK", "CLI", "GUI", "UI", "UX", "CI/CD", "VCS", "CMS", "ERP", "SaaS", "IoT", "DevOps","uml",
    
    # Product Management
    "PM", "PO", "MVP", "KPI", "OKR", "USP", "B2B", "B2C", "ROI", "MRR", "CAC", "LTV", "NPS",
    
    # Finance
    "ROI", "ROE", "EPS", "P/E", "DCF", "IRR", "EBITDA", "CAGR", "AUM", "FOMO", "ETF", "IPO", "GDPR", "KYC", "AML"
]

In [15]:

key_word_list = list(set([s.lower() for s in skills_list]))
key_word_list[-1]

'cognito'

In [16]:
#adding generic stop words for job descriptions to existing list of stop words - 
#a draw back withour approach vs TFIDF where the common words are penalized
import spacy


# Define other stop words
other_stop_words = ['junior', 'senior', 'experience', 'etc', 'job', 'work', 'company', 'technique',
                    'candidate', 'skill', 'skills', 'menu', 'inc', 'new', 'plus', 'years',
                    'technology', 'ceo', 'cto', 'account','good','understanding',
                    'strong', 'specification', 'popular', 'essential','required','preferred','requirement',
                    'satisfy','people','resume','resumes','opportunities','able','responsibilities',
                    'group','distribution','potential','given','nondiscrimination','discrimination',
                    'transparency','seniority','ability','world','international','approach','dedicated','global','region','regions'
                   'responsibilities', 'qualifications', 'requirements', 'benefits', 'responsibility',
                    'qualification', 'requirement', 'benefit', 'role', 'position','specific','looking',
                    'opportunity', 'knowledge', 'abilities', 'team', 'collaboration','possess',
                    'environment', 'success', 'successful', 'candidate', 'candidates','want',
                    'requirements', 'required', 'preferred', 'preferably','opportunities','opportunity',
                    'skillset', 'apply', 'apply now', 'apply online', 'apply today', 'apply here', 'apply button',
                    'company','companies','companys', 'organization', 'industry', 'sector', 'field', 'domain', 'working',
                    'teamwork', 'team player', 'employee', 'employees', 'colleague', 'colleagues', 'professional',
                    'professionals', 'individual', 'individuals', 'managers','scientist','integrity',
                    'direct', 'supervisor','regional','physical','mental','disabilities',
                    'supervisory', 'managing', 'managed', 'manageable', 'performance', 'perform',
                    'performing', 'performed', 'goal', 'goals', 'objective', 'objectives', 'outcome', 'outcomes',
                    'initiative', 'initiatives', 'innovate','view','help','different',
                    'innovates', 'innovated', 'innovating', 'solution', 'solutions', 'creativity',
                    'create', 'creates', 'created', 'creating', 'results', 'outcome', 'outcomes','looking',
                    'implement', 'implements', 'implemented', 'implementing', 'develops', 'developed',
                    'developing', 'designed', 'designing','level','needs','need','familiarity',
                    'evaluated', 'evaluating', 'strong', 'excellent','committed','potential','employment',
                    'effective', 'efficient', 'successful', 'outstanding', 'superior', 'proven', 'demonstrated',
                    'abilities', 'aptitude', 'talent', 'talented','veteran','status','compensation','permanent'
                    'experienced', 'expert', 'proficient', 'qualification', 'qualifications', 'degree', 'education',
                    'required', 'preferred','based','intensive','hidden','presence','harassment','including',
                    'industry', 'sector', 'field', 'domain', 'area', 'technical', 'technological','help',
                    'technology', 'solution', 'solutions','person','right','passport','citizen','written',
                    'think','existing','salary','consideration','miss','chance','vacancy','related','relevant',
                    'procedure', 'method', 'best practices', 'standard', 'standards', 'compliance','changes',
                    'regulation', 'regulatory', 'policy', 'policies', 'procedure', 'procedures', 'guideline',
                    'guidelines', 'protocol', 'protocols', 'manual', 'manuals', 'document', 'documents',
                    'collaborate', 'collaboration', 'coordinate', 'coordination','referred','following',
                          'interact', 'interaction', 'interpersonal', 'teamwork', 'team player', 'cross-functional',
                          'multi-disciplinary', 'multi-functional', 'work well', 'adapt', 'flexible', 'fast-paced',
                          'innovation', 'creativity', 'solution-oriented','grasp','demand','accept','privacy','notice',
                          'mentoring', 'coach','national','origin','based','gender','lunch','food','minimum',
                          'coaching', 'train', 'training', 'develop', 'development', 'growth', 'learning', 'learner',
                          'continuous improvement', 'professional development', 'career growth', 'self-starter', 'initiative',
                          'self-motivated', 'motivation', 'enthusiastic', 'passionate','learn','willing','state','art']
#print(other_stop_words[-1])
# Create a new spaCy model with updated stop words
nlp = spacy.load("en_core_web_sm")
#print(nlp('project')[0].is_stop)
# Update stop words
nlp.Defaults.stop_words |= set(other_stop_words)

art
False


In [17]:
# this functions returns a list of words after filtering out irrelevant words from the phrases
import re
abbrev_skills = [a.upper() for a in all_abbreviations]
def key_words(phrase):
    
    cleaned_text = re.sub(r'[^a-zA-Z0-9+#]+', ' ', str(phrase))
    doc = nlp(cleaned_text)
    
    taggers = ["learning", "programming","tool",'packages','tools','interface','service','studio',
              'reality','365','networks','science','series']

    key_words = []
    i=0
    while i< len(doc):
    
        token=doc[i]
        if token.is_stop:
            i+=1
            continue
        if token.pos_ in ["PROPN"] or token.text.upper() in abbrev_skills:
            key_words.append(token.text)
            i+=1
            continue
    
        elif token.pos_ in ["NOUN","X","ADV"]:
            combined_noun = token.text
        
            if i + 1 < len(doc) and doc[i + 1].text.lower() in  taggers:
                combined_noun += " " + doc[i + 1].text
                i += 1
            key_words.append(combined_noun)
        elif token.pos_ in ["ADJ","VERB"]:
        #print(token.text)
        #converted_word = token.lemma_
        # If you want to convert to a noun, use the .noun_ attribute
        #if converted_word:
            #combined_nouns.append(converted_word)
        #else:
            key_words.append(token.text)
        i+=1
    return key_words
    

In [18]:
#further cleaning the words where there is an issue with spaces.
def keyword_map(text):
    key_match = []
    

    for s in key_word_list:
        if s in text:
            key_match.append(s)
    #print(len(key_match))
    if len(key_match)==0:
        #print(1)
        key_match.append(text) 

    return key_match

In [19]:
#extract key words from the phrases generated from the model
resume_keywords['key_words'] = resume_keywords['Phrase'].apply(lambda x: key_words(x))
resume_keywords1 = resume_keywords[resume_keywords["key_words"].apply(lambda x: len(x) > 0)]
resume_keywords1 = resume_keywords1.explode('key_words', ignore_index=True)
resume_keywords1['cleaned_keywords']=resume_keywords1['key_words'].apply(lambda x: keyword_map(x))
resume_keywords1 = resume_keywords1.explode('cleaned_keywords', ignore_index=True)
#resume_keywords.sample(5)

In [23]:
#get all the titles which were processed using the model using jod descriptions
titles = result_df2['job_title'].unique()
#print(titles)

['Consultant' 'DevOps' 'business analyst' 'business manager'
 'data analyst' 'data engineer' 'data scientist' 'database administrator'
 'machine learning engineer' 'product manager' 'project manager'
 'research scientist' 'software developer']


In [20]:
#remove duplicate key words and keep the max score for each
resume_keywords2 = resume_keywords1.groupby(['cleaned_keywords'])['Scores'].max().reset_index(name='Scores')
#resume_keywords2.head(10)

Unnamed: 0,cleaned_keywords,Scores
0,advanced,0.692085
1,analysis,0.772277
2,analytic,0.837631
3,anomalies,0.633634
4,answer,0.645638
5,assisting,0.408432
6,assists,0.719214
7,assurance,0.865631
8,attacks,0.56371
9,attacks tools,0.475892


In [None]:
#match the resume keywords with each job titles key words and take the scalar product of their score and normalize 
#the score on the scale of 0-1 or %scale

s1 = set(resume_keywords2['cleaned_keywords'])
#normalizing factor for 2 types of scoring
total_score2 = resume_keywords2['Scores'].sum()
total_score1 = (resume_keywords2['Scores']**2).sum()
print(total_score1)
print(total_score2)
match_dict = {'Job title': [], 'Match Score1': [], 'Match Score2': []}
best_match = ''
best_score = 0
for t in titles:
    d2 = result_df2[result_df2["job_title"]==t]
        
    s2 = set(d2['cleaned_keywords'])
    skill_keys = s1.intersection(s2)
    score1 = 0
    score2 = 0
    
    #print(pair['Job1'],pair['Job2'])
    if skill_keys:
        for skill in skill_keys:
                #resume score for skill
            p1 = float(resume_keywords2[resume_keywords2['cleaned_keywords'] == skill]['Scores'].iloc[0])
                #job desc score for skill
            p2 = float(d2[d2['cleaned_keywords'] == skill]['avg_score'].iloc[0])
            #print(float(p2))
            #print(p1*p2)
            #dot product for scoreing
            score1 += p1*p2
            #using only score from resume to calculate the job fit scoring
            score2 += p1
    score1 = score1*1000/total_score1
    score2 = score2*100/total_score2
    match_dict['Job title'].append(t)
    match_dict['Match Score1'].append(score1)
    match_dict['Match Score2'].append(score2)
    if score1 > best_score:
        best_score = score1
        best_match = t
            
            
    #if i<10:
        #print(match_dict)
        
job_match = pd.DataFrame(match_dict)

job_match = job_match.sort_values(by='Match Score1', ascending=False)

#job_match

64.12134
92.93382


Unnamed: 0,Job title,Match Score1,Match Score2
6,data scientist,90.127777,56.231506
5,data engineer,79.476376,55.51654
4,data analyst,75.454283,56.707234
8,machine learning engineer,72.215901,54.688661
7,database administrator,70.20904,43.452612
2,business analyst,69.518616,53.299861
9,product manager,69.356174,51.19481
1,DevOps,69.045185,49.606561
0,Consultant,68.388773,50.920409
3,business manager,64.359428,48.369488


In [29]:
#print(resume_df['Category'].iloc[0])

Data Science


In [None]:
#output that will be fed into the data visualization in real time
job_match1 = job_match[['Job title','Match Score1']]