<a href="https://colab.research.google.com/github/Siliconvalley4uYouthProjects/SpringBoard-Swatcloud/blob/main/Recommendation_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing libraries

import warnings

import pandas as pd
import numpy as np
import nltk
import regex as re
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem.wordnet import WordNetLemmatizer
from google.colab import drive

# SettingWithCopyWarning is exposed from pandas.errors in newer pandas releases
# and from pandas.core.common in older ones. Try both so this cell survives a
# fresh kernel on either; if neither location has it there is nothing to silence.
try:
  from pandas.errors import SettingWithCopyWarning
except ImportError:
  try:
    from pandas.core.common import SettingWithCopyWarning
  except ImportError:
    SettingWithCopyWarning = None

if SettingWithCopyWarning is not None:
  warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Corpora needed by the stop-word filter and the WordNet lemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# NOTE: the data-loading cell reads from /content/drive/MyDrive/..., so Google
# Drive must be mounted before running it. Uncomment the next line on a fresh
# Colab runtime.
#drive.mount('/content/drive')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Mounted at /content/drive


In [None]:
# Reading in the jobs - point JOBS_CSV at the file that contains the job data.
JOBS_CSV = '/content/drive/MyDrive/Swatcloud/Data/web_scraper/all_job_categories/Output Files/combined_all_companies_all_jobs.csv'
JOB_COLUMNS = ['Company','Job Title','Job Link','Job Description']

# skiprows=1 drops the file's first line (presumably its own header row) and
# header=None/names= applies JOB_COLUMNS instead. Rows with any missing value
# are removed and the index is renumbered from 0 so that positions and labels
# agree downstream.
df = (
    pd.read_csv(JOBS_CSV, header=None, names=JOB_COLUMNS, skiprows=1)
    .dropna()
    .reset_index(drop=True)
)
df

### Now we supply a new data point, and let the model output recommended job titles based on the calculated cosine similarity of this new data point and the existing job descriptions.

In [None]:
def top_x_recommendations(x,DataFrame,description):
  """Print the `x` job postings most similar to a free-text description.

  The description is cleaned (see `_clean_description`), vectorized with a
  CountVectorizer fitted on `DataFrame['Job Description']`, and compared to
  every posting by cosine similarity. The vectorizer is refitted on every call.

  Parameters
  ----------
  x : int
      Maximum number of recommendations to report.
  DataFrame : pandas.DataFrame
      Job postings. Must contain the columns 'Company', 'Job Title' and
      'Job Description'. Rows are addressed by position, so any index works.
  description : str
      Query text, e.g. a resume excerpt or one of the job descriptions.

  Returns
  -------
  None
      The ranked table, the cleaned query text and the full descriptions of
      the recommended jobs are printed to stdout.

  Notes
  -----
  When `description` is taken verbatim from `DataFrame`, the posting it came
  from would trivially rank first. At most one row is therefore removed: the
  highest-ranked row whose raw 'Job Description' equals `description`. A query
  that is not a row of `DataFrame` keeps its best match (the previous
  implementation always discarded the top-ranked row).
  """
  cleaned = _clean_description(description)
  new_data_input = [cleaned]

  # transform the new data point using CountVectorizer
  countVector = CountVectorizer(stop_words = 'english')
  countMatrix = countVector.fit_transform(DataFrame['Job Description'])
  new_data_transformed = countVector.transform(new_data_input)

  # calculate cosine similarities of the new data point with all of the job descriptions
  cosine_sim = cosine_similarity(new_data_transformed, countMatrix)

  # One row per posting; the index of `ranked` is the posting's POSITION in DataFrame.
  ranked = pd.DataFrame(cosine_sim.T, columns=['Similarity Score']).sort_values(by='Similarity Score', ascending=False)

  # Drop the query's own posting (if the query is one of the postings).
  is_query_row = (DataFrame['Job Description'] == description).to_numpy()
  own_positions = [pos for pos in ranked.index if is_query_row[pos]]
  if own_positions:
    ranked = ranked.drop(index=own_positions[0])

  # collect the top x recommendations
  top_x = ranked.iloc[:max(int(x), 0)].reset_index()
  positions = top_x['index'].to_numpy()

  top_x['Match Strength'] = [_match_strength(score) for score in top_x['Similarity Score']]
  # Positional, vectorized lookups: no chained assignment and no dependence on
  # DataFrame's index labels.
  top_x['Company'] = DataFrame['Company'].iloc[positions].to_numpy()
  top_x['Job Title'] = DataFrame['Job Title'].iloc[positions].to_numpy()

  top_x = top_x[['index', 'Job Title', 'Company', 'Similarity Score', 'Match Strength']]
  top_x = top_x.rename(columns = {'Similarity Score': 'Similarity', 'Match Strength': 'Match'})

  # print out the top x job descriptions
  cols = ['Job Title', 'Company', 'Similarity', 'Match']
  print(top_x[cols])

  # print applicant's qualifications
  print("\nApplicant's qualifications: ", new_data_input[0], '\n' )
  print('Job Descriptions for the Recommended jobs:')

  # print job descriptions for the recommended jobs
  for rank, job_position in enumerate(top_x['index']):
    print(rank, '. ', 'Job Description:', DataFrame['Job Description'].iloc[job_position], '\n')


# Lower bounds (exclusive) for each match label, checked from strongest to weakest.
_MATCH_THRESHOLDS = (
  (0.4, 'Very Strong Match'),
  (0.35, 'Strong Match'),
  (0.25, 'Good Match'),
)


def _match_strength(score):
  """Map a cosine similarity score to its human-readable match label."""
  for lower_bound, label in _MATCH_THRESHOLDS:
    if score > lower_bound:
      return label
  return 'Loose Match'


def _clean_description(description):
  """Normalize query text: strip punctuation, lowercase, drop stop words, lemmatize."""
  # Removing new line characters
  text = description.replace('\n', ' ')
  # Every character that is not an ASCII letter or digit becomes a space.
  # (The earlier str.replace(r'[^\w\s]+', '') call was a literal replace, not a
  # regex, and never matched anything, so it has been removed.)
  text = re.sub(r'[^a-zA-Z0-9]', ' ', text)
  # Converting the text to lowercase and trimming leading/trailing spaces
  text = text.lower().strip()

  # Build the stop-word set once instead of once per token.
  stop_words = set(stopwords.words('english'))
  lemmatizer = WordNetLemmatizer()

  kept = []
  for token in text.split(' '):
    if token not in stop_words:
      lemma = lemmatizer.lemmatize(token)
      kept.append(''.join(ch for ch in lemma if ch.isalnum()))

  # Empty tokens (from runs of spaces) leave repeated spaces behind; collapse them.
  return re.sub(' +', ' ', ' '.join(kept))

In [None]:
# Inspect the posting at position 10; its description is the query text used in the next cell.
df.iloc[10]

Company                                                    Microsoft
Job Title                                        Electrical Engineer
Job Link           https://careers.microsoft.com/us/en/job/150512...
Job Description     5+ years of experience in Quality, Manufactur...
Name: 10, dtype: object

In [None]:
# Rank the 10 postings closest to the description of the posting at position 10.
top_x_recommendations(10, df, df.iloc[10]['Job Description'])

                                           Job Title    Company  Similarity  \
0                            Program Quality Manager  Microsoft    0.868457   
1                               Supply Chain Manager  Microsoft    0.803626   
2                        Director of Supply Planning  Microsoft    0.775651   
3                 Senior Director of Supply Planning  Microsoft    0.769026   
4            Regional Operations Manager, Logistics      Google    0.692989   
5             Regional Logistics Operations Manager      Google    0.685325   
6   Supply Chain Program Manager, Global Strategy...     Google    0.669999   
7                           Manager, Supply Planning  Microsoft    0.653220   
8   Program Manager II, Supply Chain, New Product...     Google    0.652541   
9             Senior Fulfillment & Logistics Manager  Microsoft    0.649179   

               Match  
0  Very Strong Match  
1  Very Strong Match  
2  Very Strong Match  
3  Very Strong Match  
4  Very Strong 

**Testing Using Non-Amazon Jobs**

In [None]:
# Now let's try to use an input from a non-Amazon job: inspect the posting at
# position 8514, whose description is the query text used in the next cell.
df.iloc[8514]

Company                                                       Google
Job Title                   Data Scientist Intern, PhD, Summer 2023 
Job Link           https://careers.google.com/jobs/results/911515...
Job Description    Currently pursuing a PhD degree in a quantitat...
Name: 8514, dtype: object

In [None]:
# Rank the 10 postings closest to the description of the posting at position 8514.
top_x_recommendations(10, df, df.iloc[8514]['Job Description'])

                                           Job Title Company  Similarity  \
0           Product Analyst Intern, MS, Summer 2023   Google    0.575829   
1     Software Engineering Intern, PhD, Summer 2023   Google    0.509456   
2       Software Developer Intern, PhD, Summer 2023   Google    0.500193   
3                 Research Intern, PhD, Summer 2023   Google    0.484447   
4     Technical Program Manager Intern, Summer 2023   Google    0.470824   
5                 Research Intern, PhD, Summer 2023   Google    0.468564   
6   User Experience Research Intern, PhD, Summer ...  Google    0.438095   
7         People Analytics Intern, PhD, Summer 2023   Google    0.434406   
8                     Student Researcher, PhD, 2023   Google    0.434047   
9   User Experience Research Intern, MS, Summer 2...  Google    0.428571   

               Match  
0  Very Strong Match  
1  Very Strong Match  
2  Very Strong Match  
3  Very Strong Match  
4  Very Strong Match  
5  Very Strong Match  
6 

In [None]:
# Inspect the posting at position 2662; its description is the query text used in the next cell.
df.iloc[2662]

Company                                                          JnJ
Job Title                                           Senior Engineer 
Job Link           https://jobs.jnj.com/en/jobs/2206070798w/senio...
Job Description     Bachelor’s degree in mechanical or biomedical...
Name: 2662, dtype: object

In [None]:
# Rank the 10 postings closest to the description of the posting at position 2662.
top_x_recommendations(10, df, df.iloc[2662]['Job Description'])

                                           Job Title    Company  Similarity  \
0                               Senior Engineer, R&D        JnJ    1.000000   
1                                Staff Engineer R&D         JnJ    0.771744   
2                             CE Mechanical Engineer  Microsoft    0.721688   
3                  Senior Manager Regulatory Affairs        JnJ    0.721688   
4                             Senior Design Engineer        JnJ    0.678401   
5                             CE Mechanical Engineer  Microsoft    0.675865   
6  Critical Environment Mechanical Engineer CTJ - TS  Microsoft    0.629207   
7  Cryopreservation Quality Assurance Car-T Speci...        JnJ    0.625000   
8                            R&D Engineering Manager        JnJ    0.625000   
9                      CE Senior Mechanical Engineer  Microsoft    0.615648   

               Match  
0  Very Strong Match  
1  Very Strong Match  
2  Very Strong Match  
3  Very Strong Match  
4  Very Strong 

**Testing using sampled resumes**

Candidate #1: Software Engineer @ Microsoft with prior experience as a team lead for engineers working in sales analytics

In [None]:
# Candidate #1: resume text used as the query, kept in a named variable for reuse.
candidate_1_resume = 'Azure Kubernetes Service (AKS) team - Built a web server in Golang and wrote unit tests. Designed and deployed it with a microservices architecture, leveraging Kubernetes communication patterns. - Containerized the web server using a multistage Docker build process and packaged it into a Helm Chart. Set up a CI/CD pipeline to automatically test and deploy application to an AKS cluster. - Created a logging and metrics infrastructure, using Azure Log Analytics, Prometheus, and Grafana to monitor application behavior and system health. Led a team of engineers and product managers to shape the evolution of Dropbox’s central sales analytics tool. While leading the team, we: - Increased adoption of the platform by 2.4x in 2020. - Built a PowerPoint generation engine to deliver customized, data-driven sales materials at scale, enhancing productivity of the sales organization by ~500 hours per quarter. - Advised global sales teams on strategic customers through bespoke analytics. Designed a scalable model that resulted in a 10% increase in ARR influenced each quarter.'
top_x_recommendations(10, df, candidate_1_resume)

                                           Job Title    Company  Similarity  \
0             VP Data & Analytics – Ultimate Rewards        JPM    0.212709   
1                        Agility Lead (Scrum Master)        JPM    0.205960   
2                          Software Engineering Lead        JPM    0.204456   
3  Private Bank - Analytics and Tagging Product A...        JPM    0.203925   
4           CIB - Transactions Manager I - 2nd shift        JPM    0.203076   
5                               Software Engineering        JPM    0.200511   
6  Vice President, Marketing - Campaign Managemen...        JPM    0.200202   
7  CCB Treasury Executive Director - Pricing and ...        JPM    0.198320   
8             Principal Security Operations Engineer  Microsoft    0.198132   
9   Manager, Application Architecture - DAS Audit...   Deloitte    0.198001   

         Match  
0  Loose Match  
1  Loose Match  
2  Loose Match  
3  Loose Match  
4  Loose Match  
5  Loose Match  
6  Loose Ma

Candidate #2: Software Engineer @ Amazon with academic background in computer science and data science

In [None]:
# Candidate #2: resume text used as the query, kept in a named variable for reuse.
candidate_2_resume = '-Worked on the backend team to develop the Edtera web application, a learning engagement platform, using the Java Spring MVC Framework -Implemented data access layer using Spring Data JPA to allow various CRUD services to Edtera’s PostgreSQL database -Developed a performance tracker using Spring RestTemplate to retrieve students’ enrollment and grade data from third party learning management systems and configured RestTemplate Interceptor to reduce redundancy in the code -Built RESTful services to publish data by creating Rest Controllers, such as grades, course, enrollment information, etc. -Developed a high-performance laser health monitoring program with Python, which was highly recognized by the course instructor and project sponsor and selected for exhibition at the Department Senior Design Day -Implemented Random Forest Regression using Scikit-Learn library to predict laser survival rate, achieving MAPE of 12% -Created an interactive data visualization web application with Python Dash framework for explorative analysis -Designed a feature engineering procedure to sum the time series data and convert it to a supervised-learning problem'
top_x_recommendations(10, df, candidate_2_resume)

                                           Job Title    Company  Similarity  \
0                    Federal - Senior Data Architect  Accenture    0.417321   
1       Federal - Data Strategy & Management Manager  Accenture    0.380363   
2               Data Engineering Solution Associate    Deloitte    0.373577   
3                     SAP Data Management Consultant        IBM    0.372678   
4                        Data Engineer Summer Intern        JnJ    0.360744   
5                     Senior Cloud Big Data Engineer        JPM    0.355947   
6        Data Analytics Practitioner with Polygraph    Deloitte    0.355252   
7  Senior Data Scientist - Machine Learning Opera...        JPM    0.351399   
8             Federal - Data Strategy Senior Manager  Accenture    0.342962   
9                  Senior Manager - Data Engineering         HP    0.342095   

               Match  
0  Very Strong Match  
1       Strong Match  
2       Strong Match  
3       Strong Match  
4       Strong 

Candidate #3: Senior Marketing Analytics Manager @ Rippling with extensive work history as a marketing data analyst

In [None]:
top_x_recommendations(10,df,'1. Create measurement framework across different funnel stages (TOF, MOF, and BOF) and marketing channels & campaigns: 1) identify KPIs (primary & secondary) and 2) define leading indicators 2. Build reporting foundation and consistently report on 1) actuals against goals by segment, marketing channel, campaign, and product and 2) trend on performance across acquisition and cross-sell motions. Identify gaps on data tracking, data connection, and reporting infrastructure and implement solutions. 3. Develop a framework to measure channel and campaign effectiveness and efficiency through attribution (FT, LT, & MT) and incrementality (MMM & geo-based experiments). Marketing channels include Paid Social (LinkedIn & Facebook & YouTube), Paid & Organic Search, Review Sites, and OOH 4. Hire and grow a Marketing Analytics team, mentor and coach analyst(s) to deliver high-quality work 1. Acquisition Marketing Channel and Campaign Analysis - Measure acquisition marketing campaign performance through Geo experiments, time series models, and platform lift studies; Channels include brand media, such as TV, OTT, Streaming Audio, & Podcast, and OA media, such as Display, OLV, Paid Social, Paid Search, and Affiliates - Design and measure channel incrementality tests (PPC Brand & OTT/CTV) and other tests, such as bid algorithm test and landing page test, across different product segments - Provide on-going channel and campaign performance analysis via prediction models, pre-post analysis, and A/B tests; Create channel performance dashboard (Holistic Search) to inform efficiency - Provide insights and recommendations on channel and campaign performance to marketing stakeholders and the leadership team based on the test and analysis results 2. Product Performance Analysis - Analytics lead on weekly marketing acquisition performance across all channels (Brand and OA) for Quickbooks Online product. 
- Work with finance, marketing, sales, and other analytics teams to identify performance root causes. - Define full-funnel metrics to measure marketing acquisition performance, such as brand metrics, QBO brand and industry search demand, traffic and conversion rate, sales, and CPA. 1. CRM/Database Marketing Management and Strategies - CRM database and email & direct mail channels owner and work with sales team for outbound call campaigns. Develop, optimize, monitor, and execute database marketing campaigns including re-marketing, cross-sell, acquisition, and win-back. - Analyze CRM customer & campaign data on different segments to improve overall DBM campaign performance. Auto re-marketing campaign sales contribute 13.5% of total company sales in 2016. - Provide on-going analysis on CRM data to define targeting and segmentation strategies for marketing acquisition campaigns. Design A/B and multivariate tests and define KPIs to measure test success. - Oversee design, development, and maintenance of a CRM database. Work with a cross-functional team (product, analytics, development, and BI) to define database and data integration requirements. - Collaborate with data science, sales, BI, and IT teams to build and implement models used for segmentation and targeting strategies to increase customer lifetime value and campaign ROI. - Provide insights and recommendations to the senior management team to influence decision making. 2. Marketing Analytics - Lead all aspects of Marketing Analytics, including campaign analysis, reporting, and predictive modeling. Marketing channels include email, SEM, affiliates, paid social, and sponsorship. - Provide on-going analysis on CRM data to inform targeting and segmentation strategies for marketing acquisition campaigns. Design tests and define KPIs to measure campaign and test success. - Analyze cost, engagement, and sales data to propose U.S. 
budget re-allocation recommendations across marketing channels to increase conversion, ROI, and improve customer mix. - Set KPIs and lead efforts to provide and consolidate analysis and reporting for website testing. - Provide insights and recommendations to the senior management team to influence decision making.')

                                           Job Title    Company  Similarity  \
0  Marketing Manager, Advanced Compute Solutions/...         HP    0.374408   
1                     Performance Marketing Director        JPM    0.365424   
2         Vice President - Digital Marketing Manager        JPM    0.362804   
3  Vice President, Marketing - Campaign Managemen...        JPM    0.360134   
4              Sr. Director, Web Marketing Campaigns  Microsoft    0.341826   
5  Senior Marketing Manager, On Road Staffing - R...     Amazon    0.337890   
6  Head of Chase Wealth Management Marketing, Exe...        JPM    0.337376   
7          AM - SMA and Model Portfolio Marketing VP        JPM    0.335133   
8   Social Media Lead - Cloud Marketing Organization  Microsoft    0.328559   
9             Senior Associate, Marketing Operations       KPMG    0.326649   

          Match  
0  Strong Match  
1  Strong Match  
2  Strong Match  
3  Strong Match  
4    Good Match  
5    Good Match  
6   

Candidate #4: Product Manager @ Meta with prior experience as a consultant at Deloitte Digital

In [None]:
# Candidate #4: resume text used as the query, kept in a named variable for reuse.
candidate_4_resume = 'Product Manager for Strategic Transformation Tool: Led a nine-person product team to design, build, and launch StrategyAccelerator® - a single, customizable digital platform to help companies drive ideation to implementation across their business strategy life cycle, resulting in 17 client wins and $xxM revenue in direct asset fees and services for Deloitte Product and Monetization Strategy for a Pharmaceutical Company: Managed a five-person team to develop and prioritize 42 use cases and monetization opportunities of facial and retinal scans towards disruptive applications of AI and ML for disease detection and treatment targeting direct-to-consumer opportunities Marketing Strategy, Operations, and Annual Planning for $9B Cloud Technology Company: Led a five-person team working directly with the CMO and SVPs to spearhead annual strategic planning, and successfully secured ~$600M, the highest % of investment allocation for the marketing organization thus far from the C-suite eCommerce Partnership Strategy for $600B Global Social Media Company: Led leadership strategy workshops engaging cross-functionally with sales, product development, and privacy compliance to identify target large and mid-size retailers for ecommerce offering expansion, and develop a GTM plan for ~60M potential buyers Customer Experience and Journey Mapping for Fortune 500 Financial Services Company: Designed product roadmap to improve customer satisfaction and retention; directed ethnographic research leading to insights on key customer behaviors via quantitative surveys and interviews to understand customer journeys and shape new service offerings'
top_x_recommendations(10, df, candidate_4_resume)

                                           Job Title Company  Similarity  \
0  Vice President of Product - Machine Learning C...     JPM    0.362043   
1  Product Manager Sr. Correspondent Origination ...     JPM    0.346666   
2  Product Marketing Vice President, Merchant Wor...     JPM    0.339242   
3  CCB Risk Product Delivery – Cloud Modernizatio...     JPM    0.331566   
4  Product Delivery Associate - ML Model Delivery...     JPM    0.325012   
5             Product Analyst for Branded Cards - VP     JPM    0.323702   
6                Product Owner - Transformation Team     JPM    0.322587   
7  Product Analyst - Claims Disputes & Fraud Oper...     JPM    0.322202   
8                 Digital Mobile Sr. Product Manager     JPM    0.320770   
9  J.P. Morgan Wealth Management - Self Directed ...     JPM    0.319345   

          Match  
0  Strong Match  
1    Good Match  
2    Good Match  
3    Good Match  
4    Good Match  
5    Good Match  
6    Good Match  
7    Good Match  
8

Candidate #5: Investment Banker @ Brookwood Associates. No background in tech - purely a finance person.

In [None]:
# Candidate #5. top_x_recommendations prints its own report and returns None,
# so it is called directly: wrapping it in print() only appended a stray "None".
top_x_recommendations(10,df,'- Represented CAIRE on its second U.S. acquisition of MGC Diagnostics, a manufacturer of cardiorespiratory diagnostics systems - Represented provider of transactional communications solutions on its sale to Doxim, a portfolio company of GI Partners - Represented CAIRE, a subsidiary of NGK Spark Plugs, on its acquisition of an e-commerce seller of portable oxygen concentrators and other respiratory products - Represented manufacturer of advanced composite materials on its recapitalization - Represented provider of water and wastewater infrastructure services on its sale to Sciens Water - Represented provider of center-based, home-based, and school-based behavioral healthcare services for individuals with autism spectrum disorders on its sale to LEARN Behavioral, a portfolio company of Gryphon Investors - Represented provider of urgent care services on its merger with CRH Healthcare - Represented distributor of industrial air compressors and compressed air automation systems on its recapitalization - Represented manufacturer of domestic and imported hardwood lumber on the divestiture of its distribution business to the Rugby Architectural Building Products division of Hardwoods Distribution Inc. (TSX:HDI) - Represented vertically integrated manufacturer of technical and performance fabrics on its sale to Milliken & Company - Represented manufacturer of wakeboard towers and accessories and custom-patterned boat covers on its sale to a financial sponsor - Represented specialty mattress retailer with 130+ locations in the Midwest and Southeast on its sale to Mattress Firm')

                                           Job Title   Company  Similarity  \
0   Health Actuary Consulting Manager (Multiple L...  Deloitte    0.120670   
1   Service Excellence Contact Center Technical A...  Deloitte    0.118332   
2   Service Excellence Contact Center Technical A...  Deloitte    0.118332   
3  Wealth Management - VP - Service Center Relati...       JPM    0.109006   
4                  Oracle Field Service Cloud Config       IBM    0.105976   
5  Payments Client Service Account Manager-Billin...       JPM    0.101886   
6                     Manager of RISK and COMPLIANCE       JnJ    0.099329   
7   Health System Marketing Manager - GTA - Johns...       JnJ    0.098974   
8        Senior Associate, Cloud Operations - Remote      KPMG    0.098116   
9  Vice President of Product - Machine Learning C...       JPM    0.095439   

         Match  
0  Loose Match  
1  Loose Match  
2  Loose Match  
3  Loose Match  
4  Loose Match  
5  Loose Match  
6  Loose Match  
7  Lo