In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, re
from IPython import display

In [2]:
from google.colab import drive
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

# Define file paths (update with your actual Google Drive path)
indeed_path = "/content/drive/My Drive/indeed_cleaned.csv"
linkedin_path = "/content/drive/My Drive/linkedin_cleaned.csv"
glassdoor_path = "/content/drive/My Drive/glassdoor_cleaned.csv"
output_path = "/content/drive/My Drive/merged_jobs.csv"

# Load datasets
indeed_df = pd.read_csv(indeed_path, usecols=['company_name', 'job_title', 'job_type', 'description'])
linkedin_df = pd.read_csv(linkedin_path, usecols=['job_title', 'company_name', 'job_summary', 'job_seniority_level', 'job_function', 'job_employment_type', 'job_industries'])
glassdoor_df = pd.read_csv(glassdoor_path, usecols=['company_name', 'job_title', 'job_overview', 'company_industry', 'company_sector'])

# Rename columns for consistency
indeed_df.rename(columns={'description': 'job_description'}, inplace=True)
linkedin_df.rename(columns={'job_summary': 'job_description', 'job_employment_type': 'job_type'}, inplace=True)
glassdoor_df.rename(columns={'job_overview': 'job_description'}, inplace=True)

# Merge datasets on 'company_name' and 'job_title'
merged_df = pd.merge(indeed_df, linkedin_df, on=['company_name', 'job_title'], how='outer')
merged_df = pd.merge(merged_df, glassdoor_df, on=['company_name', 'job_title'], how='outer')

# Save to Google Drive
merged_df.to_csv(output_path, index=False)

print(f"Merged dataset saved to: {output_path}")


Mounted at /content/drive
Merged dataset saved to: /content/drive/My Drive/merged_jobs.csv


In [3]:
# Combine job_description columns, taking the first non-null value
merged_df['job_description'] = merged_df[['job_description_x', 'job_description_y', 'job_description']].bfill(axis=1).iloc[:, 0]

# Drop old job_description columns
merged_df.drop(columns=['job_description_x', 'job_description_y'], inplace=True)

# Save the cleaned dataset
output_path = "/content/drive/My Drive/merged_jobs_cleaned.csv"
merged_df.to_csv(output_path, index=False)

print(f"Cleaned merged dataset saved to: {output_path}")


Cleaned merged dataset saved to: /content/drive/My Drive/merged_jobs_cleaned.csv


In [4]:
# Combine job_description columns, taking the first non-null value
merged_df['job_type'] = merged_df[['job_type_x', 'job_type_y', ]].bfill(axis=1).iloc[:, 0]

# Drop old job_description columns
merged_df.drop(columns=['job_type_x', 'job_type_y'], inplace=True)

# Save the cleaned dataset
output_path = "/content/drive/My Drive/merged_jobs_cleaned.csv"
merged_df.to_csv(output_path, index=False)

print(f"Cleaned merged dataset saved to: {output_path}")


Cleaned merged dataset saved to: /content/drive/My Drive/merged_jobs_cleaned.csv


Dropping rows  with job skill levels above mid-senior
**Only including internship, entry, associate, and mid-senior level positions

In [7]:
import os

output_dir = "/content/drive/My Drive"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

exclude_terms = ['director', 'executive', 'specialist', 'phd']
exclude_terms2 = ['director', 'executive', 'senior', 'phd', 'sr', 'sr.']

# Drop rows where 'job_seniority_level' contains any of the keywords
merged_df = merged_df[~merged_df['job_seniority_level'].str.contains('|'.join(exclude_terms), case=False, na=False)]


# Drop rows where 'job_title' contains any of the keywords
merged_df = merged_df[~merged_df['job_title'].str.contains('|'.join(exclude_terms2), case=False, na=False)]
merged_df.head()

output_path = "/content/drive/My Drive/merged_jobs_cleaned.csv"
merged_df.to_csv(output_path, index=False)

print(f"Cleaned merged dataset saved to: {output_path}")

from google.colab import files
files.download('/content/drive/My Drive/merged_jobs_cleaned.csv')


Cleaned merged dataset saved to: /content/drive/My Drive/merged_jobs_cleaned.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

## For Software Development

In [None]:
# Define software development skills (Fixed missing commas)
software_dev_skills = [
    # Programming Languages
    "Python", "JavaScript", "Java", "C#", "C++", "TypeScript", "Ruby", "Swift", "Kotlin", "Go", "Rust", "PHP", "SQL",

    # Web Development
    "HTML", "CSS", "React", "Angular", "Vue.js", "Node.js", "Django", "Flask", "Spring Boot", "ASP.NET", "GraphQL",

    # Databases
    "MySQL", "PostgreSQL", "MongoDB", "SQLite", "Redis", "Firebase", "DynamoDB", "SQL Server", "Oracle", "Cassandra", "NoSQL",

    # Cloud & DevOps
    "AWS", "Azure", "Google Cloud Platform", "Docker", "Kubernetes", "Terraform", "CI/CD", "Jenkins", "GitHub Actions", "Cloud", "Cloud Computing", "AI",

    # Version Control & Collaboration
    "Git", "GitHub", "GitLab", "Bitbucket",

    # Mobile Development
    "Android", "iOS", "Flutter", "React Native", "Xamarin",

    # Testing & Debugging
    "JUnit", "Selenium", "Cypress", "PyTest", "Mocha", "Jest", "Postman",

    # Operating Systems & Environments
    "Linux", "Windows", "macOS", "Bash", "PowerShell", "Unix",

    # IDEs & Code Editors
    "VS Code", "IntelliJ IDEA", "PyCharm", "Eclipse", "Android Studio", "Xcode", "NetBeans",

    # Software Development Concepts
    "Object-Oriented Programming (OOP)", "Functional Programming", "Design Patterns", "Agile", "Scrum", "TDD (Test-Driven Development)",
    "Microservices", "REST API", "GraphQL API", "Multithreading", "Concurrency", "Data Structures", "Algorithms", "Machine Learning"

    # General Soft Dev terms
    "Engineer", "Developer", "Dev", "Technology", "Project Manager", "Project", "Development", "Management"
]


sd_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in software_dev_skills
}

sd_keyword_df = pd.DataFrame(list(sd_keyword_counts.items()), columns=['Keyword', 'Count'])

sd_keyword_df = sd_keyword_df.sort_values(by='Count', ascending=False)
sd_keyword_df.head(30)



Unnamed: 0,Keyword,Count
46,AI,2484
9,Go,1871
91,Dev,1821
96,Management,1355
95,Development,1322
94,Project,1037
92,Technology,822
35,AWS,607
47,Git,601
10,Rust,415


## SoftDev Job Titles

In [None]:
software_dev_job_titles = [
    # General Software Engineering Roles
    "Software Engineer", "Software Developer", "Full Stack Developer", "Backend Developer",
    "Frontend Developer", "Application Developer", "Software Architect", "Embedded Software Engineer",

    # Web Development
    "Web Developer", "Front End Engineer", "Back End Engineer", "Full Stack Engineer",
    "JavaScript Developer", "UI Developer", "UX Developer",

    # Mobile Development
    "Mobile Developer", "Android Developer", "iOS Developer", "Flutter Developer",
    "React Native Developer", "Mobile Application Developer",

    # DevOps & Cloud Engineering
    "DevOps Engineer", "Cloud Engineer", "Site Reliability Engineer (SRE)", "Infrastructure Engineer",
    "Platform Engineer", "Systems Engineer", "Build and Release Engineer",

    # Data & AI/ML Engineering
    "Data Engineer", "Machine Learning Engineer", "AI Engineer", "Deep Learning Engineer",
    "Big Data Engineer", "NLP Engineer", "Data Scientist",

    # Security & Cybersecurity Development
    "Security Engineer", "Application Security Engineer", "Cybersecurity Engineer",
    "Penetration Tester", "Security Software Developer",

    # Embedded & Systems Development
    "Firmware Engineer", "Embedded Systems Engineer", "IoT Engineer", "Systems Software Developer",

    # Database & Backend Specializations
    "Database Engineer", "SQL Developer", "Database Administrator (DBA)", "API Developer",
    "Microservices Developer", "Cloud Backend Engineer",

    # Game Development
    "Game Developer", "Game Engine Developer", "Unity Developer", "Unreal Engine Developer",
    "Gameplay Programmer", "Graphics Programmer",

    # QA & Testing Roles
    "QA Engineer", "Software Test Engineer", "Automation Engineer", "Test Automation Engineer",
    "Performance Engineer", "Quality Assurance Analyst",

    # Leadership & Management Roles
    "Software Engineering Manager", "Tech Lead", "Development Team Lead", "Principal Software Engineer",
    "Engineering Director", "CTO (Chief Technology Officer)", "VP of Engineering",

    # Specialized Developer Roles
    "Blockchain Developer", "AR/VR Developer", "Metaverse Developer", "Computer Vision Engineer",
    "AI Ethics Engineer", "Algorithm Engineer", "Simulation Engineer", "Low-Level Systems Developer"

    # Other
    "Engineer", "Developer", "Project Manager", "QA", "Testing", "Software Testing", "Game Development", "Cloud",
    "Front End Developer"
]

sd_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in software_dev_job_titles
}

sd_jobs_df = pd.DataFrame(list(sd_jobs_counts.items()), columns=['Keyword', 'Count'])

sd_jobs_df = sd_jobs_df.sort_values(by='Count', ascending=False)
sd_jobs_df.head(20)




Unnamed: 0,Keyword,Count
77,Developer,106
0,Software Engineer,86
1,Software Developer,32
78,Project Manager,30
34,Data Scientist,29
83,Cloud,24
58,Automation Engineer,23
28,Data Engineer,23
11,Full Stack Engineer,11
21,DevOps Engineer,8


## Software Dev Soft Skills

In [None]:
software_dev_soft_skills = [
    "Problem-Solving", "Critical Thinking", "Debugging & Troubleshooting",
    "Attention to Detail", "Adaptability", "Collaboration & Teamwork",
    "Communication (Technical & Non-Technical)", "Creativity & Innovation",
    "Time Management", "Agile & Scrum Methodologies", "Continuous Learning",
    "Resilience (Handling Bugs & Failures)", "Client & Stakeholder Management",
    "Logical Thinking", "Prioritization", "Problem Solving", "Teamwork", "Collaboration",
    "Self Motivated", "Leadership", "Driven", "Determined", "Self-Motivated", "Flexible",
    "Flexibility", "Creative", "Creativity", "Innovative", "Communication"
]

sd_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in software_dev_soft_skills
}

sd_soft_df = pd.DataFrame(list(sd_soft_counts.items()), columns=['Keyword', 'Count'])

sd_soft_df = sd_soft_df.sort_values(by='Count', ascending=False)
sd_soft_df.head(20)

Unnamed: 0,Keyword,Count
28,Communication,1366
23,Flexible,817
19,Leadership,625
27,Innovative,537
17,Collaboration,436
20,Driven,401
3,Attention to Detail,343
0,Problem-Solving,286
25,Creative,279
24,Flexibility,267


## For Data Analyst

In [None]:
data_analyst_skills = [
    # Programming & Scripting Languages
    "Python", "R", "SQL", "JavaScript", "SAS", "VBA", "Julia", "MATLAB",

    # Data Manipulation & Analysis
    "Pandas", "NumPy", "Dplyr", "Tidyverse", "Data Wrangling", "Data Cleaning", "ETL (Extract, Transform, Load)",
    "Data Transformation", "Exploratory Data Analysis (EDA)", "Data Mining", "Statistical Analysis",

    # Databases & Querying
    "SQL", "MySQL", "PostgreSQL", "MongoDB", "BigQuery", "Snowflake", "Oracle", "NoSQL", "Google Sheets",

    # Data Visualization
    "Tableau", "Power BI", "Looker", "Google Data Studio", "Matplotlib", "Seaborn", "Plotly", "ggplot2",

    # Business Intelligence & Reporting
    "Excel", "Pivot Tables", "DAX (Data Analysis Expressions)", "KPI Analysis", "Dashboarding", "Reporting Automation",

    # Machine Learning & Predictive Analytics (for advanced roles)
    "Scikit-learn", "TensorFlow", "PyTorch", "Regression Analysis", "Time Series Forecasting",
    "Clustering", "Classification", "Natural Language Processing (NLP)", "Machine Learning", 'AI', 'Artificial Intelligence',

    # Cloud & Big Data Technologies
    "AWS Redshift", "Google BigQuery", "Azure Synapse Analytics", "Hadoop", "Spark", "Databricks",

    # Statistical Methods & Mathematics
    "A/B Testing", "Hypothesis Testing", "Probability", "Bayesian Statistics", "ANOVA", "Time Series Analysis",

    # Data Engineering Concepts
    "Data Warehousing", "Data Pipelines", "Airflow", "Kafka", "SQL Optimization",

    # Tools & Platforms
    "Jupyter Notebook", "Google Colab", "Alteryx", "SAP BusinessObjects", "IBM Cognos", "SPSS",

]
da_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum() for keyword in data_analyst_skills
}

da_keyword_df = pd.DataFrame(list(da_keyword_counts.items()), columns=['Keyword', 'Count'])

da_keyword_df = da_keyword_df.sort_values(by='Count', ascending=False)
da_keyword_df.head(30)

Unnamed: 0,Keyword,Count
1,R,2571
50,AI,2484
35,Excel,1306
2,SQL,366
0,Python,272
4,SAS,236
47,Classification,160
49,Machine Learning,124
3,JavaScript,116
24,Oracle,94


## Data Analyst Job Titles

In [None]:
data_analyst_job_titles = [
    # General Data Analyst Roles
    "Data Analyst", "Business Data Analyst", "Financial Data Analyst", "Marketing Data Analyst",
    "Product Data Analyst", "Healthcare Data Analyst", "HR Data Analyst", "Operations Data Analyst", "Business Analyst"

    # Advanced & Specialized Data Analysis Roles
    "Senior Data Analyst", "Lead Data Analyst", "Principal Data Analyst", "Quantitative Analyst",
    "Risk Analyst", "Statistical Analyst", "Customer Data Analyst", "Sales Data Analyst",

    # Business Intelligence & Reporting
    "BI Analyst", "Business Intelligence Analyst", "BI Developer", "Data Visualization Analyst",
    "Reporting Analyst", "Tableau Analyst", "Power BI Analyst", "Excel Analyst",

    # Data Science & Machine Learning Adjacent Roles
    "Data Scientist", "Machine Learning Analyst", "AI Analyst", "Big Data Analyst",
    "Predictive Analytics Specialist", "Data Mining Analyst", "Data Research Analyst",

    # Data Engineering & Database Management
    "Data Engineer", "ETL Analyst", "SQL Data Analyst", "Database Analyst",
    "Data Warehouse Analyst", "Data Quality Analyst", "Data Governance Analyst",

    # Marketing & Customer Insights
    "Customer Insights Analyst", "Marketing Analytics Specialist", "Digital Marketing Analyst",
    "SEO Data Analyst", "Web Analytics Analyst", "Social Media Data Analyst",

    # Finance & Risk Analysis
    "Financial Analyst", "Risk Analyst", "Investment Data Analyst", "Credit Risk Analyst",
    "Fraud Analyst", "Actuarial Analyst",

    # Healthcare & Clinical Data
    "Healthcare Data Analyst", "Clinical Data Analyst", "Pharmaceutical Data Analyst",
    "Bioinformatics Analyst", "Public Health Data Analyst",

    # Supply Chain & Operations Analysis
    "Supply Chain Analyst", "Operations Analyst", "Logistics Analyst", "Manufacturing Data Analyst",
    "Procurement Analyst", "Inventory Data Analyst"

    # Other
    "Database Engineer", "Database Administrator", "Business Intelligence", "Machine Learning", "Machine Learning Engineer",
    "Data Architect", "Applied Data Scientist", "Statistician", "Product Manager", "Data Modeler", "Data Specialist", "Data Journalist"
]

da_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in data_analyst_job_titles
}

da_jobs_df = pd.DataFrame(list(da_jobs_counts.items()), columns=['Keyword', 'Count'])

da_jobs_df = da_jobs_df.sort_values(by='Count', ascending=False)
da_jobs_df.head(20)

Unnamed: 0,Keyword,Count
0,Data Analyst,79
66,Product Manager,76
24,Data Scientist,29
31,Data Engineer,23
44,Financial Analyst,14
60,Business Intelligence,13
61,Machine Learning,4
17,Business Intelligence Analyst,4
54,Operations Analyst,4
63,Data Architect,2


## Data Analyst Soft Skills

In [None]:
data_analytics_soft_skills = [
    "Analytical Thinking", "Critical Thinking", "Problem-Solving",
    "Attention to Detail", "Storytelling with Data", "Business Acumen",
    "Communication & Presentation Skills", "Curiosity & Continuous Learning",
    "Collaboration & Teamwork", "Pattern Recognition", "Adaptability",
    "Time Management", "Stakeholder Engagement", "Decision-Making",
    "Problem Solving", "Collaboration", "Teamwork", "Innovative", "Creative",
    "Creativity", "Storytelling", "Visual", "Visionary", "Driven", "Determined",
    "Presentation", "Self Motivated", "Self-Motivated", "Leadership", "Flexible",
    "Flexibility", "Communication",
]

da_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in data_analytics_soft_skills
}

da_soft_df = pd.DataFrame(list(da_soft_counts.items()), columns=['Keyword', 'Count'])

da_soft_df = da_soft_df.sort_values(by='Count', ascending=False)
da_soft_df.head(20)

Unnamed: 0,Keyword,Count
31,Communication,1366
29,Flexible,817
28,Leadership,625
17,Innovative,537
15,Collaboration,436
23,Driven,401
3,Attention to Detail,343
25,Presentation,323
2,Problem-Solving,286
18,Creative,279


## For Data Scientist

In [None]:
data_scientist_skills = [
    # Programming & Scripting Languages
    "Python", "R", "SQL", "Julia", "Scala", "MATLAB", "Java", "C++",

    # Data Manipulation & Analysis
    "Pandas", "NumPy", "Dask", "Polars", "Data Wrangling", "Data Cleaning", "ETL (Extract, Transform, Load)",
    "Feature Engineering", "Exploratory Data Analysis (EDA)", "Dimensionality Reduction",

    # Databases & Big Data Technologies
    "SQL", "MySQL", "PostgreSQL", "MongoDB", "BigQuery", "Snowflake", "Cassandra", "NoSQL",
    "Hadoop", "Apache Spark", "Databricks", "Presto", "Delta Lake",

    # Machine Learning & Deep Learning
    "Scikit-learn", "TensorFlow", "PyTorch", "Keras", "XGBoost", "LightGBM", "CatBoost",
    "Random Forest", "Gradient Boosting", "Support Vector Machines (SVM)", "Neural Networks",

    # Natural Language Processing (NLP)
    "SpaCy", "NLTK", "Hugging Face Transformers", "BERT", "GPT", "Word Embeddings",
    "Sentiment Analysis", "Text Classification", "Named Entity Recognition (NER)",
    "Machine Learning", 'AI', 'Artificial Intelligence'

    # Computer Vision
    "OpenCV", "YOLO", "Convolutional Neural Networks (CNNs)", "Image Processing", "Object Detection",

    # Statistical Methods & Mathematics
    "A/B Testing", "Hypothesis Testing", "Bayesian Statistics", "Probability", "Linear Algebra",
    "Regression Analysis", "Time Series Analysis", "Markov Chains", "Optimization",

    # Data Visualization
    "Matplotlib", "Seaborn", "Plotly", "ggplot2", "Power BI", "Tableau", "D3.js",

    # Cloud & MLOps
    "AWS", "Google Cloud Platform", "Azure", "MLflow", "Kubeflow", "Docker", "Kubernetes",
    "FastAPI", "Flask", "Airflow", "Feature Store",

    # Deep Learning & AI
    "GANs (Generative Adversarial Networks)", "Reinforcement Learning", "AutoML", "Self-Supervised Learning",

    # Tools & Platforms
    "Jupyter Notebook", "Google Colab", "H2O.ai", "DataRobot", "RapidMiner", "Apache Airflow", "NLP"
]
ds_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum() for keyword in data_scientist_skills
}

ds_keyword_df = pd.DataFrame(list(ds_keyword_counts.items()), columns=['Keyword', 'Count'])

ds_keyword_df = ds_keyword_df.sort_values(by='Count', ascending=False)
ds_keyword_df.head(20)

Unnamed: 0,Keyword,Count
1,R,2571
51,AI,2484
73,AWS,607
2,SQL,366
4,Scala,297
0,Python,272
6,Java,222
65,Optimization,186
75,Azure,146
50,Machine Learning,124


## Data Scientist Job Titles

In [None]:
data_scientist_job_titles = [
    # General Data Science Roles
    "Data Scientist", "Senior Data Scientist", "Lead Data Scientist", "Principal Data Scientist",
    "AI Data Scientist", "ML Data Scientist", "Big Data Scientist",

    # Specialized Data Science Roles
    "Applied Data Scientist", "Deep Learning Scientist", "Quantitative Data Scientist",
    "Computational Data Scientist", "Data Science Consultant", "Research Data Scientist",

    # Machine Learning & AI Focused Roles
    "Machine Learning Engineer", "AI Engineer", "Deep Learning Engineer",
    "Natural Language Processing (NLP) Engineer", "Computer Vision Engineer",
    "Reinforcement Learning Researcher", "AI Research Scientist", "Conversational AI Engineer",

    # Data Engineering & Infrastructure
    "Data Science Engineer", "MLOps Engineer", "Big Data Engineer", "Data Architect",
    "Cloud Data Scientist", "ETL Data Scientist", "Data Pipeline Engineer",

    # Business & Decision Science Roles
    "Business Data Scientist", "Decision Scientist", "Marketing Data Scientist",
    "Product Data Scientist", "Growth Data Scientist", "Customer Data Scientist",

    # Finance, Healthcare, & Industry-Specific Data Science
    "Financial Data Scientist", "Quantitative Researcher", "Risk Data Scientist",
    "Healthcare Data Scientist", "Bioinformatics Data Scientist", "Pharmaceutical Data Scientist",
    "Agricultural Data Scientist", "Environmental Data Scientist",

    # Advanced Research & Academic Roles
    "AI Researcher", "Computational Scientist", "Statistical Data Scientist",
    "Data Science Researcher", "Predictive Modeling Scientist", "Algorithm Engineer",
    "Experimental Data Scientist"

    # Other
    "Machine Learning Scientist", "Data Engineer", "Data Architect", "Data Storyteller", "Data Manager",
    "BI Developer", "Business Intellegence Developer", "Database Administrator", "Statistician", "Data Privacy Officer",
    "AI", "AI Engineer", "Artificial Intelligence", "Machine Learning", "Cloud Engineer", "Cloud Computing"
]

ds_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in data_scientist_job_titles
}

ds_jobs_df = pd.DataFrame(list(ds_jobs_counts.items()), columns=['Keyword', 'Count'])

ds_jobs_df = ds_jobs_df.sort_values(by='Count', ascending=False)
ds_jobs_df.head(20)


Unnamed: 0,Keyword,Count
57,AI,221
0,Data Scientist,29
49,Data Engineer,23
3,Principal Data Scientist,10
2,Lead Data Scientist,4
59,Machine Learning,4
14,AI Engineer,3
60,Cloud Engineer,3
24,Data Architect,2
55,Statistician,1


# Data Scientist Soft Skills

In [None]:
data_science_soft_skills = [
    "Problem-Solving", "Logical & Analytical Thinking", "Experimentation & Research Mindset",
    "Communication & Storytelling", "Business Acumen", "Domain Knowledge",
    "Creativity & Innovation", "Critical Thinking", "Resilience (Iterating Over Models)",
    "Curiosity & Continuous Learning", "Time Management",
    "Collaboration with Cross-Functional Teams", "Ethical Considerations in AI",
    "Handling Ambiguity", "Problem Solving", "Collaboration", "Teamwork", "Innovative", "Creative",
    "Creativity", "Storytelling", "Visual", "Visionary", "Driven", "Determined",
    "Presentation", "Self Motivated", "Self-Motivated", "Leadership", "Flexible",
    "Flexibility", "Communication"
]

ds_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in data_science_soft_skills
}

ds_soft_df = pd.DataFrame(list(ds_soft_counts.items()), columns=['Keyword', 'Count'])

ds_soft_df = ds_soft_df.sort_values(by='Count', ascending=False)
ds_soft_df.head(20)

Unnamed: 0,Keyword,Count
31,Communication,1366
29,Flexible,817
28,Leadership,625
17,Innovative,537
15,Collaboration,436
23,Driven,401
25,Presentation,323
0,Problem-Solving,286
18,Creative,279
21,Visual,277


## For Enterprise Systems

In [None]:
enterprise_systems_skills = [
    # Enterprise Software & ERP Systems
    "SAP", "Oracle ERP", "Microsoft Dynamics", "NetSuite", "PeopleSoft", "Workday", "Salesforce", "ServiceNow",
    "JD Edwards", "Infor ERP", "Epicor ERP", "Excel" "SQL", "Enterprise", "Finance"

    # Databases & Data Management
    "SQL Server", "Oracle Database", "PostgreSQL", "MySQL", "MongoDB", "Redis", "DB2", "Teradata",
    "Data Warehousing", "ETL", "Snowflake", "BigQuery",

    # Cloud & Infrastructure
    "AWS", "Microsoft Azure", "Google Cloud Platform", "Hybrid Cloud", "Kubernetes", "Docker", "Terraform",
    "OpenShift", "VMware", "Active Directory", "IAM", "Identity and Access Management",

    # Business Intelligence & Analytics
    "Power BI", "Tableau", "QlikView", "SAP BusinessObjects", "Cognos Analytics", "SAS", "Looker",
    "Data Modeling", "OLAP", "Online Analytical Processing",

    # Programming & Development
    "Java", "Python", "C#", ".NET Framework", "JavaScript", "TypeScript", "Node.js", "Bash Scripting",
    "PowerShell", "Apex",

    # Middleware & Integration
    "API Management", "MuleSoft", "IBM WebSphere", "Tibco", "Apache Kafka", "RabbitMQ", "Web Services",
    "SOAP", "REST", "GraphQL",

    # Security & Compliance
    "ISO 27001", "SOC 2 Compliance", "GDPR Compliance", "NIST Framework", "SIEM", "Security Information and Event Management",
    "IAM", "Identity and Access Management",

    # DevOps & IT Operations
    "CI/CD", "Continuous Integration", "Continuous Deployment", "Jenkins", "GitHub Actions", "Ansible",
    "Puppet", "Chef", "Monitoring Tools", "Nagios", "Splunk", "New Relic",

    # Enterprise Architecture & IT Governance
    "TOGAF", "ITIL", "COBIT", "Zachman Framework"

    # Other
    "ERP", "SAP", "Oracle", "Microsoft Dynamics", "NetSuite", "Business Systems", "Enterprise Architect",
    "CRM", "Workday", "PeopleSoft", "System Analyst", "Enterprise Engineer", "Business Applications",
    "Process Automation", "IT Operations", "Enterprise Integration", "IT Consultant", "Enterprise Solutions"
]

es_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum() for keyword in enterprise_systems_skills
}

es_keyword_df = pd.DataFrame(list(es_keyword_counts.items()), columns=['Keyword', 'Count'])

es_keyword_df = es_keyword_df.sort_values(by='Count', ascending=False)
es_keyword_df.head(20)

Unnamed: 0,Keyword,Count
65,REST,840
25,AWS,607
12,Enterprise,275
48,Python,272
42,SAS,236
47,Java,222
0,SAP,120
51,JavaScript,116
89,Oracle,94
49,C#,93


## Enterprise Systems Job Titles

In [None]:
enterprise_systems_job_titles = [
    # General Enterprise Systems Roles
    "Enterprise Systems Engineer", "Enterprise Architect", "Enterprise Systems Administrator",
    "Enterprise Software Engineer", "Enterprise IT Specialist", "Enterprise Solutions Architect",

    # ERP (Enterprise Resource Planning) Roles
    "ERP Consultant", "ERP Administrator", "ERP Analyst", "ERP Developer",
    "SAP Consultant", "SAP Analyst", "SAP Administrator", "SAP Developer",
    "Oracle ERP Consultant", "Oracle ERP Analyst", "NetSuite Consultant",
    "Microsoft Dynamics Consultant", "Workday Consultant",

    # Enterprise Application & Integration Roles
    "Enterprise Application Engineer", "Enterprise Application Administrator",
    "Enterprise Systems Integration Specialist", "Enterprise Applications Analyst",
    "Enterprise Application Developer", "Middleware Engineer", "API Integration Engineer",

    # Business Intelligence & Data Roles
    "Enterprise Data Analyst", "Enterprise Data Engineer", "Enterprise Data Scientist",
    "BI Developer", "BI Architect", "BI Analyst", "Data Warehouse Engineer",
    "Data Governance Specialist", "Master Data Management (MDM) Analyst",

    # Cloud & Infrastructure Enterprise Roles
    "Cloud Solutions Architect", "Cloud Systems Engineer", "Enterprise Cloud Engineer",
    "Enterprise Infrastructure Engineer", "Enterprise Cloud Administrator",
    "Cloud Security Engineer", "Hybrid Cloud Engineer", "Multi-Cloud Solutions Architect",

    # Systems & Network Administration
    "Enterprise Systems Administrator", "Enterprise Network Engineer",
    "Systems Analyst", "IT Systems Engineer", "IT Infrastructure Engineer",
    "Enterprise IT Administrator", "Identity and Access Management (IAM) Engineer",

    # Security & Compliance Roles
    "Enterprise Security Architect", "Enterprise Risk Analyst", "Enterprise Compliance Officer",
    "IAM Specialist", "Governance, Risk & Compliance (GRC) Analyst",
    "Enterprise Cybersecurity Engineer", "Enterprise SOC Analyst",

    # Enterprise Software Development & Customization
    "Enterprise Software Developer", "Enterprise Web Developer", "Enterprise AI Engineer",
    "Enterprise DevOps Engineer", "Enterprise Software Architect",
    "CRM Developer", "CRM Consultant", "Salesforce Developer", "Salesforce Consultant",

    # Management & Leadership Roles
    "Enterprise IT Manager", "Enterprise Systems Director", "Enterprise Applications Manager",
    "Enterprise Solutions Manager", "Chief Information Officer (CIO)",
    "Chief Technology Officer (CTO)", "VP of Enterprise Systems",
    "Enterprise IT Operations Manager"

    # Other
    "Business Systems Analyst", "Systems Analyst", "Systems Administrator", "Project Manager",
    "Cloud Architect", "Cloud Engineer", "Computer Systems Manager", "Computer Systems Analyst",
    "Database Administrator", "Sales Analyst", "Information Design"
]

es_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in enterprise_systems_job_titles
}

es_jobs_df = pd.DataFrame(list(es_jobs_counts.items()), columns=['Keyword', 'Count'])

es_jobs_df = es_jobs_df.sort_values(by='Count', ascending=False)
es_jobs_df.head(20)



Unnamed: 0,Keyword,Count
74,Project Manager,30
80,Sales Analyst,12
44,Systems Analyst,8
76,Cloud Engineer,3
1,Enterprise Architect,2
75,Cloud Architect,2
31,BI Analyst,2
73,Systems Administrator,1
51,Enterprise Compliance Officer,0
56,Enterprise Software Developer,0


## Enterprise Systems Soft Skills

In [None]:
enterprise_systems_soft_skills = [
    "Systems Thinking", "Business Process Understanding", "Stakeholder Management",
    "Communication & Collaboration", "Attention to Detail", "Problem-Solving",
    "Decision-Making", "Documentation & Reporting", "Time Management",
    "Adaptability to Change (New ERPs & Software)", "Project Management",
    "Conflict Resolution", "IT Governance & Compliance Awareness", "Change Management",
    "Problem Solving", "Collaboration", "Teamwork", "Innovative", "Creative",
    "Creativity", "Storytelling", "Visual", "Visionary", "Driven", "Determined",
    "Presentation", "Self Motivated", "Self-Motivated", "Leadership", "Flexible",
    "Flexibility", "Communication"
]

es_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in enterprise_systems_soft_skills
}

es_soft_df = pd.DataFrame(list(es_soft_counts.items()), columns=['Keyword', 'Count'])

es_soft_df = es_soft_df.sort_values(by='Count', ascending=False)
es_soft_df.head(20)




Unnamed: 0,Keyword,Count
31,Communication,1366
29,Flexible,817
28,Leadership,625
17,Innovative,537
15,Collaboration,436
23,Driven,401
4,Attention to Detail,343
25,Presentation,323
10,Project Management,292
5,Problem-Solving,286


## For Systems and Security

In [None]:
systems_security_skills = [
    # Cybersecurity Fundamentals
    "Cybersecurity", "Information Security", "Network Security", "Ethical Hacking", "Penetration Testing",
    "Vulnerability Assessment", "Security Risk Management", "Incident Response", "Threat Intelligence",
    "Zero Trust Architecture", "Security Awareness Training",

    # Security Frameworks & Compliance
    "ISO 27001", "NIST", "SOC 2", "GDPR", "CIS Controls", "COBIT", "HIPAA", "FISMA", "PCI DSS", "FedRAMP",
    "Risk Assessment", "Security Auditing", "Compliance",

    # Network Security & Infrastructure
    "Firewall", "VPN", "IDS", "IPS", "Load Balancer", "Proxy Server", "DDoS Mitigation",
    "Network Segmentation", "SSL/TLS", "IPSec", "Wireshark", "Packet Analysis", "802.1X",
    "Network Forensics", "SIEM", "Security Information and Event Management",

    # Cloud Security
    "AWS Security", "Azure Security", "Google Cloud Security", "Cloud Security Posture Management (CSPM)",
    "Identity and Access Management (IAM)", "Zero Trust Security", "Cloud Workload Protection", "CASB",
    "Cloud Access Security Broker", "Container Security", "Kubernetes Security",

    # Identity & Access Management (IAM)
    "Multi-Factor Authentication (MFA)", "Single Sign-On (SSO)", "Privileged Access Management (PAM)",
    "Active Directory", "LDAP", "OAuth", "SAML", "Kerberos", "Federated Identity Management",

    # Secure Development & Application Security
    "Secure Coding", "OWASP", "Web Application Security", "Static Code Analysis", "Dynamic Code Analysis",
    "Secure SDLC", "DevSecOps", "Software Composition Analysis (SCA)", "API Security",
    "Threat Modeling", "Code Review", "Bug Bounty Programs",

    # Endpoint & Device Security
    "Endpoint Detection and Response (EDR)", "Extended Detection and Response (XDR)", "Mobile Device Management (MDM)",
    "Antivirus", "EDR Solutions", "BitLocker", "Microsoft Defender", "CrowdStrike", "Carbon Black",
    "McAfee", "Symantec", "Norton", "Sophos",

    # Digital Forensics & Incident Response (DFIR)
    "Digital Forensics", "Memory Forensics", "Disk Forensics", "Forensic Analysis", "Log Analysis",
    "SIEM Investigation", "Threat Hunting", "Security Orchestration, Automation, and Response (SOAR)",
    "MITRE ATT&CK Framework", "Cyber Kill Chain", "Purple Teaming", "Red Teaming", "Blue Teaming",

    # Programming & Scripting for Security
    "Python", "Bash", "PowerShell", "C", "C++", "Go", "Rust", "JavaScript", "SQL Injection Prevention",
    "Reverse Engineering", "Malware Analysis", "Exploit Development",

    # Cryptography & Encryption
    "Cryptography", "Public Key Infrastructure (PKI)", "Encryption", "TLS Certificates", "Hashing",
    "AES", "RSA", "Elliptic Curve Cryptography (ECC)", "SSL/TLS", "Homomorphic Encryption", "Quantum Cryptography",

    # DevOps & Security Automation
    "CI/CD Security", "Infrastructure as Code (IaC)", "Terraform Security", "Ansible Security",
    "Docker Security", "Kubernetes Security", "Secrets Management", "Vault", "HashiCorp Vault",

    # Security Monitoring & SIEM Tools
    "Splunk", "ELK Stack", "QRadar", "LogRhythm", "Microsoft Sentinel", "AlienVault", "Graylog",
    "Threat Intelligence Platforms", "Threat Feeds", "Indicator of Compromise (IOC)", "YARA Rules",

    # Security Tools & Penetration Testing
    "Burp Suite", "Nmap", "Metasploit", "Kali Linux", "Aircrack-ng", "Nikto", "OpenVAS", "Snort",
    "OSINT", "Shodan", "Maltego", "John the Ripper", "Hashcat", "BloodHound", "Responder",

    # Security Certifications
    "CISSP", "CEH", "CISM", "OSCP", "CompTIA Security+", "CompTIA CySA+", "CompTIA CASP+",
    "CompTIA PenTest+", "GSEC", "GIAC", "CISA", "CCSP", "AWS Certified Security Specialty",
    "Azure Security Engineer Associate", "Google Cloud Security Engineer"

    # Other
    "Cybersecurity", "Security", "Network Security", "System Security", "Penetration Testing",
    "SOC Analyst", "Information Security", "Threat Analysis", "Firewall", "Cloud Security",
    "Ethical Hacking", "IAM", "Risk Management", "Compliance", "Data Protection", "Incident Response",
    "IT Auditor", "Vulnerability", "Forensics", "Zero Trust", "Endpoint Security", "DevSecOps"
]


ss_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum() for keyword in systems_security_skills
}

ss_keyword_df = pd.DataFrame(list(ss_keyword_counts.items()), columns=['Keyword', 'Count'])

ss_keyword_df = ss_keyword_df.sort_values(by='Count', ascending=False)
ss_keyword_df.head(20)

Unnamed: 0,Keyword,Count
101,C,2571
103,Go,1871
27,IPS,684
12,NIST,614
169,Security,586
23,Compliance,539
104,Rust,415
98,Python,272
116,RSA,148
26,IDS,121


## Systems and Security Job Titles

In [None]:
systems_security_job_titles = [
    # General Systems & Security Roles
    "Systems Engineer", "IT Security Engineer", "Cybersecurity Engineer",
    "Security Analyst", "Systems Analyst", "IT Security Analyst",
    "Information Security Specialist", "IT Security Specialist",

    # Network & Infrastructure Security
    "Network Security Engineer", "Network Security Analyst", "Firewall Engineer",
    "Cloud Security Engineer", "Infrastructure Security Engineer",
    "Wireless Security Engineer", "SOC Analyst", "Security Operations Engineer",

    # Identity & Access Management (IAM)
    "IAM Engineer", "Identity & Access Management Specialist", "IAM Administrator",
    "Access Control Analyst", "Privileged Access Management (PAM) Engineer",
    "Active Directory Engineer", "Directory Services Engineer",

    # Application & Software Security
    "Application Security Engineer", "Secure Software Engineer",
    "DevSecOps Engineer", "Security Software Developer",
    "Penetration Tester", "Ethical Hacker", "Vulnerability Researcher",
    "Red Team Engineer", "Blue Team Engineer", "Purple Team Engineer",

    # Governance, Risk & Compliance (GRC)
    "Cyber Risk Analyst", "Governance, Risk, and Compliance Analyst",
    "Compliance Officer", "Security Auditor", "IT Risk Analyst",
    "Information Assurance Engineer", "Security Policy Analyst",
    "ISO 27001 Auditor", "PCI Compliance Specialist",

    # Cloud & DevOps Security
    "Cloud Security Architect", "Cloud Security Consultant",
    "Cloud Compliance Engineer", "DevSecOps Specialist",
    "Cloud IAM Specialist", "Kubernetes Security Engineer",
    "AWS Security Engineer", "Azure Security Engineer",
    "Google Cloud Security Engineer",

    # Security Architecture & Leadership
    "Security Architect", "Enterprise Security Architect",
    "Security Solutions Architect", "CISO (Chief Information Security Officer)",
    "Head of Cybersecurity", "Director of Security", "VP of Security",

    # Digital Forensics & Incident Response (DFIR)
    "Incident Response Analyst", "Forensic Analyst",
    "Threat Intelligence Analyst", "Cyber Threat Hunter",
    "Malware Analyst", "Reverse Engineer", "SOC Manager",

    # Specialized Security Roles
    "Zero Trust Architect", "IoT Security Engineer", "ICS/SCADA Security Engineer",
    "Blockchain Security Engineer", "AI Security Researcher",
    "Quantum Cryptography Specialist", "Security Automation Engineer",
    "Cybersecurity Trainer", "Cybersecurity Researcher"

    # Other
    "Security Engineer", "Security Analyst", "Security Administrator", "Security Architect", "Penetration Tester",
    "Penetration Testing", "Security Consultant", "Information Security", "Information Manager", "Risk Analyst"
]

ss_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in systems_security_job_titles
}

ss_jobs_df = pd.DataFrame(list(ss_jobs_counts.items()), columns=['Keyword', 'Count'])

ss_jobs_df = ss_jobs_df.sort_values(by='Count', ascending=False)
ss_jobs_df.head(20)



Unnamed: 0,Keyword,Count
4,Systems Analyst,8
0,Systems Engineer,5
60,Threat Intelligence Analyst,4
23,Application Security Engineer,2
3,Security Analyst,2
25,DevSecOps Engineer,2
77,Information Security,1
76,Security Consultant,1
37,IT Risk Analyst,1
79,Risk Analyst,1


## Systems and Security Soft Skills

In [None]:
systems_security_soft_skills = [
    "Critical Thinking", "Risk Assessment & Decision-Making", "Attention to Detail",
    "Ethical Judgment & Integrity", "Problem-Solving",
    "Communication (Explaining Security Risks Clearly)", "Adaptability (Handling New Threats)",
    "Resilience (Handling Incidents & Breaches)", "Teamwork & Collaboration",
    "Incident Response Under Pressure", "Continuous Learning & Curiosity",
    "Documentation & Reporting", "Confidentiality & Discretion",
    "Awareness of Compliance & Regulations", "Ethical", "Problem Solving", "Innovative", "Communication",
    "Leadership", "Awareness", "Flexibility", "Adaptability", "Collaboration", "Works well under pressure",
    "Risk Assessment", "Integrity", "Honest", "Honesty"
]

ss_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in systems_security_soft_skills
}

ss_soft_df = pd.DataFrame(list(ss_soft_counts.items()), columns=['Keyword', 'Count'])

ss_soft_df = ss_soft_df.sort_values(by='Count', ascending=False)
ss_soft_df.head(20)


Unnamed: 0,Keyword,Count
17,Communication,1366
18,Leadership,625
16,Innovative,537
22,Collaboration,436
2,Attention to Detail,343
25,Integrity,302
4,Problem-Solving,286
20,Flexibility,267
15,Problem Solving,140
19,Awareness,123


## For Digital Media

In [None]:
digital_media_skills = [
    # Content Creation & Production
    "Content Creation", "Copywriting", "Content Strategy", "Creative Writing", "Scriptwriting",
    "Blogging", "Technical Writing", "SEO Writing", "Storytelling", "Editorial Planning",

    # Graphic Design & Multimedia
    "Adobe Photoshop", "Adobe Illustrator", "Adobe InDesign", "Canva", "Figma", "Sketch",
    "UI/UX Design", "Typography", "Visual Design", "Brand Identity", "Graphic Design",
    "Motion Graphics", "Adobe After Effects", "Adobe Premiere Pro", "Final Cut Pro",
    "Video Production", "Video Editing", "3D Animation", "2D Animation", "Blender",
    "Cinema 4D", "Maya", "Animation", "VFX", "Digital Illustration", "Infographics",

    # Social Media Management
    "Social Media Marketing", "Social Media Strategy", "Instagram Marketing", "Facebook Marketing",
    "LinkedIn Marketing", "Twitter Marketing", "TikTok Marketing", "YouTube Marketing", "Pinterest Marketing",
    "Community Management", "Influencer Marketing", "Social Listening", "Viral Marketing",
    "Hashtag Strategy", "Social Media Analytics", "Hootsuite", "Buffer", "Sprout Social",

    # Digital Marketing & Advertising
    "SEO", "SEM", "Google Ads", "Facebook Ads", "PPC", "Affiliate Marketing",
    "Google Analytics", "Google Tag Manager", "Conversion Rate Optimization (CRO)",
    "A/B Testing", "Email Marketing", "Marketing Automation", "HubSpot", "Mailchimp",
    "Klaviyo", "Campaign Management", "Lead Generation", "Brand Management",

    # Web Development & Digital Platforms
    "WordPress", "Web Design", "HTML", "CSS", "JavaScript", "Webflow", "Shopify",
    "E-commerce", "Landing Page Optimization", "Responsive Design", "UX Research",

    # Data Analytics & Performance Tracking
    "Data Visualization", "Google Data Studio", "Tableau", "Looker", "Adobe Analytics",
    "Social Media Insights", "KPI Tracking", "Engagement Metrics", "ROI Analysis",

    # Audio & Podcasting
    "Podcasting", "Audio Editing", "Audacity", "Adobe Audition", "GarageBand",
    "Voiceover", "Sound Design", "Mixing and Mastering", "Spotify Podcasting",

    # Streaming & Live Media
    "Twitch Streaming", "YouTube Live", "OBS Studio", "Restream", "Livestream Production",
    "Virtual Events", "Webinar Hosting", "Live Video Editing",

    # Digital Storytelling & Branding
    "Brand Storytelling", "Creative Direction", "Art Direction", "Marketing Strategy",
    "Interactive Media", "User Engagement", "Multimedia Production", "Visual Communication",

    # Emerging Technologies & Trends
    "Augmented Reality (AR)", "Virtual Reality (VR)", "Metaverse", "NFTs", "AI in Media",
    "Chatbots", "Interactive Video", "AI-Generated Content", "Deepfake Technology"

    # Other
    "Digital Media", "Content Creator", "Video Editor", "Graphic Designer", "Multimedia", "Animation",
    "Social Media", "Marketing", "SEO", "Adobe", "UX/UI Designer", "Web Designer", "Creative Director",
    "Branding", "Visual Design", "Content Marketing", "Social Media Manager", "User Experience",
    "Digital Marketing", "Motion Graphics", "Visual Effects", "Content Strategy", "Influencer", "Copywriting",
    "UI", "UX", "Design"
]


dm_keyword_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum() for keyword in digital_media_skills
}

dm_keyword_df = pd.DataFrame(list(dm_keyword_counts.items()), columns=['Keyword', 'Count'])

dm_keyword_df = dm_keyword_df.sort_values(by='Count', ascending=False)
dm_keyword_df.head(20)


Unnamed: 0,Keyword,Count
143,UI,2483
145,Design,1120
55,SEM,745
131,Marketing,278
144,UX,275
76,JavaScript,116
85,Tableau,87
130,Social Media,85
74,HTML,77
83,Data Visualization,66


## Digital Media Job Titles

In [None]:
digital_media_job_titles = [
    # General Digital Media Roles
    "Digital Media Specialist", "Digital Media Manager", "Digital Content Creator",
    "Digital Marketing Specialist", "Digital Marketing Manager", "Social Media Manager",
    "Content Marketing Manager", "Brand Marketing Manager",

    # Content Creation & Strategy
    "Content Creator", "Content Strategist", "Multimedia Content Producer",
    "Copywriter", "Content Editor", "Technical Writer", "Creative Writer",
    "Scriptwriter", "Editorial Manager", "SEO Content Writer",

    # Social Media & Community Management
    "Social Media Coordinator", "Social Media Strategist", "Social Media Analyst",
    "Community Manager", "Influencer Marketing Manager", "Social Media Consultant",
    "Social Media Advertiser", "Social Media Engagement Specialist",

    # Graphic Design & Visual Media
    "Graphic Designer", "Motion Graphic Designer", "Visual Designer",
    "Brand Identity Designer", "Illustrator", "Infographic Designer",
    "UX/UI Designer", "Web Designer", "Interactive Media Designer",

    # Video Production & Editing
    "Video Editor", "Videographer", "Motion Graphics Artist",
    "Cinematographer", "Video Producer", "Film Editor",
    "YouTube Content Creator", "Live Stream Producer",

    # Audio Production & Podcasting
    "Podcast Producer", "Podcast Editor", "Sound Engineer",
    "Audio Editor", "Voiceover Artist", "Radio Producer",
    "Music Production Specialist", "Audiobook Producer",

    # Advertising & Paid Media
    "Paid Media Specialist", "Digital Advertising Manager",
    "PPC Specialist", "Google Ads Manager", "Facebook Ads Specialist",
    "Programmatic Media Buyer", "Performance Marketing Manager",
    "Affiliate Marketing Manager", "E-commerce Marketing Manager",

    # Web & Digital Experience
    "Web Content Manager", "Digital Experience Manager",
    "Conversion Rate Optimization (CRO) Specialist", "UX Writer",
    "Digital Designer", "E-commerce Specialist", "Web Producer",

    # Data & Analytics in Digital Media
    "Digital Analytics Specialist", "SEO Analyst", "Web Analytics Manager",
    "Marketing Data Analyst", "Social Media Insights Analyst",
    "Audience Development Manager",

    # Emerging Digital Media & Innovation
    "AR/VR Content Creator", "Metaverse Content Strategist",
    "AI-Generated Content Specialist", "NFT Marketing Specialist",
    "Interactive Media Producer", "Digital Innovation Strategist"

    # Other
    "Digital Media", "Content Creator", "Video Editor", "Graphic Designer", "Multimedia", "Animation",
    "Social Media", "Marketing", "SEO", "Adobe", "UX/UI Designer", "Web Designer", "Creative Director",
    "Branding", "Visual Design", "Content Marketing", "Social Media Manager", "User Experience", "UI", "UX", "Design",
    "Copywriting", "Social Media Manager", "Content Strategy", "Marketing Analyst", "Graphic Designer"
]

dm_jobs_counts = {
    keyword: merged_df['job_title'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in digital_media_job_titles
}

dm_jobs_df = pd.DataFrame(list(dm_jobs_counts.items()), columns=['Keyword', 'Count'])

dm_jobs_df = dm_jobs_df.sort_values(by='Count', ascending=False)
dm_jobs_df.head(20)

Unnamed: 0,Keyword,Count
90,UI,51
92,Design,33
82,Marketing,31
91,UX,16
32,UX/UI Designer,2
33,Web Designer,2
13,Technical Writer,1
89,User Experience,1
64,Digital Designer,1
21,Community Manager,1


# Digital Media Soft Skills

In [None]:
digital_media_soft_skills = [
    "Creativity & Innovation", "Storytelling & Narrative Skills", "Communication & Branding",
    "Visual Thinking", "Attention to Detail", "Adaptability to Trends & Platforms",
    "Collaboration & Teamwork", "Client & Stakeholder Management",
    "Time Management & Deadlines", "Emotional Intelligence (Understanding Audience Needs)",
    "Persuasion & Marketing Insight", "Problem-Solving", "Project Management",
    "Data-Driven Decision-Making (SEO, Social Media Analytics)", "Decision Making", "Creativity",
    "Collaboration", "Teamwork", "Inspired", "Self-Motivated", "Insightful", "Creative"
]

dm_soft_counts = {
    keyword: merged_df['job_description'].str.contains(re.escape(keyword), case=False, na=False).sum()
    for keyword in digital_media_soft_skills
}

dm_soft_df = pd.DataFrame(list(dm_soft_counts.items()), columns=['Keyword', 'Count'])

dm_soft_df = dm_soft_df.sort_values(by='Count', ascending=False)
dm_soft_df.head(20)

Unnamed: 0,Keyword,Count
16,Collaboration,436
4,Attention to Detail,343
12,Project Management,292
11,Problem-Solving,286
21,Creative,279
17,Teamwork,148
15,Creativity,97
19,Self-Motivated,72
14,Decision Making,64
18,Inspired,35
