## Making Job Skills Dataset
This notebook builds a dataset of the skills each IT job requires, for use in recommending courses.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the IT jobs table; the preview below shows the columns
# 'Job Title', 'Skills' and 'Job Description'.
it_jobs = pd.read_csv("/content/it_jobs.csv")

In [None]:
# Preview the first rows: each job has a comma-separated 'Skills' string.
it_jobs.head()

Unnamed: 0,Job Title,Skills,Job Description
0,ACCESSIBILITY SPECIALIST,"web accessibility guidelines, html, css, javas...",Ensures digital products meet accessibility st...
1,ADMIN BIG DATA,"big data management, hadoop, spark, data wareh...","Administers and manages large datasets, ensuri..."
2,AGILE PROJECT MANAGER,"agile methodologies, scrum, kanban, project ma...","Leads projects using agile frameworks, focusin..."
3,ANDROID DEVELOPER,"java, kotlin, android sdk, mobile app developm...",Develops mobile applications for Android devic...
4,ANSIBLE AUTOMATION ENGINEER,"ansible, automation scripts, linux, networking...",Designs and implements automation solutions us...


In [None]:
import re
import nltk
from nltk.stem import WordNetLemmatizer

# Fetch the 'wordnet' and 'omw-1.4' NLTK packages used by the lemmatizer;
# nltk.download reports "already up-to-date" when a package is present.
nltk.download('wordnet')
nltk.download('omw-1.4')

# Module-level lemmatizer instance, used by preprocess_skills below
lemmatizer = WordNetLemmatizer()

# Function to preprocess and normalize skills
def preprocess_skills(skill_text, is_job=True):
    """Normalize a free-text skills string into a list of lemmatized tokens.

    The text is lower-cased, every character that is neither a word character
    nor whitespace is deleted (so ``"ai/ml"`` becomes ``"aiml"`` and commas
    disappear), the result is split on whitespace, and each token is passed
    through the module-level ``lemmatizer``.

    Parameters
    ----------
    skill_text : str
        Raw skills text, e.g. ``"big data management, hadoop, spark"``.
        Non-string values (``None``, or the ``NaN`` pandas produces for empty
        CSV cells) are treated as "no skills".
    is_job : bool, default True
        Kept for backward compatibility. Job and course text are processed
        identically, so the flag has no effect on the result.

    Returns
    -------
    list of str
        One lemmatized token per whitespace-separated word; multi-word skills
        are split into individual words. Empty list when there is nothing to
        tokenize.
    """
    # Missing cells arrive as float NaN / None; calling .lower() on them
    # would raise AttributeError, so treat them as an empty skill set.
    if not isinstance(skill_text, str):
        return []

    # Lower-case, then drop punctuation and other special characters
    normalized = re.sub(r'[^\w\s]', '', skill_text.lower())

    # NOTE(review): lemmatization also rewrites acronyms that look like
    # plurals (the notebook output shows "css" coming out as "cs"); consider a
    # protected-terms list if exact technology names matter downstream.
    # str.split() already discards surrounding whitespace, so no strip() is needed.
    return [lemmatizer.lemmatize(token) for token in normalized.split()]

# Tokenize each job's raw skills text and store the result in a new column
it_jobs['Cleaned_Job_Skills'] = [
    preprocess_skills(raw_text, is_job=True) for raw_text in it_jobs['Skills']
]

# Course skills can be cleaned the same way once a 'courses_df' frame with a
# 'Skills' column is available:
# courses_df['Cleaned_Course_Skills'] = courses_df['Skills'].apply(preprocess_skills, is_job=False)

# Show each job title next to its token list
it_jobs.loc[:, ['Job Title', 'Cleaned_Job_Skills']]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Unnamed: 0,Job Title,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"[web, accessibility, guideline, html, cs, java..."
1,ADMIN BIG DATA,"[big, data, management, hadoop, spark, data, w..."
2,AGILE PROJECT MANAGER,"[agile, methodology, scrum, kanban, project, m..."
3,ANDROID DEVELOPER,"[java, kotlin, android, sdk, mobile, app, deve..."
4,ANSIBLE AUTOMATION ENGINEER,"[ansible, automation, script, linux, networkin..."
...,...,...
284,EFFECTS TECHNICAL DIRECTOR (FX TD),"[visual, effect, technical, expertise, scripti..."
285,LAYOUT ARTIST,"[layout, cinematography, 3d, modeling, visual,..."
286,AI RESEARCHER,"[aiml, algorithm, python, research, methodolog..."
287,AI SOFTWARE ARCHITECT,"[aiml, architecture, software, design, cloud, ..."


In [None]:
# Spot-check the raw 'Skills' text of the second row (row position 1, column position 1)
it_jobs.iat[1,1]

'big data management, hadoop, spark, data warehousing, sql'

In [None]:
# NOTE(review): this is the same path the notebook loads its input from, so the
# original file is replaced by the frame that now also carries 'Cleaned_Job_Skills'.
file_path = '/content/it_jobs.csv'


# index=False keeps the row index out of the written file
it_jobs.to_csv(file_path, index=False)

print(f"CSV file has been saved to {file_path}")


CSV file has been saved to /content/it_jobs.csv


In [None]:
# Show the full frame: original columns plus the new 'Cleaned_Job_Skills' lists
it_jobs

Unnamed: 0,Job Title,Skills,Job Description,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"web accessibility guidelines, html, css, javas...",Ensures digital products meet accessibility st...,"[web, accessibility, guideline, html, cs, java..."
1,ADMIN BIG DATA,"big data management, hadoop, spark, data wareh...","Administers and manages large datasets, ensuri...","[big, data, management, hadoop, spark, data, w..."
2,AGILE PROJECT MANAGER,"agile methodologies, scrum, kanban, project ma...","Leads projects using agile frameworks, focusin...","[agile, methodology, scrum, kanban, project, m..."
3,ANDROID DEVELOPER,"java, kotlin, android sdk, mobile app developm...",Develops mobile applications for Android devic...,"[java, kotlin, android, sdk, mobile, app, deve..."
4,ANSIBLE AUTOMATION ENGINEER,"ansible, automation scripts, linux, networking...",Designs and implements automation solutions us...,"[ansible, automation, script, linux, networkin..."
...,...,...,...,...
284,EFFECTS TECHNICAL DIRECTOR (FX TD),"visual effects, technical expertise, scripting...",Develops technical solutions for visual effect...,"[visual, effect, technical, expertise, scripti..."
285,LAYOUT ARTIST,"layout, cinematography, 3d modeling, visual st...","Creates layouts for animated scenes, determini...","[layout, cinematography, 3d, modeling, visual,..."
286,AI RESEARCHER,"ai/ml algorithms, python, research methodologi...",Conducts research in artificial intelligence a...,"[aiml, algorithm, python, research, methodolog..."
287,AI SOFTWARE ARCHITECT,"ai/ml architecture, software design, cloud com...","Designs and implements AI software systems, en...","[aiml, architecture, software, design, cloud, ..."


In [None]:
# Reload the file just written. The display below shows 'Cleaned_Job_Skills'
# coming back as quoted text ("['web', ...]") rather than as Python lists.
df = pd.read_csv('/content/it_jobs.csv')

In [None]:
# Inspect the reloaded frame
df

Unnamed: 0,Job Title,Skills,Job Description,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"web accessibility guidelines, html, css, javas...",Ensures digital products meet accessibility st...,"['web', 'accessibility', 'guideline', 'html', ..."
1,ADMIN BIG DATA,"big data management, hadoop, spark, data wareh...","Administers and manages large datasets, ensuri...","['big', 'data', 'management', 'hadoop', 'spark..."
2,AGILE PROJECT MANAGER,"agile methodologies, scrum, kanban, project ma...","Leads projects using agile frameworks, focusin...","['agile', 'methodology', 'scrum', 'kanban', 'p..."
3,ANDROID DEVELOPER,"java, kotlin, android sdk, mobile app developm...",Develops mobile applications for Android devic...,"['java', 'kotlin', 'android', 'sdk', 'mobile',..."
4,ANSIBLE AUTOMATION ENGINEER,"ansible, automation scripts, linux, networking...",Designs and implements automation solutions us...,"['ansible', 'automation', 'script', 'linux', '..."
...,...,...,...,...
284,EFFECTS TECHNICAL DIRECTOR (FX TD),"visual effects, technical expertise, scripting...",Develops technical solutions for visual effect...,"['visual', 'effect', 'technical', 'expertise',..."
285,LAYOUT ARTIST,"layout, cinematography, 3d modeling, visual st...","Creates layouts for animated scenes, determini...","['layout', 'cinematography', '3d', 'modeling',..."
286,AI RESEARCHER,"ai/ml algorithms, python, research methodologi...",Conducts research in artificial intelligence a...,"['aiml', 'algorithm', 'python', 'research', 'm..."
287,AI SOFTWARE ARCHITECT,"ai/ml architecture, software design, cloud com...","Designs and implements AI software systems, en...","['aiml', 'architecture', 'software', 'design',..."


In [None]:
# Keep only the job title and its cleaned skill tokens
df = df.loc[:, ['Job Title', 'Cleaned_Job_Skills']]

In [None]:
# Confirm that only 'Job Title' and 'Cleaned_Job_Skills' remain
df

Unnamed: 0,Job Title,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"['web', 'accessibility', 'guideline', 'html', ..."
1,ADMIN BIG DATA,"['big', 'data', 'management', 'hadoop', 'spark..."
2,AGILE PROJECT MANAGER,"['agile', 'methodology', 'scrum', 'kanban', 'p..."
3,ANDROID DEVELOPER,"['java', 'kotlin', 'android', 'sdk', 'mobile',..."
4,ANSIBLE AUTOMATION ENGINEER,"['ansible', 'automation', 'script', 'linux', '..."
...,...,...
284,EFFECTS TECHNICAL DIRECTOR (FX TD),"['visual', 'effect', 'technical', 'expertise',..."
285,LAYOUT ARTIST,"['layout', 'cinematography', '3d', 'modeling',..."
286,AI RESEARCHER,"['aiml', 'algorithm', 'python', 'research', 'm..."
287,AI SOFTWARE ARCHITECT,"['aiml', 'architecture', 'software', 'design',..."


In [None]:
# NOTE(review): this overwrites the notebook's input path again, now with only
# 'Job Title' and 'Cleaned_Job_Skills'. The 'Skills' column that the
# preprocessing cell reads is no longer in the file afterwards, so a fresh
# top-to-bottom run would fail there. Consider a separate output path.
file_path = '/content/it_jobs.csv'


# index=False keeps the row index out of the written file
df.to_csv(file_path, index=False)

print(f"CSV file has been saved to {file_path}")

CSV file has been saved to /content/it_jobs.csv


In [None]:
# Read the two-column file back to check what was written
dp = pd.read_csv('/content/it_jobs.csv')

In [None]:
# Inspect the reloaded two-column frame
dp

Unnamed: 0,Job Title,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"['web', 'accessibility', 'guideline', 'html', ..."
1,ADMIN BIG DATA,"['big', 'data', 'management', 'hadoop', 'spark..."
2,AGILE PROJECT MANAGER,"['agile', 'methodology', 'scrum', 'kanban', 'p..."
3,ANDROID DEVELOPER,"['java', 'kotlin', 'android', 'sdk', 'mobile',..."
4,ANSIBLE AUTOMATION ENGINEER,"['ansible', 'automation', 'script', 'linux', '..."
...,...,...
284,EFFECTS TECHNICAL DIRECTOR (FX TD),"['visual', 'effect', 'technical', 'expertise',..."
285,LAYOUT ARTIST,"['layout', 'cinematography', '3d', 'modeling',..."
286,AI RESEARCHER,"['aiml', 'algorithm', 'python', 'research', 'm..."
287,AI SOFTWARE ARCHITECT,"['aiml', 'architecture', 'software', 'design',..."


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Reload the job/skills file written earlier in the notebook
# (at this point it holds 'Job Title' and 'Cleaned_Job_Skills').
it_jobs = pd.read_csv('/content/it_jobs.csv')

In [None]:
# Check whether a 'SOFTWARE ENGINEER' row already exists (the result below is empty).
# NOTE(review): this filters `df` from the earlier cells rather than the
# `it_jobs` frame reloaded just above; confirm that is intended.
df[df['Job Title'] == 'SOFTWARE ENGINEER']

Unnamed: 0,Job Title,Cleaned_Job_Skills


In [None]:
import ast


def _as_skill_list(value):
    """Return `value` as a real list of skills where possible.

    A list column written with `to_csv` is read back by `read_csv` as text
    such as "['web', 'html']". Such strings are parsed back into lists;
    values that are already lists, missing values and strings that are not a
    list literal are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return value
    return list(parsed) if isinstance(parsed, (list, tuple)) else value


data = it_jobs

# Create DataFrame
df = pd.DataFrame(data)

# The skills column was loaded from CSV as strings, while the rows appended
# below carry real lists. Convert the loaded values first so the column holds
# one type. `.assign` builds a new frame, leaving `it_jobs` untouched.
df = df.assign(Cleaned_Job_Skills=df['Cleaned_Job_Skills'].map(_as_skill_list))

# Define new job titles and skills
# NOTE(review): these entries keep multi-word phrases and punctuation
# ('machine learning', 'ci/cd', 'node.js'), whereas the existing rows hold
# single lemmatized tokens with punctuation removed; confirm which form the
# matching step expects.
new_jobs = [
    {'Job Title': 'SOFTWARE ENGINEER', 'Cleaned_Job_Skills': ['python', 'java', 'c', 'c++', 'software', 'algorithms', 'git', 'cloud', 'aws', 'docker', 'kubernetes', 'ci/cd', 'sql', 'node.js']},
    {'Job Title': 'AI ENGINEER', 'Cleaned_Job_Skills': ['machine learning', 'python', 'tensorflow', 'keras', 'deep learning', 'ai', 'pytorch', 'scikit-learn', 'data science', 'numpy', 'pandas', 'data engineering', 'big data', 'hadoop']},
    {'Job Title': 'WEB DEVELOPER', 'Cleaned_Job_Skills': ['html', 'css', 'javascript', 'react', 'node.js', 'angular', 'vue', 'responsive design', 'graphql', 'api', 'git', 'github', 'html5', 'css3']},
    {'Job Title': 'FRONTEND DEVELOPER', 'Cleaned_Job_Skills': ['html', 'css', 'javascript', 'react', 'angular', 'vue', 'css3', 'html5', 'responsive design', 'redux', 'webpack', 'typescript']},
    {'Job Title': 'BACKEND DEVELOPER', 'Cleaned_Job_Skills': ['node.js', 'python', 'java', 'sql', 'mongodb', 'postgresql', 'docker', 'kubernetes', 'express', 'microservices', 'rest-api', 'graphql-api']},
    {'Job Title': 'DATA ENGINEER', 'Cleaned_Job_Skills': ['big data', 'hadoop', 'spark', 'etl', 'data pipelines', 'python', 'sql', 'nosql', 'cloud', 'aws', 'data science', 'azure']},
    {'Job Title': 'MOBILE DEVELOPER (iOS/Android)', 'Cleaned_Job_Skills': ['swift', 'kotlin', 'android', 'ios', 'mobile development', 'flutter', 'xcode', 'android studio', 'mobile', 'react native', 'firebase', 'apis']},
    {'Job Title': 'DEVOPS ENGINEER', 'Cleaned_Job_Skills': ['devops', 'docker', 'kubernetes', 'ci/cd', 'aws', 'azure', 'terraform', 'automation', 'cloud-native', 'jenkins', 'vms', 'infrastructure', 'linux']},
    {'Job Title': 'AI RESEARCHER', 'Cleaned_Job_Skills': ['aiml', 'algorithm', 'python', 'research', 'machine learning', 'deep learning', 'tensorflow', 'keras', 'pytorch', 'data analysis', 'scikit-learn']},
    {'Job Title': 'PRODUCT MANAGER', 'Cleaned_Job_Skills': ['product management', 'strategy', 'business', 'project management', 'market research', 'growth hacking', 'customer acquisition', 'analytics', 'leadership', 'team collaboration', 'negotiation', 'jira']},
    {'Job Title': 'PROJECT MANAGER', 'Cleaned_Job_Skills': ['project management', 'agile', 'scrum', 'kanban', 'leadership', 'team collaboration', 'communication', 'time management', 'jira', 'risk management', 'client interaction', 'productivity tools']},
    {'Job Title': 'BUSINESS INTELLIGENCE DEVELOPER', 'Cleaned_Job_Skills': ['business intelligence', 'data visualization', 'tableau', 'power bi', 'data analysis', 'excel', 'sql', 'python', 'data', 'data modeling', 'analytics']}
]

# Convert the new job data into a DataFrame
new_jobs_df = pd.DataFrame(new_jobs)

# Append only the titles that are not already present in the existing data
is_new_title = ~new_jobs_df['Job Title'].isin(df['Job Title'])
df = pd.concat([df, new_jobs_df[is_new_title]]).reset_index(drop=True)

# Display the updated DataFrame
df

Unnamed: 0,Job Title,Cleaned_Job_Skills
0,ACCESSIBILITY SPECIALIST,"['web', 'accessibility', 'guideline', 'html', ..."
1,ADMIN BIG DATA,"['big', 'data', 'management', 'hadoop', 'spark..."
2,AGILE PROJECT MANAGER,"['agile', 'methodology', 'scrum', 'kanban', 'p..."
3,ANDROID DEVELOPER,"['java', 'kotlin', 'android', 'sdk', 'mobile',..."
4,ANSIBLE AUTOMATION ENGINEER,"['ansible', 'automation', 'script', 'linux', '..."
...,...,...
289,SOFTWARE ENGINEER,"[python, java, c, c++, software, algorithms, g..."
290,AI ENGINEER,"[machine learning, python, tensorflow, keras, ..."
291,FRONTEND DEVELOPER,"[html, css, javascript, react, angular, vue, c..."
292,BACKEND DEVELOPER,"[node.js, python, java, sql, mongodb, postgres..."
