In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [3]:
# Read the resume/job-posting table into memory.
# Ensure your dataset is clean and formatted before loading it here.
df = pd.read_csv(
    filepath_or_buffer="/content/sample_data/resume_data.csv",
)


In [4]:


# Fail fast unless both columns used below are present in the loaded frame.
if not {'skills_required', 'matched_score'}.issubset(df.columns):
    raise KeyError("Dataset must contain 'skills_required' and 'matched_score' columns.")

# Make every skills entry a plain string: missing values become '' so the
# string splitting further down never sees NaN.
df['skills_required'] = (
    df['skills_required']
    .fillna('')
    .astype(str)
)

def extract_skills(skill_list):
    """Split a newline-separated skills string into a list of skill names.

    Parameters
    ----------
    skill_list : str
        Raw skills text with one skill per line. Anything that is not a
        string (for example ``None`` or a float NaN) is treated as
        "no skills".

    Returns
    -------
    list of str
        The skill names in their original order with surrounding whitespace
        removed. Blank and whitespace-only lines are skipped.
    """
    if not isinstance(skill_list, str):
        return []
    # Strip first, then filter: testing the raw piece would let
    # whitespace-only lines through as empty-string "skills".
    stripped = (piece.strip() for piece in skill_list.split('\n'))
    return [skill for skill in stripped if skill]

# Parse each row's skills text into a list, then collect the sorted vocabulary
# of every distinct skill name seen anywhere in the dataset.
df['extracted_skills'] = df['skills_required'].apply(extract_skills)
unique_skills = sorted({skill for skills in df['extracted_skills'] for skill in skills})

# One-hot encode skills: one 0/1 column per vocabulary entry, aligned to df's index.
skills_df = pd.DataFrame(0, index=df.index, columns=unique_skills)
# .loc addresses rows by index *label*, so iterate over the labels themselves;
# a positional counter only happens to work while df has a default 0..n-1 index.
for row_label, skills in df['extracted_skills'].items():
    skills_df.loc[row_label, skills] = 1

# Drop indicator columns left behind by an earlier run of this cell so that
# re-running it does not stack a second copy of every skill column onto df.
# NOTE(review): assumes no raw dataset column is named exactly like a skill.
stale_columns = [col for col in unique_skills if col in df.columns]
df = pd.concat([df.drop(columns=stale_columns), skills_df], axis=1)

# Define features and target variable
# Take the indicator matrix itself as the feature table. Selecting
# df[unique_skills] returns every column carrying one of those labels, so if df
# ever holds more than one copy of the skill columns (e.g. after the encoding
# step has been run twice) the model would be fitted on duplicated features and
# predict_job's one-value-per-skill input would no longer match its width.
X = skills_df[unique_skills]  # Skills as features
# Rows with a missing matched_score compare False and are therefore labelled 0.
y = (df['matched_score'] >= 0.7).astype(int)  # 1 if matched_score >= 0.7 (hired), else 0

# Split data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model on the held-out 20%
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Prediction function
def predict_job(skills_list):
    """Predict whether a person with given skills can get a data science job.

    Parameters
    ----------
    skills_list : iterable of str
        Skill names to switch on in the feature vector. Each name is
        stripped of surrounding whitespace and then matched exactly
        (case-sensitively) against ``unique_skills``. Names that are not in
        that vocabulary, and entries that are not strings, are ignored.

    Returns
    -------
    str
        ``"Hired"`` when ``model`` predicts class 1, otherwise
        ``"Not Hired"``.
    """
    # Vocabulary entries were stripped when they were extracted, so strip the
    # query the same way; a set gives constant-time membership tests.
    requested = {skill.strip() for skill in skills_list if isinstance(skill, str)}
    feature_row = [[1.0 if name in requested else 0.0 for name in unique_skills]]
    # Hand the model a DataFrame carrying the training column names: it was
    # fitted on a named-column frame, and a bare array makes scikit-learn warn
    # about missing feature names and gives no column-alignment guarantee.
    input_data = pd.DataFrame(feature_row, columns=unique_skills)
    prediction = model.predict(input_data)
    return "Hired" if prediction[0] == 1 else "Not Hired"

# Example test: query the trained model with a sample skill combination.
example_skills = [
    "Python",
    "Machine Learning",
    "Deep Learning",
    "SQL",
]
print("Prediction:", predict_job(skills_list=example_skills))


Model Accuracy: 0.69
Prediction: Hired




In [5]:
# Vocabulary size: one entry per distinct skill name collected from the dataset.
num_skills = len(unique_skills)
print("Total number of unique skills in the dataset: {}".format(num_skills))


Total number of unique skills in the dataset: 97


In [6]:
# Show the heading and the full sorted skill vocabulary on separate lines.
print("Unique skills in the dataset:", unique_skills, sep="\n")


Unique skills in the dataset:
['ASP.NET MVC Strong understanding of database design', 'AUDIT AND INSPECTION', 'AWS Cloud', 'Ansible', 'Auto CAD 2D 3D', 'AutoCAD', 'Azure', 'Banking', 'Big Data', 'Brand Promotion', 'Business Analysis', 'CCNA (Cisco Certified Network Associate)', 'Campaign Management', 'Cisco', 'Civil 3D', 'Civil Construction', 'Civil Engineering', 'Cloud Platform', 'Communication and negotiation skills', 'Computer skill', 'Corporate Marketing', 'Data Analytics', 'Database Administrator (DBA)', 'Database management', 'DevOps', 'Dockers and Kubernetes', 'ETABS', 'ETL Tools', 'Effective communication skills', 'Elasticsearch', 'Facebook Ads Manager', 'Facebook campaign', 'Facebook marketing', 'Fast typing skill', 'Field Supervision', 'GPON', 'Good communication skills', 'Google Ads', 'Google analytics', 'HRM Report', 'Hardware & Networking', 'Having CACC from reputed CA Firm', 'Human Resource Management', 'IELTSInternet browsing & online work ability.', 'IIG', 'ISP', 'IT En

In [7]:
# Example test: query the trained model with another skill combination.
example_skills = [
    "Civil Engineering",
    "Communication and negotiation skills",
    "Corporate Marketing",
    "SQL",
]
print("Prediction:", predict_job(skills_list=example_skills))

Prediction: Not Hired




In [8]:
# Example test: query the trained model with another skill combination.
example_skills = [
    "Elasticsearch",
    "Facebook Ads Manager",
    "IT Enabled services",
    "Human Resource Management",
]
print("Prediction:", predict_job(skills_list=example_skills))

Prediction: Not Hired




In [9]:
# Example test: query the trained model with another skill combination.
example_skills = [
    "PyTorch",
    "Python",
    "Scikit-learn.",
    "TensorFlow",
]
print("Prediction:", predict_job(skills_list=example_skills))

Prediction: Hired


