In [9]:
import pandas as pd
import numpy as np
import os
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import PyPDF2
import docx
import tkinter as tk
from tkinter import filedialog
import warnings
warnings.filterwarnings("ignore")

# Step 1: Load dataset and prepare Resume field
df = pd.read_csv("C:/Users/manik/OneDrive/Desktop/generated_profiles.csv")
df.columns = df.columns.str.strip()

df["Resume"] = (
    df["Summary/Profile"].fillna('') + ". " +
    df["Technical Skills"].fillna('') + ". " +
    df["Soft Skills"].fillna('') + ". " +
    df["Certifications & Courses"].fillna('') + ". " +
    df["Projects"].fillna('') + ". " +
    df["Internships/Work Experience"].fillna('') + ". " +
    df["Achievements"].fillna('') + ". " +
    df["Extracurricular Activities"].fillna('') + ". " +
    df["Preferred Domain / Interest Area"].fillna('') + ". " +
    df["Education"].fillna('') + ". " +
    df["Another Designation"].fillna('')
)

df = df.rename(columns={"Goal Role": "Target"})
df_cleaned = df.dropna(subset=["Resume", "Target"]).reset_index(drop=True)

# Step 2: Generate BERT embeddings
bert_model = SentenceTransformer('all-MiniLM-L6-v2')
resume_embeddings = bert_model.encode(df_cleaned["Resume"].tolist(), show_progress_bar=True)

# Step 3: Label Encoding and Train-Test Split
le = LabelEncoder()
y = le.fit_transform(df_cleaned["Target"])

X_train, X_test, y_train, y_test = train_test_split(
    resume_embeddings, y, test_size=0.2, random_state=42, stratify=y
)

# Step 4: Train the XGBoost model
model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    n_estimators=1000,
    learning_rate=0.03,
    max_depth=7,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=5,
    min_child_weight=5,
    objective='multi:softprob'
)
model.fit(X_train, y_train)

# Step 5: File picker to upload resume
def upload_resume():
    root = tk.Tk()
    root.withdraw()
    file_path = filedialog.askopenfilename(
        title="Select Your Resume",
        filetypes=[("Supported files", "*.pdf *.docx *.txt")]
    )
    return file_path

# Step 6: Extract text from resume
def extract_text_from_file(filepath):
    ext = os.path.splitext(filepath)[-1].lower()
    text = ""
    if ext == ".pdf":
        with open(filepath, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
    elif ext == ".docx":
        doc = docx.Document(filepath)
        for para in doc.paragraphs:
            text += para.text + "\n"
    elif ext == ".txt":
        with open(filepath, "r", encoding="utf-8") as file:
            text = file.read()
    else:
        raise ValueError("Unsupported file format. Please upload PDF, DOCX, or TXT.")
    return text.strip()




Batches:   0%|          | 0/32 [00:00<?, ?it/s]

In [10]:
import os
import io
import PyPDF2
import docx
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier


bert_model = SentenceTransformer('all-MiniLM-L6-v2')


# Create the file upload widget
upload_widget = widgets.FileUpload(
    accept='.pdf,.docx,.txt',
    multiple=False,
    description="📤 Upload Resume"
)

display(upload_widget)

def on_upload_change(change):
    clear_output(wait=True)
    display(upload_widget)

    uploaded_file = list(upload_widget.value.values())[0]
    filename = uploaded_file['metadata']['name']
    content = uploaded_file['content']

   
    ext = os.path.splitext(filename)[-1].lower()
    resume_text = ""
    
    if ext == '.pdf':
        with io.BytesIO(content) as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            for page in reader.pages:
                text = page.extract_text()
                if text:
                    resume_text += text + "\n"
                    
    elif ext == '.docx':
        with open("temp.docx", "wb") as f:
            f.write(content)
        doc = docx.Document("temp.docx")
        for para in doc.paragraphs:
            resume_text += para.text + "\n"
        os.remove("temp.docx")
        
    elif ext == '.txt':
        resume_text = content.decode("utf-8")
    
    else:
        print("Unsupported file format")
        return

    resume_lower = resume_text.lower()
    word_count = len(resume_text.split())

    if word_count < 100:
        print("The uploaded document is too short to be a valid resume.")
        return

    section_keywords = [
        "education", "skills", "experience", "project", "internship", 
        "certification", "achievement", "profile", "summary", "objective"
    ]
    section_matches = sum(1 for keyword in section_keywords if keyword in resume_lower)

    if section_matches < 3:
        print("The document does not appear to contain enough resume-related content.")
        print("Please upload a valid resume with sections like 'Skills', 'Projects', or 'Experience'.")
        return

    user_embedding = bert_model.encode([resume_text])
    pred_probs = model.predict_proba(user_embedding)[0]
    top_indices = np.argsort(pred_probs)[::-1][:3]
    top_roles = le.inverse_transform(top_indices)

    print("\nBased on your resume, suitable job roles for you are:")
    for i, idx in enumerate(top_indices, 1):
        print(f"{i}. {le.inverse_transform([idx])[0]}")

upload_widget.observe(on_upload_change, names='value')


FileUpload(value={'ADS-Meiy_cv_slate.pdf': {'metadata': {'name': 'ADS-Meiy_cv_slate.pdf', 'type': 'application…


Based on your resume, suitable job roles for you are:
1. Marketing Manager
2. Data Scientist
3. UI/UX Designer


In [30]:
import requests
import pandas as pd
import re

# Function to fetch job data from JSearch
def fetch_jobs(job_title, num_pages=3):
    url = "https://jsearch.p.rapidapi.com/search"
    headers = {
        "X-RapidAPI-Key": "c3ef6fae66msh94ef11a4afe89bap155351jsn62cb7c14910d",  # 🔁 Replace with your actual key
        "X-RapidAPI-Host": "jsearch.p.rapidapi.com"
    }

    all_jobs = []

    # Define a list of common technical skills (you can expand this list)
    tech_skills = [
        "Python", "Java", "SQL", "JavaScript", "C++", "R", "C#", "HTML", "CSS", 
        "Machine Learning", "Deep Learning", "TensorFlow", "Keras", "SQL", 
        "Data Analysis", "Data Science", "AWS", "GCP", "Docker", "Kubernetes", "Git", "Linux"
    ]

    for page in range(1, num_pages + 1):
        params = {
            "query": job_title,
            "page": str(page)
        }
        response = requests.get(url, headers=headers, params=params)
        data = response.json()

        for job in data.get("data", []):
            job_title = job.get("job_title", "")
            job_description = job.get("job_description", "")

            # Find technical skills mentioned in the job description
            skills_found = []
            for skill in tech_skills:
                if re.search(r'\b' + re.escape(skill) + r'\b', job_description, re.IGNORECASE):
                    skills_found.append(skill)

            all_jobs.append({
                "Job Title": job_title,
                "Job Description": job_description,
                "Technical Skills": ", ".join(skills_found)
            })

    df = pd.DataFrame(all_jobs)
    return df

# Example usage
job_input = input("Enter a job role: ")  # 👈 Let user give input like "Data Analyst"
df_jobs = fetch_jobs(job_input)
print(df_jobs)


Enter a job role: Full Stack Developer
                                            Job Title  \
0      Senior Developer – Full Stack & Full Impact DC   
1   Full Stack Developer with Active TS Clearance ...   
2   Full Stack Developer – Angular + Spring Boot |...   
3                      Full Stack DevSecOps Engineers   
4              Mid-Senior Full Stack Drupal Developer   
5                           Full Stack Developer Jobs   
6                         Junior Full Stack Developer   
7   Software Engineer, Full Stack (Angular, TypeSc...   
8                       Full-Stack Software Developer   
9                  Full Stack Software Developer Jobs   
10                        Junior Full Stack Developer   
11    Software Developer (Full Stack - Python, React)   
12               Senior Full Stack Developer (Remote)   
13                          Full Stack Developer Jobs   
14        Senior Full-Stack (Java) Software Developer   
15   Full Stack Software Developer (MUST HAVE TS/