<a href="https://colab.research.google.com/github/Tanishqfarkya5/Skill-Gap-Analyzer/blob/main/Skill_Gap_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install -q streamlit pyngrok sentence-transformers pymupdf

import os
from pyngrok import ngrok, conf
import streamlit as st
import fitz  # PyMuPDF
import re
from sentence_transformers import SentenceTransformer, util
import numpy as np
import threading

# ----------------------------
# Credentials are read from environment variables so that no secret is stored
# in the notebook itself. Set them before running this cell, for example:
#   os.environ["NGROK_AUTH_TOKEN"] = "<your ngrok authtoken>"
# SECURITY: never paste a real token into this file. Any token that has been
# committed to version control must be treated as leaked: revoke it in the
# ngrok dashboard and generate a new one.
HF_TOKEN = os.environ.get("HF_TOKEN", "<YOUR_HF_TOKEN>")  # Huggingface token (optional)
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")  # Ngrok authtoken (required)
# ----------------------------

# Authenticate Huggingface (optional, only if you want to suppress warnings)
if HF_TOKEN and HF_TOKEN != "<YOUR_HF_TOKEN>":
    from huggingface_hub import login
    login(token=HF_TOKEN)

# Authenticate ngrok
if NGROK_AUTH_TOKEN:
    conf.get_default().auth_token = NGROK_AUTH_TOKEN
    print("Ngrok authtoken set.")
else:
    # Do not overwrite the default with an empty value; pyngrok then falls
    # back to whatever authtoken the local ngrok configuration provides.
    print(
        "NGROK_AUTH_TOKEN is not set; the tunnel will only work if an "
        "authtoken is already present in the ngrok configuration."
    )

# Prepare Streamlit app code as a string (your provided code)
streamlit_code = '''
import streamlit as st
import fitz  # PyMuPDF
import re
from sentence_transformers import SentenceTransformer, util
import numpy as np

@st.cache_resource(show_spinner=False)
def load_model():
    """Build the sentence-embedding model used for skill similarity.

    The function is wrapped in st.cache_resource (with the spinner disabled),
    so Streamlit decides when the body actually runs.

    Returns:
        A SentenceTransformer instance for the all-MiniLM-L6-v2 checkpoint.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return encoder

# Module-level handle used by compute_similarity.
model = load_model()

# Maps each selectable target job role to the list of skill names expected
# for that role. The union of all these lists is also the vocabulary that is
# searched for in the uploaded resume.
job_role_profiles = {
    "Data Analyst": ["SQL", "Python", "Excel", "Tableau", "Power BI", "Data Visualization", "Statistics", "Data Cleaning", "R"],
    "Software Engineer": ["Python", "Java", "C++", "Git", "OOP", "Data Structures", "Algorithms", "APIs", "Testing", "Docker", "Kubernetes"],
    "Digital Marketer": ["SEO", "Google Analytics", "Email Marketing", "Content Writing", "Social Media", "Copywriting", "Canva", "Google Ads"],
    "Financial Analyst": ["Excel", "Financial Modeling", "Accounting", "SQL", "Power BI", "Valuation", "Statistics", "Risk Management"],
    "HR Specialist": ["Recruiting", "Onboarding", "Communication", "HRIS Systems", "Excel", "Conflict Resolution", "Payroll"],
    "Commerce Graduate": ["Tally", "Accounting", "Business Communication", "Excel", "Marketing Basics", "Taxation", "Banking Concepts"],
    "Graphic Designer": ["Adobe Photoshop", "Illustrator", "Figma", "Canva", "Color Theory", "Typography", "Creativity"],
    "UX/UI Designer": ["Figma", "Wireframing", "User Research", "Prototyping", "Design Thinking", "HTML", "CSS"]
}

# Maps a skill name (spelled exactly as in job_role_profiles) to one suggested
# course, written as "title, then provider". Not every skill has an entry;
# recommend_courses skips skills that are missing from this table.
course_recommendations = {
    "SQL": "SQL for Data Science – Coursera",
    "Excel": "Mastering Excel – Udemy",
    "Tableau": "Tableau A-Z – Udemy",
    "Power BI": "Power BI Essentials – LinkedIn Learning",
    "Statistics": "Statistics for Data Science – edX",
    "Python": "Python for Everybody – Coursera",
    "Java": "Java Programming Masterclass – Udemy",
    "C++": "C++ for Beginners – Codecademy",
    "Git": "Git & GitHub Crash Course – Udemy",
    "SEO": "SEO Specialization – Coursera",
    "Google Analytics": "Google Analytics for Beginners – Google Academy",
    "Email Marketing": "Email Marketing Basics – HubSpot Academy",
    "Figma": "Figma UX Design – Coursera",
    "Illustrator": "Adobe Illustrator for Beginners – Udemy",
    "Tally": "Tally ERP9 Training – Udemy",
    "Accounting": "Financial Accounting Fundamentals – Coursera",
    "Canva": "Graphic Design with Canva – Skillshare",
    "Financial Modeling": "Financial Modeling & Valuation – CFI",
    "Communication": "Business Communication Skills – Coursera",
    "Docker": "Docker for Beginners – Udemy",
    "Kubernetes": "Kubernetes Essentials – Coursera",
    "Google Ads": "Google Ads Certification – Google Academy"
}

def extract_text_from_pdf(file):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        file: A binary file-like object whose read() returns the raw PDF
            bytes (the Streamlit upload object is passed in by the caller).

    Returns:
        One string containing the text of all pages, with nothing inserted
        between pages.
    """
    # Opening the document as a context manager guarantees it is closed even
    # when text extraction raises part-way through.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

def extract_skills(text, skill_list):
    """Find which of the given skills are mentioned in a piece of text.

    Matching is case-insensitive and only accepts whole-token hits: the
    character immediately before and after the skill must not be an ASCII
    letter, digit or underscore. Explicit lookarounds are used instead of the
    regex word-boundary anchor because that anchor can never match after a
    skill that ends in punctuation (for example "C++" followed by a space),
    which made such skills undetectable.

    Args:
        text: The text to search, e.g. the extracted resume text.
        skill_list: An iterable of skill names to look for.

    Returns:
        A sorted list of the distinct skills from skill_list found in text.
        Each skill is returned with the spelling used in skill_list.
    """
    token_char = "[A-Za-z0-9_]"
    found_skills = set()
    for skill in skill_list:
        # re.escape keeps characters such as "+" from being read as regex syntax.
        pattern = "(?<!" + token_char + ")" + re.escape(skill) + "(?!" + token_char + ")"
        if re.search(pattern, text, re.IGNORECASE):
            found_skills.add(skill)
    # Sorting makes the output deterministic; set iteration order is not.
    return sorted(found_skills)

def compute_similarity(student_skills, role_skills):
    """Score how well a set of skills covers the skills of a role.

    Both skill lists are embedded with the module-level sentence model. For
    each role skill the highest cosine similarity against any student skill
    is taken, and the mean of those best scores is scaled to a percentage.

    Args:
        student_skills: Skill names the candidate has.
        role_skills: Skill names required by the role.

    Returns:
        The mean best-match similarity multiplied by 100, as a float, or 0.0
        when either list is empty.
    """
    # Nothing to compare when either side is empty.
    if not (student_skills and role_skills):
        return 0.0

    student_vectors = model.encode(student_skills, convert_to_tensor=True)
    role_vectors = model.encode(role_skills, convert_to_tensor=True)

    # Reducing over dim 0 keeps one best score per entry of role_skills.
    similarity_matrix = util.cos_sim(student_vectors, role_vectors)
    best_per_role_skill = similarity_matrix.max(dim=0).values
    mean_best = np.mean(best_per_role_skill.cpu().numpy())
    return float(mean_best) * 100

def analyze_skill_gap(student_skills, role_skills):
    """Compare the skills a candidate has against the skills a role requires.

    Args:
        student_skills: Skill names found for the candidate.
        role_skills: Skill names required by the target role.

    Returns:
        A tuple (matched_skills, missing_skills, match_score) where
        matched_skills lists the skills present in both inputs,
        missing_skills lists the role skills the candidate lacks, and
        match_score is the compute_similarity result rounded to 2 decimals.
        The two lists are built from sets, so their order is unspecified.
    """
    owned = set(student_skills)
    required = set(role_skills)

    matched_skills = list(owned.intersection(role_skills))
    missing_skills = list(required - owned)

    match_score = round(compute_similarity(student_skills, role_skills), 2)
    return matched_skills, missing_skills, match_score

def recommend_courses(missing_skills):
    """Look up a course suggestion for each missing skill.

    Args:
        missing_skills: Iterable of skill names the candidate lacks.

    Returns:
        A list of course descriptions, in the order of missing_skills.
        Skills with no entry in course_recommendations are skipped.
    """
    suggestions = []
    for skill in missing_skills:
        if skill in course_recommendations:
            suggestions.append(course_recommendations[skill])
    return suggestions

# ---- Page layout -----------------------------------------------------------
# The statements below run top to bottom and emit the page widgets in that
# order: title, uploader, extracted skills, role picker, then the report.
st.title("AI-Powered Skill Gap Analyzer")

# Only PDF uploads are accepted by the uploader.
uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

# Everything below is shown only once a file has been uploaded.
if uploaded_file:
    resume_text = extract_text_from_pdf(uploaded_file)
    # Search the resume for every skill known to any role, not just one role.
    all_skills = set(skill for skills in job_role_profiles.values() for skill in skills)
    extracted_skills = extract_skills(resume_text, all_skills)

    st.subheader("Extracted Skills from Resume")
    # Show a fallback message instead of an empty list.
    st.write(extracted_skills if extracted_skills else "No matching skills found.")

    st.subheader("Select your target job role")
    selected_role = st.selectbox("Job Role", list(job_role_profiles.keys()))

    # The report is computed and rendered only when the button is pressed.
    if st.button("Analyze Skill Gap"):
        role_skills = job_role_profiles[selected_role]
        matched, missing, score = analyze_skill_gap(extracted_skills, role_skills)
        recommendations = recommend_courses(missing)

        st.subheader("Skill Gap Report")
        st.write(f"**Target Role:** {selected_role}")
        st.write(f"**Match Score:** {score}%")
        st.write(f"**Skills You Have:** {matched}")
        st.write(f"**Skills You Need to Learn:** {missing}")

        st.subheader("Recommended Courses")
        # One bullet per recommended course, or a fallback line when the
        # missing skills have no entry in course_recommendations.
        if recommendations:
            for c in recommendations:
                st.write(f"- {c}")
        else:
            st.write("No specific recommendations available.")
'''

# Save the Streamlit app code to a file.
# The app source contains non-ASCII characters (en dashes in the course
# titles), so the encoding is pinned to UTF-8 rather than left to the
# platform default, which is not UTF-8 everywhere.
with open("skill_gap_app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_code)

# Run the streamlit app in a new thread (to avoid blocking the notebook)
def run_streamlit():
    """Start the Streamlit server for skill_gap_app.py and wait for it to exit.

    The server is launched with the current interpreter and an explicit
    argument list (no shell string). The port is pinned to 8501 so that
    Streamlit fails loudly if that port is taken instead of quietly moving to
    another one that the ngrok tunnel does not point at. A non-zero exit
    status is reported rather than silently discarded.

    Returns:
        None.
    """
    import subprocess
    import sys

    command = [
        sys.executable, "-m", "streamlit", "run", "skill_gap_app.py",
        "--server.port", "8501",
    ]
    exit_code = subprocess.run(command, check=False).returncode
    if exit_code != 0:
        print(f"Streamlit exited with status {exit_code}.")

import threading
# Daemon thread: it does not keep the interpreter alive on shutdown.
threading.Thread(target=run_streamlit, daemon=True).start()

# Setup ngrok tunnel for Streamlit
# The tunnel forwards to local port 8501; presumably this is the port the
# Streamlit server launched by this script listens on (Streamlit default) -
# confirm if the server port is ever changed.
public_url = ngrok.connect(8501)
# Prints whatever ngrok.connect returned; NOTE(review): depending on the
# pyngrok version this may be a tunnel object rather than a bare URL string.
print(f"Your Streamlit app is live at: {public_url}")


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m54.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m23.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━