In [None]:
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer, util


In [None]:
# Sentence encoder shared by the cells below for both internship and user text.
model = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
)
# Disabled: loader for precomputed subdomain embeddings stored as a pickle.
# NOTE(review): pickle.load executes arbitrary code; only re-enable for trusted files.
# with open("/content/subdomain_embeddings.pkl", "rb") as f:
#     subdomain_embeddings = pickle.load(f)


In [None]:
# Catalogue of internship postings. Each entry is a dict with a "title" and a
# free-text "description"; the (title, description) pairs below are expanded
# into that dict shape in listing order.
internships = [
    {"title": posting_title, "description": posting_description}
    for posting_title, posting_description in (
        (
            "AI-Powered Fraud Detection System",
            "Develop machine learning models using Python, TensorFlow, and SQL to detect financial fraud in real-time.",
        ),
        (
            "Cloud Infrastructure Engineer Intern",
            "Assist in deploying scalable microservices on AWS and Kubernetes with CI/CD pipelines and Docker containers.",
        ),
        (
            "Full-Stack Developer Intern",
            "Build end-to-end applications using React, Node.js, and Django, focusing on responsive UI and efficient APIs.",
        ),
        (
            "Data Science Research Intern",
            "Work on predictive analytics with Python, R, Pandas, and PyTorch for healthcare datasets.",
        ),
        (
            "Cybersecurity Analyst Intern",
            "Perform penetration testing, vulnerability assessment, and implement network security solutions with SIEM tools.",
        ),
        (
            "DevOps Automation Intern",
            "Automate deployments with Terraform, Docker, and CI/CD pipelines on Azure cloud infrastructure.",
        ),
        (
            "Mobile App Development Intern",
            "Develop Android apps with Java and React Native, integrating Firebase authentication and cloud storage.",
        ),
        (
            "Big Data Engineer Intern",
            "Process large-scale datasets using Hadoop, Spark, and Python for real-time analytics.",
        ),
        (
            "AI Research Intern",
            "Explore advanced deep learning methods using TensorFlow, PyTorch, and reinforcement learning algorithms.",
        ),
        (
            "Network Security Intern",
            "Configure firewalls, monitor security logs, and support cryptography implementations for enterprise systems.",
        ),
        (
            "Autonomous Vehicle AI Intern",
            "Develop and optimize perception and navigation modules using computer vision, LiDAR data processing, ROS, and reinforcement learning algorithms for autonomous driving simulations.",
        ),
        (
            "Quantum Computing Research Intern",
            "Work on designing quantum algorithms and simulating quantum circuits using Qiskit and Python, analyzing performance improvements over classical approaches for optimization problems.",
        ),
        (
            "Robotics Control Systems Intern",
            "Implement real-time control algorithms for robotic manipulators using ROS, Gazebo simulations, PID controllers, and sensor fusion techniques for motion planning.",
        ),
        (
            "AI-Driven Healthcare Diagnostics Intern",
            "Build predictive models for disease detection using multi-modal medical datasets, combining imaging, EHR records, and lab results leveraging Python, PyTorch, and scikit-learn.",
        ),
        (
            "Blockchain Development Intern",
            "Develop decentralized applications and smart contracts on Ethereum using Solidity, Web3.js, and integrate them with off-chain data using oracles and IPFS.",
        ),
        (
            "Natural Language Processing Intern",
            "Design and fine-tune transformer-based models for sentiment analysis, summarization, and question-answering systems using Hugging Face Transformers and TensorFlow.",
        ),
        (
            "Augmented Reality Developer Intern",
            "Create AR experiences and mobile applications using Unity, ARKit/ARCore, and integrate 3D object tracking with real-time environmental mapping.",
        ),
        (
            "Edge AI and IoT Intern",
            "Deploy and optimize lightweight deep learning models on edge devices (Raspberry Pi, NVIDIA Jetson) for real-time analytics, integrating with IoT sensors and MQTT protocols.",
        ),
        (
            "Reinforcement Learning Research Intern",
            "Implement and experiment with deep reinforcement learning algorithms like PPO and DDPG to optimize multi-agent environments using Python and PyTorch.",
        ),
        (
            "Cyber-Physical Systems Security Intern",
            "Analyze and secure industrial control systems, SCADA networks, and IoT devices by performing threat modeling, penetration testing, and implementing anomaly detection with ML techniques.",
        ),
    )
]


In [None]:
# Embed every internship with ONE batched encode() call instead of one model
# call per posting. encode() accepts a list of sentences and, with
# convert_to_tensor=True, returns a single tensor whose i-th row is the
# embedding of the i-th input text.
internship_texts = [
    f"Internship Title: {internship['title']}. Description: {internship['description']}"
    for internship in internships
]
internship_embeddings = model.encode(internship_texts, convert_to_tensor=True)
for internship, embedding in zip(internships, internship_embeddings):
    # Stored on the dict itself because the scoring cell reads internship["embedding"].
    internship["embedding"] = embedding

# The user's skills are rendered as a single sentence and embedded with the
# same model, so the result can be compared against the internship embeddings.
user_skills = ['hadoop','aws','azure']
user_text = f"User profile with skills: {', '.join(user_skills)}."
user_embedding = model.encode(user_text, convert_to_tensor=True)



In [None]:
def compute_skill_overlap(user_skills, internship_embedding, model):
    """Return the mean cosine similarity between each skill and an internship.

    Despite the name, this is not a keyword/set overlap: every skill string is
    embedded with ``model`` and compared to ``internship_embedding`` with
    ``util.cos_sim``; the per-skill similarities are then averaged.

    Args:
        user_skills: Iterable of skill strings.
        internship_embedding: Embedding of one internship, comparable with the
            output of ``model.encode(..., convert_to_tensor=True)``.
        model: Object exposing ``encode(sentences, convert_to_tensor=True)``.

    Returns:
        The average similarity as a Python float, or ``0.0`` when
        ``user_skills`` is empty (the model is not called in that case).
    """
    skills = list(user_skills)
    if not skills:
        return 0.0
    # One batched encode() for all skills rather than one model call per skill.
    skill_embeddings = model.encode(skills, convert_to_tensor=True)
    # cos_sim gives one similarity per skill against the single internship
    # embedding; flatten to a plain list so the mean is computed on Python floats.
    skill_sims = util.cos_sim(skill_embeddings, internship_embedding).flatten().tolist()
    return sum(skill_sims) / len(skill_sims)


In [None]:

# Tunable scoring parameters (previously inline magic numbers).
SIMILARITY_WEIGHT = 0.3       # weight of whole-profile vs. internship similarity
SKILL_OVERLAP_WEIGHT = 0.7    # weight of the per-skill average similarity
MIN_FINAL_SCORE = 0.10        # recommendations at or below this score are dropped
TOP_N = 5                     # number of recommendations to display
RESULT_COLUMNS = ["title", "description", "similarity", "skill_overlap", "final_score"]

# Score every internship: blend the profile-level cosine similarity with the
# per-skill average returned by compute_skill_overlap.
results = []
for internship in internships:
    sim = util.cos_sim(user_embedding, internship["embedding"]).item()
    overlap = compute_skill_overlap(user_skills, internship["embedding"], model)
    final_score = SIMILARITY_WEIGHT * sim + SKILL_OVERLAP_WEIGHT * overlap

    results.append({
        "title": internship["title"],
        "description": internship["description"],
        "similarity": round(sim, 4),
        "skill_overlap": round(overlap, 4),
        "final_score": round(final_score, 4)
    })

# Rank best-first, then keep only scores above the threshold. Both steps use
# the rounded final_score stored in the result dicts.
recommendations = sorted(results, key=lambda x: x["final_score"], reverse=True)
filtered_recommendations = [r for r in recommendations if r["final_score"] > MIN_FINAL_SCORE]

# Passing columns= keeps the frame well-formed when nothing passes the filter;
# without it an empty list yields a column-less frame and selecting the result
# columns raises KeyError.
df = pd.DataFrame(filtered_recommendations[:TOP_N], columns=RESULT_COLUMNS)
print(df)

                                  title  \
0              Big Data Engineer Intern   
1              DevOps Automation Intern   
2  Cloud Infrastructure Engineer Intern   
3         Mobile App Development Intern   
4                Edge AI and IoT Intern   

                                         description  similarity  \
0  Process large-scale datasets using Hadoop, Spa...      0.5157   
1  Automate deployments with Terraform, Docker, a...      0.4473   
2  Assist in deploying scalable microservices on ...      0.4247   
3  Develop Android apps with Java and React Nativ...      0.3962   
4  Deploy and optimize lightweight deep learning ...      0.2863   

   skill_overlap  final_score  
0         0.3571       0.4047  
1         0.3383       0.3710  
2         0.3318       0.3596  
3         0.2580       0.2994  
4         0.2621       0.2693  
