In [1]:
!pip install transformers torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [10]:
pip install -U bitsandbytes



In [16]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from google.colab import userdata


# Look up the Hugging Face access token stored in Colab Secrets under
# 'hf_token'. Any failure (or an empty secret) leaves hf_token as None so the
# notebook can still try to load the model without authentication.
hf_token = None
try:
    hf_token = userdata.get('hf_token') or None
except Exception as e:
    print(f"Error retrieving HF_TOKEN: {str(e)}")
    print("Warning: Token retrieval failed, but proceeding for public model access.")
else:
    if hf_token is None:
        print("Warning: HF_TOKEN not found in Colab Secrets under 'hf_token'. Proceeding for public model access.")


!pip install -U transformers accelerate -q


# Hugging Face checkpoint used for both the tokenizer and the model.
MODEL_ID = "google/gemma-2-2b-it"

# Load the tokenizer and the half-precision causal LM; `hf_token` may be None.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=hf_token,
        device_map="auto",
        torch_dtype=torch.float16,
    )
except Exception as e:
    print(f"Error loading model or tokenizer: {str(e)}")
    # Re-raise instead of calling exit(1): inside a notebook kernel `exit` is
    # an IPython autocall that does not raise, so the remainder of the cell
    # would keep executing with `model` / `tokenizer` undefined.
    raise

def generate_questions(
    intern_profile,
    job_description,
    max_new_tokens=600,
    temperature=0.7,
    top_p=0.9,
):
    """Ask the loaded chat model for interview questions tailored to an intern.

    Builds a single-turn chat prompt from the profile and job description,
    samples a completion from the module-level ``model`` using the module-level
    ``tokenizer``, and returns only the newly generated text.

    Args:
        intern_profile: Text describing the candidate; inserted verbatim into
            the prompt.
        job_description: Text describing the role; inserted verbatim into the
            prompt.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The decoded completion with surrounding whitespace stripped. If any
        step raises, the exception is not propagated; instead a string of the
        form ``"Error generating questions: <message>"`` is returned.
    """

    prompt = f"""
    You are an expert interview question generator for intern roles. Based on the provided intern profile and job description, generate 5 technical and 5 behavioral interview questions tailored to the candidate's skills and the role's requirements. Ensure technical questions match the candidate’s experience level and the job’s technical needs, and behavioral questions follow the STAR framework (Situation, Task, Action, Result).

    Intern Profile:
    {intern_profile}

    Job Description:
    {job_description}

    Output Format:
    Technical Questions:
    1. [Question]
    2. [Question]
    3. [Question]
    4. [Question]
    5. [Question]

    Behavioral Questions:
    1. [Question]
    2. [Question]
    3. [Question]
    4. [Question]
    5. [Question]
    """
    messages = [
        {"role": "user", "content": prompt}
    ]
    try:
        # Render the chat template to tensors and move them to the model's device.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=top_p
        )

        # Drop the leading prompt tokens so only the completion is decoded.
        generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return generated_text.strip()
    except Exception as e:
        # Best-effort contract: callers print the result, so report failures
        # as text rather than raising.
        return f"Error generating questions: {str(e)}"


# Sample Input 1: Data Science Intern
intern_profile_1 = """
- Role: Data Science Intern
- Skills: Python, SQL, basic machine learning
- Experience: Beginner, 2nd-year BSc Computer Science
- Projects: Built a linear regression model for a university project
"""
job_description_1 = """
- Role: Data Science Intern
- Skills Required: Python, data analysis, team collaboration
- Responsibilities: Analyze datasets, build predictive models, present findings
"""

# Sample Input 2: Software Engineering Intern
intern_profile_2 = """
- Role: Software Engineering Intern
- Skills: Python, Java, basic understanding of web development (HTML, CSS)
- Experience: Beginner, 3rd-year BSc Computer Science
- Projects: Developed a simple web application using Flask and Python for a course project
"""
job_description_2 = """
- Role: Software Engineering Intern
- Skills Required: Python, Java, web development, problem-solving
- Responsibilities: Assist in developing backend features, write clean code, participate in code reviews
"""


# Sample Input 3: Product Management Intern
intern_profile_3 = """
- Role: Product Management Intern
- Skills: Basic Python, user research, communication
- Experience: Beginner, 2nd-year BA Business Administration
- Projects: Conducted user interviews for a mobile app prototype in a class project
"""
job_description_3 = """
- Role: Product Management Intern
- Skills Required: User research, data analysis, teamwork
- Responsibilities: Gather user feedback, assist in defining product requirements, analyze usage data
"""


# Each entry is (display label, intern profile text, job description text);
# the label is used only for the printed section header.
sample_inputs = [
    ("Data Science Intern", intern_profile_1, job_description_1),
    ("Software Engineering Intern", intern_profile_2, job_description_2),
    ("Product Management Intern", intern_profile_3, job_description_3)
]

# Generation samples with do_sample=True, so seed PyTorch's random number
# generators first; otherwise every run of this cell prints different questions.
SEED = 42
torch.manual_seed(SEED)

# Generate and print one set of questions per sample role.
for role, intern_profile, job_description in sample_inputs:
    print(f"\n=== {role} Questions ===")
    questions = generate_questions(intern_profile, job_description)
    print(questions)

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]


=== Data Science Intern Questions ===


W0721 15:51:38.774000 534 torch/_inductor/utils.py:1137] [0/0] Not enough SMs to use max_autotune_gemm mode


## Technical Questions:

1. **Can you walk me through the process of building a linear regression model, starting from data cleaning and feature engineering to model evaluation?** (This assesses their understanding of the ML workflow and ability to articulate it)
2. **You're given a dataset with missing values. How would you handle these missing values in your analysis?** (Tests their knowledge of data preprocessing techniques)
3. **Explain the difference between a classification model and a regression model. Provide an example of when each would be appropriate.** (Evaluates their understanding of different model types and their application scenarios)
4. **Describe a time you encountered a challenging data analysis problem. How did you approach it, and what was the outcome?** (Assesses their problem-solving skills and ability to handle real-world issues)
5. **What are some popular libraries in Python for data visualization? Why are they useful in data science?** (Tests their familiarit