In [None]:
# Cell 1: Install dependencies
!pip install transformers torch pandas --quiet


In [None]:
# Cell 2: Build a small synthetic skill -> career table and save it as CSV
import pandas as pd

# Each entry pairs one skill with the career recommended for it.
skill_career_pairs = [
    ("Python", "Data Scientist"),
    ("Machine Learning", "AI Engineer"),
    ("JavaScript", "Frontend Developer"),
    ("Data Analysis", "Business Analyst"),
    ("React", "Frontend Developer"),
    ("Python", "Backend Developer"),
    ("Data Science", "Data Scientist"),
    ("SQL", "Database Administrator"),
    ("Django", "Backend Developer"),
    ("Communication", "HR Manager"),
]

# Column-oriented view of the pairs, in the same row order.
data = {
    "Skill": [skill_name for skill_name, _ in skill_career_pairs],
    "RecommendedCareer": [career_name for _, career_name in skill_career_pairs],
}

df = pd.DataFrame(data)
# Persist without the index column so the file holds only the two data columns.
df.to_csv("career_data.csv", index=False)
df.head()


Unnamed: 0,Skill,RecommendedCareer
0,Python,Data Scientist
1,Machine Learning,AI Engineer
2,JavaScript,Frontend Developer
3,Data Analysis,Business Analyst
4,React,Frontend Developer


In [None]:
# Cell 3: Turn the skill column into binary indicator features and encode the career labels
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Keep at most the first 9 distinct skills, in order of first appearance.
skills = df['Skill'].drop_duplicates().head(9).tolist()

# One 0/1 column per retained skill: 1 where the row's skill equals that skill.
for skill in skills:
    df[f'skill_{skill}'] = (df['Skill'] == skill).astype('int64')

# Integer-encode the target; `le` is kept so predictions can be mapped back to career names.
le = LabelEncoder().fit(df['RecommendedCareer'])
df['career_label'] = le.transform(df['RecommendedCareer'])

feature_columns = [f'skill_{s}' for s in skills]
X = df[feature_columns]
y = df['career_label']

# 80/20 split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Cell 4: Fit the KNN baseline and report what input it expects
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Report the input layout the fitted model expects.
print("KNN model expects", knn.n_features_in_, "features.")

# Keep the fitted column names (or None when the model has none) for later cells.
fitted_feature_names = getattr(knn, 'feature_names_in_', None)
if fitted_feature_names is None:
    print("KNN model does not have feature names.")
else:
    print("KNN model expects features with names:", fitted_feature_names)

# Score the baseline on the held-out rows.
y_pred = knn.predict(X_test)
baseline_accuracy = accuracy_score(y_test, y_pred)
print("Baseline KNN accuracy:", baseline_accuracy)

KNN model expects 9 features.
KNN model expects features with names: ['skill_Python' 'skill_Machine Learning' 'skill_JavaScript'
 'skill_Data Analysis' 'skill_React' 'skill_Data Science' 'skill_SQL'
 'skill_Django' 'skill_Communication']
Baseline KNN accuracy: 0.5


In [None]:
# Cell 5: Load the three text-generation models (GPT-2, DistilGPT-2, DialoGPT)
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    pipeline,
)

# GPT-2: model and tokenizer loaded separately; the end-of-sequence token doubles as padding.
gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

# DistilGPT-2: wrapped in a ready-to-call text-generation pipeline.
distilgpt2 = pipeline(task='text-generation', model='distilgpt2')

# DialoGPT: tokenizer and causal LM resolved from the same checkpoint name.
dg_name = "microsoft/DialoGPT-medium"
dg_model = AutoModelForCausalLM.from_pretrained(dg_name)
dg_tokenizer = AutoTokenizer.from_pretrained(dg_name)

print("All models loaded")


Device set to use cpu


All models loaded


In [None]:
# Updated Cell 6: detailed_advice function with attention_mask
def detailed_advice(profile_text, max_new_tokens=256):
    """Generate long-form advice text for a profile using the GPT-2 model.

    The profile is wrapped in a "Profile: ... Advice:" prompt, GPT-2 samples a
    continuation, and only the newly generated text is returned.

    Args:
        profile_text: Free-form text describing the profile (or any instruction)
            to place in the prompt.
        max_new_tokens: Maximum number of tokens to generate after the prompt.

    Returns:
        The generated continuation as a string, without the prompt and with
        surrounding whitespace stripped.
    """
    prompt = f"Profile:\n{profile_text}\nAdvice:\n"

    # Reserve room for the generated tokens inside the model's context window, so
    # a long prompt cannot push prompt + generation past the position limit.
    context_limit = getattr(gpt2_model.config, "n_positions", 1024)
    max_prompt_tokens = max(1, context_limit - max_new_tokens)
    inputs = gpt2_tokenizer.encode(
        prompt,
        return_tensors='pt',
        truncation=True,
        max_length=max_prompt_tokens,
    )
    # Single unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(inputs)

    with torch.no_grad():
        outputs = gpt2_model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,      # use max_new_tokens only
            temperature=0.65,                   # lower temperature for coherent output
            top_p=0.85,
            do_sample=True,
            no_repeat_ngram_size=3,
            pad_token_id=gpt2_tokenizer.eos_token_id
        )

    # Drop the prompt by token position instead of string replacement: replace()
    # would also delete any later repetition of the prompt, and would remove
    # nothing if the decoded text did not reproduce the prompt exactly.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return gpt2_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


In [None]:
# Cell 7: quick_tip - short suggestion from the DistilGPT-2 pipeline
def quick_tip(skills, industry, max_new_tokens=50):
    """Generate a short "next skill to learn" suggestion with DistilGPT-2.

    Args:
        skills: Text describing the current skills; inserted verbatim into the prompt.
        industry: Text naming the industry; inserted verbatim into the prompt.
        max_new_tokens: Maximum number of tokens to generate after the prompt.

    Returns:
        The generated continuation as a string, without the prompt and with
        surrounding whitespace stripped.
    """
    prompt = f"Skills: {skills}. Industry: {industry}. Next skill to learn:"
    result = distilgpt2(
        prompt,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        temperature=0.65,
        top_p=0.85,
        do_sample=True,
        no_repeat_ngram_size=3,
        # Let the pipeline return only the continuation. Stripping the prompt
        # with str.replace() would also remove later repetitions of it and
        # would remove nothing if the text did not round-trip exactly.
        return_full_text=False
    )
    return result[0]['generated_text'].strip()


In [None]:
# Cell 8: Function: conversation via DialoGPT
# Token ids of the conversation so far (user turns and replies); None until the first call.
chat_hist = None
def chat_career(msg, max_len=150):
    """Send one user message to DialoGPT and return its reply.

    The running conversation is kept in the module-level ``chat_hist`` tensor,
    which is updated on every call with the model input plus the new reply.

    Args:
        msg: The user's message text.
        max_len: Maximum number of tokens the reply may add to the input.

    Returns:
        The decoded reply text for this turn (special tokens removed).
    """
    global chat_hist
    user = dg_tokenizer.encode(msg + dg_tokenizer.eos_token, return_tensors='pt')
    bot_in = user if chat_hist is None else torch.cat([chat_hist, user], dim=-1)

    # The history grows every turn; keep only the most recent tokens so that
    # input + reply always fits inside the model's context window.
    context_limit = getattr(dg_model.config, "n_positions", 1024)
    input_budget = max(1, context_limit - max_len)
    if bot_in.shape[-1] > input_budget:
        bot_in = bot_in[:, -input_budget:]

    input_len = bot_in.shape[-1]
    chat_hist = dg_model.generate(
        bot_in,
        # Single unpadded sequence: every position is a real token.
        attention_mask=torch.ones_like(bot_in),
        max_length=input_len + max_len,
        num_beams=5, early_stopping=True,
        pad_token_id=dg_tokenizer.eos_token_id
    )
    # Everything after the input positions is the newly generated reply.
    return dg_tokenizer.decode(chat_hist[0, input_len:], skip_special_tokens=True)


In [None]:
# Cell 9: Full consultation function combining model + KNN baseline
# NOTE: Cell 10 defines full_consult again with an additional feature_names
# argument; once that cell runs, it shadows this definition.
def full_consult(skills, industry):
    """Combine the KNN baseline with the two generated advice texts.

    Args:
        skills: Comma-separated skill names, e.g. "Python, SQL".
        industry: Text naming the industry; used only in the generation prompts.

    Returns:
        A dict with keys "baseline_recommendation" (career name predicted by the
        KNN model), "detailed_plan" (text from detailed_advice) and "quick_tip"
        (text from quick_tip).
    """
    prof = f"Skills: {skills}\nIndustry: {industry}"
    plan = detailed_advice(prof)
    tip = quick_tip(skills, industry)

    # Build one indicator per training feature column. The previous version
    # iterated over the characters of the skills string, so the vector had the
    # wrong length and no relation to the columns the model was fitted on.
    requested = {part.strip() for part in skills.split(',')}
    feature_columns = list(X.columns)
    prefix = 'skill_'
    indicator_row = [
        int(column.startswith(prefix) and column[len(prefix):] in requested)
        for column in feature_columns
    ]
    # Named columns match the frame the KNN model was fitted on.
    knn_input = pd.DataFrame([indicator_row], columns=feature_columns)
    knn_pred = le.inverse_transform(knn.predict(knn_input))[0]

    return {
        "baseline_recommendation": knn_pred,
        "detailed_plan": plan,
        "quick_tip": tip
    }


In [None]:
# Cell 10: Run full consultation (replace arguments as needed)
import numpy as np
import pandas as pd # Import pandas here as well for creating DataFrame

# fitted_feature_names is set by Cell 4 after the KNN model is fitted. Look it up
# through globals() so a kernel where that cell has not run gets this explicit
# error instead of a NameError raised by the check itself.
if globals().get("fitted_feature_names") is None:
    raise RuntimeError("Fitted feature names are not available. Please run the previous cell first.")

def full_consult(skills_input_str, industry, feature_names=None):
    """Combine the KNN baseline with the two generated advice texts.

    Args:
        skills_input_str: Comma-separated skill names, e.g. "Python, SQL".
            Whitespace around each name is ignored; names that match no
            feature column are skipped.
        industry: Text naming the industry; used only in the generation prompts.
        feature_names: Feature column names in the order the KNN model was
            fitted with (e.g. 'skill_Python'). When omitted, the names stored
            on the fitted ``knn`` model are used.

    Returns:
        A dict with keys "baseline_recommendation" (career name predicted by the
        KNN model), "detailed_plan" (text from detailed_advice) and "quick_tip"
        (text from quick_tip).

    Raises:
        ValueError: If feature_names is omitted and the fitted model has no
            stored feature names.
    """
    if feature_names is None:
        feature_names = getattr(knn, "feature_names_in_", None)
        if feature_names is None:
            raise ValueError(
                "feature_names was not provided and the fitted KNN model has no feature names."
            )
    feature_names = list(feature_names)

    prof = f"Skills: {skills_input_str}\nIndustry: {industry}"
    plan = detailed_advice(prof)
    tip = quick_tip(skills_input_str, industry)

    # Map each base skill name to the position of its column. Positions come
    # from enumerate() over feature_names, so they stay aligned with the
    # columns even if two names reduce to the same skill. Only a leading
    # 'skill_' is removed, so skill names containing that text are kept intact.
    prefix = 'skill_'
    column_index = {
        (name[len(prefix):] if name.startswith(prefix) else name): position
        for position, name in enumerate(feature_names)
    }

    knn_input_data = np.zeros(len(feature_names), dtype=int)
    for skill in (part.strip() for part in skills_input_str.split(',')):
        position = column_index.get(skill)
        if position is not None:
            knn_input_data[position] = 1

    # Create a pandas DataFrame with the same column names the model was fitted on
    knn_input_df = pd.DataFrame([knn_input_data], columns=feature_names)

    knn_pred = le.inverse_transform(knn.predict(knn_input_df))[0]

    return {
        "baseline_recommendation": knn_pred,
        "detailed_plan": plan,
        "quick_tip": tip
    }

# Run one example consultation, handing over the feature names captured at fit time
demo_skills = "Python, Machine Learning, Data Analysis"
demo_industry = "Tech"
result = full_consult(demo_skills, demo_industry, fitted_feature_names)

baseline_career = result["baseline_recommendation"]
detailed_plan_text = result["detailed_plan"]
print("Baseline:", baseline_career)
print("\nDetailed Plan:\n", detailed_plan_text)
# result["quick_tip"] is also available but is not printed in this cell.

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Baseline: Backend Developer

Detailed Plan:
 I'm a Python programmer, so I know a lot about programming. I've been doing some research on Python and Machine Learning.
I also like to talk about the history of the Python programming language.
Here are some of my favorite parts of the book:
Python is a great language. It's not the best, but it's a great way to learn Python.
There are a lot of good articles on Python here.
In the end, I think it's very easy to learn a language that's very different from the rest of the world.
The book has a lot to say about the Python community. It has a great focus on machine learning and machine learning, and a lot more about machine learning.
This is a good book. It covers a lot.
So, what's the future of Python?
I think that this book is a very good introduction to Python. It gives a lot for Python developers to learn, and it's not just about learning. It also gives a good overview of how Python is being used in the world today.
A lot of people are goin

In [None]:
# Cell 11: Interactive AI Career Choice and Emerging AI Integration Advice

def interactive_career_advisor():
    """Ask the user to pick a career, then print three generated advice texts.

    The user chooses from a fixed list by number (re-prompted until the input
    is a valid option). The function then prints, in order: advice for the
    chosen career, emerging-AI commentary for the chosen career, and
    emerging-AI commentary comparing the remaining careers to the chosen one.
    All text comes from detailed_advice(). Nothing is returned.
    """
    career_options = [
        "Data Scientist",
        "AI Engineer",
        "Frontend Developer",
        "Backend Developer",
        "Business Analyst",
        "Machine Learning Researcher"
    ]
    option_count = len(career_options)

    def _print_advice(heading, prompt, **generation_kwargs):
        # Generate first, then print the heading followed by the generated text.
        advice_text = detailed_advice(prompt, **generation_kwargs)
        print(heading)
        print(advice_text)

    print("AI Career Advisor - Please select a career path to receive guidance:")
    for number, title in enumerate(career_options, start=1):
        print(f"{number}. {title}")

    # Keep asking until the input parses as an integer within 1..option_count.
    while True:
        try:
            choice = int(input(f"Enter the option number (1-{option_count}): "))
        except ValueError:
            print("Invalid input. Please enter a valid number.")
            continue
        if 1 <= choice <= option_count:
            break
        print("Invalid option number. Please try again.")

    selected_career = career_options[choice - 1]
    print(f"\nYou selected: {selected_career}")

    # Advice focused on the chosen career (default generation length).
    selected_prompt = (
        "Provide detailed, focused career advice for someone pursuing a career as a "
        + f"{selected_career}. Include skills required, common challenges, and growth opportunities."
    )
    _print_advice("\nAI Career Advice:", selected_prompt)

    # Emerging AI trends for the chosen career.
    emerging_prompt_selected = (
        "Discuss emerging trends in Artificial Intelligence specifically in the field of "
        + f"{selected_career} and how AI can revolutionize and be integrated in this area."
    )
    _print_advice(
        "\nEmerging AI Trends and Integration for selected career:",
        emerging_prompt_selected,
        max_new_tokens=200,
    )

    # Every option except the chosen one, in the original order.
    other_careers = career_options[:choice - 1] + career_options[choice:]
    other_prompt = (
        "Briefly describe emerging AI trends and opportunities in the fields of "
        + f"{', '.join(other_careers)}. Compare and contrast these fields to {selected_career}."
    )
    _print_advice(
        "\nEmerging AI Trends and Integration for other careers:",
        other_prompt,
        max_new_tokens=200,
    )

# Run the interactive advisor. This call waits on input() until a valid option
# number is entered, then prints the three generated advice sections.
interactive_career_advisor()



AI Career Advisor - Please select a career path to receive guidance:
1. Data Scientist
2. AI Engineer
3. Frontend Developer
4. Backend Developer
5. Business Analyst
6. Machine Learning Researcher
Enter the option number (1-6): 1

You selected: Data Scientist

AI Career Advice:
Write a career plan for your career and plan to meet with a Data Science Analyst at least once a month.
Have a team of Data Science professionals on hand to answer questions about your career, and help you with your career goals.
Help you develop a career for yourself, and share your insights with others.
Write for a professional who is passionate about data science, and you will be able to gain valuable insight into your own career and career path.
A Data Scientist is a qualified Data Scientist with a passion for data science. They are interested in data science and data analytics, and want to contribute to the development of the next generation of data science tools.
You will also find that the most successful 