In [186]:
import json
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from google.colab import drive

# Mount Google Drive at /content/drive; the JSON files opened in the next
# cell are read from paths under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [187]:
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# instead of depending on the runtime's locale default.

# Question banks: each maps a label to its list of question strings
# (they are indexed that way by ask_questions_with_model).
with open("/content/drive/MyDrive/Colab Notebooks/cluster_questions.json", encoding="utf-8") as f:
    cluster_questions = json.load(f)
with open("/content/drive/MyDrive/Colab Notebooks/domain_questions.json", encoding="utf-8") as f:
    domain_questions = json.load(f)
with open("/content/drive/MyDrive/Colab Notebooks/role_questions_full.json", encoding="utf-8") as f:
    role_questions = json.load(f)

# Hierarchies: run_career_quiz looks up a parent label here to get the child
# labels for the next level. NOTE(review): exact schema not visible here —
# presumably parent label -> list of child labels; confirm against the files.
with open("/content/drive/MyDrive/Colab Notebooks/clusters.json", encoding="utf-8") as f:
    cluster_hierarchy = json.load(f)
with open("/content/drive/MyDrive/Colab Notebooks/domains.json", encoding="utf-8") as f:
    domain_hierarchy = json.load(f)
with open("/content/drive/MyDrive/Colab Notebooks/roles_full.json", encoding="utf-8") as f:
    role_hierarchy = json.load(f)


In [188]:
import numpy as np

def encode_answer(ans):
    """Convert a free-text quiz answer into its numeric score.

    Surrounding whitespace and letter case are ignored. "yes" scores 2,
    "not sure" scores 1, and any other text scores 0.
    """
    score_by_answer = {'yes': 2, 'not sure': 1}
    normalized = ans.strip().lower()
    # Anything that is not an exact match (including "no") falls back to 0.
    return score_by_answer.get(normalized, 0)

def pad_vector(vec, max_len):
    """Return a new list holding `vec` followed by zeros up to `max_len` items.

    When `vec` already has `max_len` or more items nothing is appended (and
    nothing is truncated).
    """
    shortfall = max_len - len(vec)
    # range() of a non-positive count is empty, so over-long input is left as is.
    filler = [0 for _ in range(shortfall)]
    return vec + filler


def predict_model(model, input_vector):
    """Run `model` on a single feature vector and return class probabilities.

    Args:
        model: Callable taking a float32 tensor of shape [1, input_dim] and
            returning logits of shape [1, n_labels] (typically an nn.Module).
        input_vector: Flat sequence of numbers with length input_dim.

    Returns:
        1-D NumPy array of softmax probabilities, one entry per label.
    """
    # Build the input on the same device as the model's weights. train_model
    # moves the model to CUDA when available, and feeding it a CPU tensor
    # would raise a device-mismatch RuntimeError.
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        x = torch.tensor([input_vector], dtype=torch.float32, device=device)
        out = model(x)
        # Move back to CPU before converting; .numpy() only works on CPU tensors.
        probs = torch.softmax(out, dim=1).cpu().numpy()[0]
    return probs

In [189]:
def generate_synthetic_data(questions_dict, labels, n_samples=300, input_dim=None):
    """Create noisy synthetic training data for one quiz level.

    Each sample picks a target label at random. Every label owns a fixed-size
    slot of features: the target label's slot is filled with values near 1,
    all other slots with values near 0 (uniform noise of +/-0.2).

    The slot size is the longest question list in `questions_dict`, so the
    feature width equals `len(labels) * max_questions` — the same width
    ask_questions_with_model pads its input to. If `questions_dict` is empty
    or None the previous fixed slot size of 2 is used.

    NOTE(review): encode_answer scores real answers on a 0/1/2 scale while
    these features are centred on 0/1 — confirm the scale difference is
    intended.

    Args:
        questions_dict: Mapping of label -> list of questions (used only for
            the question counts).
        labels: List of class labels; the class index is the position here.
        n_samples: Number of rows to generate.
        input_dim: Optional minimum feature width; rows are right-padded with
            zeros when narrower.

    Returns:
        (X, y): float32 array of shape [n_samples, width] and int64 array of
        shape [n_samples] with class indices.
    """
    question_lists = list(questions_dict.values()) if questions_dict else []
    questions_per_label = max((len(q_list) for q_list in question_lists), default=2)
    width = len(labels) * questions_per_label

    X, y = [], []
    for _ in range(n_samples):
        label = random.choice(labels)
        features = []
        for lbl in labels:
            match = int(lbl == label)
            for _slot in range(questions_per_label):
                features.append(match + random.uniform(-0.2, 0.2))
        X.append(features)
        y.append(labels.index(label))

    # The explicit reshape keeps X two-dimensional even when n_samples is 0.
    X = np.array(X, dtype=np.float32).reshape(len(X), width)
    if input_dim is not None and X.shape[1] < input_dim:
        X = np.pad(X, ((0, 0), (0, input_dim - X.shape[1])), constant_values=0)
    return X, np.array(y, dtype=np.int64)

In [190]:
class SimpleMLP(nn.Module):
    """Two-layer perceptron: Linear(input_dim, 32) -> ReLU -> Linear(32, output_dim).

    The forward pass returns raw logits; no softmax is applied here.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_units = 32
        self.fc1 = nn.Linear(input_dim, hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, output_dim)

    def forward(self, x):
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.fc2(activated)
        return logits

In [191]:
def train_model(X, y, output_dim, input_dim=None, epochs=100, lr=0.01):
    """Fit a SimpleMLP classifier with full-batch Adam and return it.

    Args:
        X: NumPy feature array of shape [n_samples, n_features].
        y: NumPy array of integer class indices, one per row of X.
        output_dim: Number of classes (size of the model's output layer).
        input_dim: Width of the model's input layer; defaults to X.shape[1].
        epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.

    Returns:
        The trained SimpleMLP, left on the device it was trained on (CUDA
        when available, otherwise CPU).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if input_dim is None:
        input_dim = X.shape[1]
    model = SimpleMLP(input_dim, output_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Only the 80% training split is used; the held-out 20% is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)

    # The training batch never changes, so convert and transfer it once
    # rather than on every epoch.
    inputs = torch.from_numpy(X_train).float().to(device)
    targets = torch.from_numpy(y_train).long().to(device)

    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
    return model

In [192]:
def ask_questions_with_model(level_name, questions_dict, model, labels):
    """Interactively pick one label for a quiz level.

    Flow:
      1. Ask the first question of every label and score the answers with
         encode_answer.
      2. While several labels share the top score, ask each tied label its
         next question (up to the longest question list).
      3. If the tie survives, pick randomly among the tied labels.
      4. Ask one more confirmation question for the winner, if it has one
         left. On a "no", switch to the label the model rates highest among
         the remaining labels.

    Args:
        level_name: Name shown in the intro line (e.g. "cluster").
        questions_dict: Mapping of label -> list of question strings. Labels
            missing from the mapping are treated as having no questions.
        model: Classifier passed to predict_model; expected to take
            len(labels) * max_questions features and emit one logit per label.
        labels: Candidate labels; their order defines the feature layout and
            the class indices.

    Returns:
        (final_label, encoded_answers) where encoded_answers is the flat list
        of encoded answers in label order (unpadded).

    Raises:
        ValueError: If `labels` is empty.
    """
    if not labels:
        raise ValueError("labels must contain at least one label")

    max_questions = max((len(q_list) for q_list in questions_dict.values()), default=0)
    user_answers_dict = {label: [] for label in labels}
    question_indices = {label: 0 for label in labels}

    def get_next_question(label):
        # .get() so a label without an entry simply has no questions
        # instead of raising KeyError.
        pending = questions_dict.get(label, [])
        idx = question_indices[label]
        if idx < len(pending):
            question_indices[label] += 1
            return pending[idx]
        return None

    def encode_answers_dict(ans_dict):
        combined = []
        for label in labels:
            combined.extend([encode_answer(a) for a in ans_dict[label]])
        return combined

    def build_model_input(ans_dict):
        # Give every label a fixed slot of max_questions values so position i
        # always belongs to the same label, matching the per-label layout the
        # model is trained on. A plain concatenation would shift slots
        # whenever labels have different numbers of answers.
        aligned = []
        for label in labels:
            encoded = [encode_answer(a) for a in ans_dict[label]][:max_questions]
            aligned.extend(pad_vector(encoded, max_questions))
        return aligned

    print(f"\n✨ Let's explore your interests for the {level_name} level! ✨")

    # Round 1: Ask first question from each label
    for label in labels:
        q = get_next_question(label)
        if q is not None:
            ans = input(f"👉 {q} (Yes/No/Not Sure): ").strip()
        else:
            # No question available: record the same default the tie-break
            # rounds use rather than prompting with "None".
            ans = "no"
        user_answers_dict[label].append(ans)

    # Manual scoring to detect tie
    scores = {label: sum(encode_answer(ans) for ans in answers)
              for label, answers in user_answers_dict.items()}
    max_score = max(scores.values())
    tied_labels = [label for label, score in scores.items() if score == max_score]

    # Tie-break rounds
    tie_break_round = 2
    while len(tied_labels) > 1 and tie_break_round <= max_questions:
        print(f"\n🔄 Tie detected among {tied_labels}, tie-break round {tie_break_round - 1}")
        for label in tied_labels:
            q = get_next_question(label)
            if q is not None:
                ans = input(f"👉 (Tie-break) {q} (Yes/No/Not Sure): ").strip()
                user_answers_dict[label].append(ans)
            else:
                user_answers_dict[label].append("no")  # default

        # Only labels that were still tied stay in contention.
        scores = {label: sum(encode_answer(ans) for ans in user_answers_dict[label])
                  for label in tied_labels}
        max_score = max(scores.values())
        tied_labels = [label for label, score in scores.items() if score == max_score]
        tie_break_round += 1

    # If tie persists after all questions
    if len(tied_labels) > 1:
        print(f"\n⚠️ Tie unresolved after all questions. Selecting randomly among {tied_labels}.")
        final_label = random.choice(tied_labels)
    else:
        final_label = tied_labels[0]

    # Final confirmation question
    confirm_q = get_next_question(final_label)
    if confirm_q:
        ans = input(f"\n👉 Final confirmation - {confirm_q} (Yes/No/Not Sure): ").strip()
        user_answers_dict[final_label].append(ans)
        if encode_answer(ans) == 0:  # user says 'no'
            # Fall back to the label the model rates highest among the
            # others. Ranking only the alternatives works with a single
            # label (no switch) and when the model's top pick is not the
            # rejected label.
            probs = predict_model(model, build_model_input(user_answers_dict))
            alternatives = [i for i, label in enumerate(labels)
                            if label != final_label and i < len(probs)]
            if alternatives:
                best_idx = max(alternatives, key=lambda i: probs[i])
                runner_up = labels[best_idx]
                print(f"❗ You responded No to confirmation. Switching to: {runner_up}")
                final_label = runner_up

    return final_label, encode_answers_dict(user_answers_dict)


In [193]:
def _run_quiz_level(level_name, questions, candidate_labels):
    """Train a model for one quiz level, ask its questions, return the chosen label.

    Only candidates that have an entry in `questions` are kept, so the label
    list and the question pool always agree. If none of the candidates has
    questions, every label in `questions` is used instead.
    """
    pool = {lbl: questions[lbl] for lbl in candidate_labels if lbl in questions}
    if not pool:
        pool = dict(questions)
    labels = list(pool.keys())
    X, y = generate_synthetic_data(pool, labels)
    model = train_model(X, y, len(labels))
    chosen, _ = ask_questions_with_model(level_name, pool, model, labels)
    return chosen


def run_career_quiz():
    """Run the three-level interactive quiz (cluster -> domain -> role).

    Each level trains a fresh model on synthetic data, asks the user that
    level's questions, and uses the chosen label to look up the candidate
    labels for the next level in the hierarchy dicts. When the chosen label
    has no hierarchy entry, every label with questions at the next level is
    a candidate. The result is printed; nothing is returned.
    """
    print("✨ Welcome to the AI Career Path Advisor! ✨")

    # Cluster Level
    cluster = _run_quiz_level("cluster", cluster_questions, list(cluster_questions.keys()))
    print(f"\n🎯 Based on your interests, your top tech **Cluster** is: **{cluster}**.")

    # Domain Level
    domain_candidates = domain_hierarchy.get(cluster, list(domain_questions.keys()))
    domain = _run_quiz_level("domain", domain_questions, domain_candidates)
    print(f"\n📌 Within the {cluster} cluster, your best-fit **Domain** is: **{domain}**.")

    # Role Level
    role_candidates = role_hierarchy.get(domain, list(role_questions.keys()))
    role = _run_quiz_level("role", role_questions, role_candidates)
    print(f"\n🚀 In the {domain} domain, your ideal **Role** could be: **{role}**.")

    # Final Output
    print("\n✅ Final Career Recommendation:")
    print(f"🔹 Cluster: {cluster}")
    print(f"🔹 Domain : {domain}")
    print(f"🔹 Role   : {role}")


In [194]:
# Start the interactive quiz. The __main__ guard keeps the quiz from
# launching if this code is ever imported as a module instead of run directly.
if __name__ == "__main__":
    run_career_quiz()

✨ Welcome to the AI Career Path Advisor! ✨

✨ Let's explore your interests for the cluster level! ✨
👉 Are you interested in developing software applications or building mobile apps? (Yes/No/Not Sure): yes
👉 Do you find data analysis, machine learning, or AI exciting? (Yes/No/Not Sure): yes
👉 Are you interested in IT infrastructure, cloud platforms, or network security? (Yes/No/Not Sure): no
👉 Are you fascinated by blockchain, IoT devices, AR/VR, or robotics? (Yes/No/Not Sure): no
👉 Are you interested in managing digital products or leading tech startups? (Yes/No/Not Sure): no

🔄 Tie detected among ['Software & Application Development', 'Data, AI & Analytics'], tie-break round 1
👉 (Tie-break) Do you enjoy working with programming languages and coding projects? (Yes/No/Not Sure): yes
👉 (Tie-break) Have you worked with data or predictive modeling tools before? (Yes/No/Not Sure): yes

🔄 Tie detected among ['Software & Application Development', 'Data, AI & Analytics'], tie-break round 2
👉 (