# **AI Virtual Career Counsellor**

## Neural Networks

In [None]:
# Install TensorFlow if needed
# !pip install tensorflow

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, StandardScaler
from sklearn.metrics import accuracy_score, f1_score, hamming_loss
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [2]:
# Load dataset
url = "https://raw.githubusercontent.com/SiddardhaShayini/Career-Recommender-Dataset/refs/heads/main/dataset/cleaned_dataset.csv"
df = pd.read_csv(url)

In [3]:
# Shuffle the dataset
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [4]:
# Feature matrix and targets
X = df.iloc[:, :-2].values
y_courses = df['Courses']
y_career_options_raw = df['Career_Options']

In [5]:
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# Encode Courses (multi-class)
le_courses = LabelEncoder()
y_courses_encoded = le_courses.fit_transform(y_courses)
y_courses_cat = to_categorical(y_courses_encoded)

In [7]:
# Binarize Career_Options (multi-label)
y_career_options_split = y_career_options_raw.str.split(', ')
mlb = MultiLabelBinarizer()
y_career_encoded = mlb.fit_transform(y_career_options_split)

In [8]:
# Train/test split
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_scaled, y_courses_cat, test_size=0.2, random_state=42)
X_train_m, X_test_m, y_train_m, y_test_m = train_test_split(X_scaled, y_career_encoded, test_size=0.2, random_state=42)


In [9]:
# Neural Network for Courses
model_courses = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_c.shape[1],)),
    Dense(64, activation='relu'),
    Dense(y_train_c.shape[1], activation='softmax')
])
model_courses.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Neural Network for Career Options
model_career = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_m.shape[1],)),
    Dense(64, activation='relu'),
    Dense(y_train_m.shape[1], activation='sigmoid')
])
model_career.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [11]:
# Train models
model_courses.fit(X_train_c, y_train_c, epochs=30, batch_size=32, verbose=1)
model_career.fit(X_train_m, y_train_m, epochs=30, batch_size=32, verbose=1)


Epoch 1/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4091 - loss: 2.9030
Epoch 2/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9927 - loss: 0.1775
Epoch 3/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9972 - loss: 0.0497
Epoch 4/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9958 - loss: 0.0343
Epoch 5/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9953 - loss: 0.0240
Epoch 6/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9975 - loss: 0.0161
Epoch 7/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9985 - loss: 0.0122
Epoch 8/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9974 - loss: 0.0128
Epoch 9/30
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7b426e75cc10>

In [12]:
# Make predictions
pred_courses = model_courses.predict(X_test_c)
pred_career = model_career.predict(X_test_m)

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


In [13]:
# Decode predictions
pred_courses_labels = np.argmax(pred_courses, axis=1)
true_courses_labels = np.argmax(y_test_c, axis=1)
pred_career_binary = (pred_career > 0.5).astype(int)


In [14]:
# Evaluation
courses_acc = accuracy_score(true_courses_labels, pred_courses_labels)
courses_f1 = f1_score(true_courses_labels, pred_courses_labels, average='macro')
career_hamming = hamming_loss(y_test_m, pred_career_binary)
career_f1 = f1_score(y_test_m, pred_career_binary, average='macro')


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [15]:
print("=== Neural Network Evaluation ===")
print(f"Courses - Accuracy: {courses_acc:.4f}, Macro F1: {courses_f1:.4f}")
print(f"Career Options - Hamming Loss: {career_hamming:.6f}, Macro F1: {career_f1:.4f}")


=== Neural Network Evaluation ===
Courses - Accuracy: 0.9929, Macro F1: 0.8525
Career Options - Hamming Loss: 0.000305, Macro F1: 0.8140


In [16]:
# Save the models locally
model_courses.save("courses_model.h5")
model_career.save("career_model.h5")




In [17]:
from google.colab import files

# Download models
files.download("courses_model.h5")
files.download("career_model.h5")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>