In [1]:
import pandas as pd
import numpy as np

In [2]:
categorical_cols = [
    'Field of Study',
    'Current Occupation',
    'Gender',
    'Industry Growth Rate'
]

binary_cols = [
    'Career Change Interest',
    'Certifications',
    'Freelancing Experience',
    'Geographic Mobility'
]

numerical_cols = [
    'Age',
    'Years of Experience',
    'Job Satisfaction',
    'Work-Life Balance',
    'Job Opportunities',
    'Salary',
    'Job Security',
    'Skills Gap',
    'Professional Networks',
    'Career Change Events',
    'Technology Adoption'
]

target_col = 'Likely to Change Occupation'
df = pd.read_excel('Career data_PDA_4053.xlsx')

# basic preprocessing
df['Salary'] = pd.to_numeric(df['Salary'], errors = 'coerce')
df['Career Change Interest'] = pd.to_numeric(df['Career Change Interest'], errors = 'coerce')
df['Certifications'] = pd.to_numeric(df['Certifications'], errors = 'coerce')
df['Geographic Mobility'] = pd.to_numeric(df['Geographic Mobility'], errors = 'coerce')
df.dropna(how = 'any', inplace = True)
df.reset_index(drop = True, inplace = True)
df

Unnamed: 0,Field of Study,Current Occupation,Age,Gender,Years of Experience,Education Level,Industry Growth Rate,Job Satisfaction,Work-Life Balance,Job Opportunities,...,Job Security,Career Change Interest,Skills Gap,Certifications,Freelancing Experience,Geographic Mobility,Professional Networks,Career Change Events,Technology Adoption,Likely to Change Occupation
0,Medicine,Business Analyst,48.0,Male,7.0,High School,High,7.0,10.0,83.0,...,8.0,0.0,8.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0
1,Education,Economist,44.0,Male,26.0,Master's,Low,10.0,3.0,55.0,...,9.0,0.0,3.0,0.0,1.0,1.0,2.0,1.0,9.0,0.0
2,Arts,Doctor,28.0,Female,0.0,PhD,Low,3.0,1.0,8.0,...,3.0,0.0,5.0,0.0,1.0,0.0,2.0,0.0,7.0,1.0
3,Medicine,Lawyer,56.0,Male,39.0,Bachelor's,Low,3.0,1.0,99.0,...,3.0,0.0,10.0,0.0,0.0,0.0,10.0,0.0,3.0,1.0
4,Medicine,Software Developer,51.0,Female,29.0,High School,Low,7.0,4.0,13.0,...,7.0,0.0,6.0,1.0,0.0,1.0,4.0,1.0,10.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4935,Biology,Psychologist,51.0,Male,1.0,PhD,High,10.0,6.0,47.0,...,3.0,0.0,8.0,1.0,0.0,1.0,7.0,1.0,10.0,0.0
4936,Mechanical Engineering,Economist,35.0,Female,16.0,Master's,High,1.0,4.0,2.0,...,4.0,1.0,10.0,1.0,0.0,0.0,6.0,0.0,10.0,1.0
4937,Education,Doctor,30.0,Female,9.0,High School,Medium,8.0,1.0,55.0,...,4.0,0.0,5.0,0.0,0.0,0.0,5.0,2.0,9.0,0.0
4938,Economics,Economist,38.0,Male,6.0,Bachelor's,High,10.0,7.0,30.0,...,2.0,0.0,6.0,1.0,0.0,0.0,3.0,1.0,8.0,0.0


In [3]:

y = df['Likely to Change Occupation'].values.reshape(-1, 1)
X = df.drop(columns=['Likely to Change Occupation'])
print(df.head())
print(df.columns)

  Field of Study  Current Occupation   Age  Gender  Years of Experience  \
0       Medicine    Business Analyst  48.0    Male                  7.0   
1      Education           Economist  44.0    Male                 26.0   
2           Arts              Doctor  28.0  Female                  0.0   
3       Medicine              Lawyer  56.0    Male                 39.0   
4       Medicine  Software Developer  51.0  Female                 29.0   

  Education Level Industry Growth Rate  Job Satisfaction  Work-Life Balance  \
0     High School                 High               7.0               10.0   
1        Master's                  Low              10.0                3.0   
2             PhD                  Low               3.0                1.0   
3      Bachelor's                  Low               3.0                1.0   
4     High School                  Low               7.0                4.0   

   Job Opportunities  ...  Job Security  Career Change Interest  Skills Ga

In [4]:
# Encoding categorical values - encoding with numbers is not gonna cut it because that would imply a relationship,
# for example the model might understand that Teacher(encoded as say 0 or 1) is less important value than Doctor(encoded as 2 or 3)
# so we create new columns for each Teacher and Doctor and measure the value in booleans - 0 or 1 - this implies no relationship

nominal_cols = ['Field of Study', 'Current Occupation', 'Gender']
X = pd.get_dummies(X, columns=nominal_cols, drop_first=True)

# Encoding ordinal values - numbers do have meaning so it's fine
ordinal_mapping = {
    'Education Level': {'Low': 0, 'Medium': 1, 'High': 2},
    'Industry Growth Rate': {'Low': 0, 'Medium': 1, 'High': 2},
    'Education Level': {'High School': 0, 'Bachelor\'s': 1, 'Master\'s': 2, 'PhD': 3}
}

for col, mapping in ordinal_mapping.items():
    X[col] = X[col].map(mapping)

In [5]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# splitting the data into training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [7]:
# initializing parameters
np.random.seed(42)

n_features = X_train.shape[1] # this is number of input neurons - right now it's 36
hidden_units = 16 # this is the number of neurons in the hidden layer - duh.

W1 = np.random.randn(n_features, hidden_units) * 0.01 # generating weights that connect the inputs and the hidden layer - rn it's 36
b1 = np.zeros((1, hidden_units)) # bias for each neuron in the hidden layer

W2 = np.random.randn(hidden_units, 1) * 0.01 # generating weights that connect the hidden layer and the output neurons - rn it's 16
b2 = np.zeros((1, 1)) # bias for neurons in the output layer
n_features
# we use small random steps because:
# - it breaks symetry
# - avoids exploding activations
# - ensures that neurons learn different patterns

36

In [8]:
# defining activation functions
# for the hidden layer we are using relu, and for the output layer we're using sigmoid

def relu(z): # you know this one
  return np.maximum(0, z)

def relu_derivative(z):
  return (z > 0).astype(float)
  # This returns 0 or 1 - representing if a specific neuron was fired or not.
  # If it wasn't, there's no need to adjust its weight

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [9]:
# creating method for forward propagation - the predicting part
def forward(X):
    z1 = X @ W1 + b1 # linear combination for hidden layer; z1 is signal before activation functions
    a1 = relu(z1)

    z2 = a1 @ W2 + b2 # linear combination for output layer; a1 becomes the input signal this time
    y_hat = sigmoid(z2)

    return z1, a1, z2, y_hat

In [10]:
# The infamous loss function, or cross-entropy function - it basically penalizes confident mistakes heavily
# in simpler words, how wrong the network is
def binary_cross_entropy(y, y_hat):
    eps = 1e-8 # this is just to avoid log(0) and gives numerical stability
    return -np.mean(
        y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps)
    )

In [11]:
# and here is the dreaded backpropagation - the learning part
def backward(X, y, z1, a1, y_hat):
    global W1, b1, W2, b2

    m = X.shape[0]

    dz2 = y_hat - y # how wrong the prediction is, similar to how we usually find error (prediction - actual value)
    dW2 = (a1.T @ dz2) / m # the weight, or how much, did each neuron contribute to that error
    db2 = np.mean(dz2, axis=0, keepdims=True) # this is the average error

    da1 = dz2 @ W2.T # hidden layer gradient
    dz1 = da1 * relu_derivative(z1) # blocks neurons where the neuron was inactive

    # below is the really important bit - calculates HOW each weight should change to reduce loss
    dW1 = (X.T @ dz1) / m
    db1 = np.mean(dz1, axis=0, keepdims=True)

    return dW1, db1, dW2, db2


In [12]:
# this is the learning process

learning_rate = 0.01
epochs = 2500
losses = []


for epoch in range(epochs):
    z1, a1, z2, y_hat = forward(X_train)
    loss = binary_cross_entropy(y_train, y_hat)
    losses.append(loss)

    dW1, db1, dW2, db2 = backward(X_train, y_train, z1, a1, y_hat)

    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    if epoch % 200 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 0.6932
Epoch 200, Loss: 0.6872
Epoch 400, Loss: 0.6847
Epoch 600, Loss: 0.6832
Epoch 800, Loss: 0.6814
Epoch 1000, Loss: 0.6772
Epoch 1200, Loss: 0.6665
Epoch 1400, Loss: 0.6403
Epoch 1600, Loss: 0.5875
Epoch 1800, Loss: 0.5069
Epoch 2000, Loss: 0.4199
Epoch 2200, Loss: 0.3523
Epoch 2400, Loss: 0.3100


In [13]:
_, _, _, y_test_hat = forward(X_test)
preds = (y_test_hat > 0.5).astype(int)
print(len(_))
accuracy = np.mean(preds == y_test)
print("Test accuracy:", accuracy)


988
Test accuracy: 0.937246963562753


In [15]:
X.columns

Index(['Age', 'Years of Experience', 'Education Level', 'Industry Growth Rate',
       'Job Satisfaction', 'Work-Life Balance', 'Job Opportunities', 'Salary',
       'Job Security', 'Career Change Interest', 'Skills Gap',
       'Certifications', 'Freelancing Experience', 'Geographic Mobility',
       'Professional Networks', 'Career Change Events', 'Technology Adoption',
       'Field of Study_Biology', 'Field of Study_Business',
       'Field of Study_Computer Science', 'Field of Study_Economics',
       'Field of Study_Education', 'Field of Study_Law',
       'Field of Study_Mechanical Engineering', 'Field of Study_Medicine',
       'Field of Study_Psychology', 'Current Occupation_Biologist',
       'Current Occupation_Business Analyst', 'Current Occupation_Doctor',
       'Current Occupation_Economist', 'Current Occupation_Lawyer',
       'Current Occupation_Mechanical Engineer',
       'Current Occupation_Psychologist',
       'Current Occupation_Software Developer', 'Current Occu

In [16]:
import numpy as np
import plotly.graph_objects as go

def loss_fn(w1, w2, X, y):
    z = X[:, 0]*w1 + X[:, 1]*w2
    y_hat = 1 / (1 + np.exp(-z))
    eps = 1e-8
    return -np.mean(y*np.log(y_hat+eps) + (1-y)*np.log(1-y_hat+eps))

w1_vals = np.linspace(-3, 3, 50)
w2_vals = np.linspace(-3, 3, 50)

W1, W2 = np.meshgrid(w1_vals, w2_vals)

Z = np.array([
    loss_fn(w1, w2, X_train[:, :2], y_train)
    for w1, w2 in zip(W1.flatten(), W2.flatten())
]).reshape(W1.shape)

fig = go.Figure(data=[
    go.Surface(x=W1, y=W2, z=Z, colorscale='Viridis')
])

fig.update_layout(
    title='Loss Surface (Gradient Intuition)',
    scene=dict(
        xaxis_title='Weight 1',
        yaxis_title='Weight 2',
        zaxis_title='Loss'
    )
)

fig.show()
