In [66]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pickle

In [67]:
data = pd.read_csv("resume_data.csv")
data.head()

Unnamed: 0,Candidate_ID,Education_Level,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Suitability
0,1,Master's,23,0.81,2,13,0.99,1
1,2,PhD,36,0.89,5,2,0.83,1
2,3,Bachelor's,39,0.56,6,5,0.77,1
3,4,PhD,29,0.74,7,8,0.65,0
4,5,PhD,18,0.95,10,3,0.82,0


In [68]:
data = data.drop(columns=['Candidate_ID'])

In [69]:
data.head()

Unnamed: 0,Education_Level,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Suitability
0,Master's,23,0.81,2,13,0.99,1
1,PhD,36,0.89,5,2,0.83,1
2,Bachelor's,39,0.56,6,5,0.77,1
3,PhD,29,0.74,7,8,0.65,0
4,PhD,18,0.95,10,3,0.82,0


In [70]:
#one hot encode 'Education_level' column
from sklearn.preprocessing import OneHotEncoder
onehot_encoder_edu = OneHotEncoder()
edu_encoder = onehot_encoder_edu.fit_transform(data[['Education_Level']])
edu_encoder

<10000x3 sparse matrix of type '<class 'numpy.float64'>'
	with 10000 stored elements in Compressed Sparse Row format>

In [71]:
onehot_encoder_edu.get_feature_names_out(['Education_Level'])

array(["Education_Level_Bachelor's", "Education_Level_Master's",
       'Education_Level_PhD'], dtype=object)

In [72]:
edu_encoder.toarray()

array([[0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       ...,
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [73]:
edu_encoded_df = pd.DataFrame(edu_encoder.toarray(),columns=onehot_encoder_edu.get_feature_names_out(['Education_Level']))
edu_encoded_df

Unnamed: 0,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,0.0,1.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,0.0,0.0,1.0
...,...,...,...
9995,0.0,0.0,1.0
9996,1.0,0.0,0.0
9997,0.0,1.0,0.0
9998,0.0,1.0,0.0


In [74]:
data = pd.concat([data.drop('Education_Level',axis=1),edu_encoded_df],axis=1)
data

Unnamed: 0,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Suitability,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,23,0.81,2,13,0.99,1,0.0,1.0,0.0
1,36,0.89,5,2,0.83,1,0.0,0.0,1.0
2,39,0.56,6,5,0.77,1,1.0,0.0,0.0
3,29,0.74,7,8,0.65,0,0.0,0.0,1.0
4,18,0.95,10,3,0.82,0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...
9995,27,0.57,3,1,0.69,1,0.0,0.0,1.0
9996,4,0.81,6,1,0.69,0,1.0,0.0,0.0
9997,11,0.87,7,0,0.73,1,0.0,1.0,0.0
9998,40,0.75,3,5,0.72,1,0.0,1.0,0.0


In [75]:
#save the encoder and scaler

with open('onehot_encoder_edu.pkl','wb') as file:
    pickle.dump(onehot_encoder_edu,file)

In [76]:
data.head()

Unnamed: 0,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Suitability,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,23,0.81,2,13,0.99,1,0.0,1.0,0.0
1,36,0.89,5,2,0.83,1,0.0,0.0,1.0
2,39,0.56,6,5,0.77,1,1.0,0.0,0.0
3,29,0.74,7,8,0.65,0,0.0,0.0,1.0
4,18,0.95,10,3,0.82,0,0.0,0.0,1.0


In [77]:
#devide the dataset into independent and dependent features
X = data.drop('Suitability',axis=1)
y = data['Suitability']

#split the data for training
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

#scale this features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train) 
X_test = scaler.transform(X_test)


In [78]:
with open('scaler.pkl','wb') as file:
    pickle.dump(scaler,file)

### ANN Implementation

In [79]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
import datetime

In [80]:
X_train.shape[1]

8

In [81]:
# Build our ANN Model
model = Sequential([
    Dense(64,activation='relu',input_shape=(X_train.shape[1],)), #HL1 connected to input layer
    Dense(32,activation='relu'), #HL2
    Dense(1,activation='sigmoid') #Output Layer
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [82]:
model.summary()

In [83]:
import tensorflow
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
loss = tensorflow.keras.losses.BinaryCrossentropy()

In [84]:
# compile the model
model.compile(optimizer=opt,loss="binary_crossentropy",metrics=['accuracy'])

In [85]:
#set up the tensorboard
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%y%m%d-%H%M%S")
tensorflow_callback = TensorBoard(log_dir=log_dir,histogram_freq=1)


In [86]:
#Set up early stopping
early_stopping_callback = EarlyStopping(monitor='val_loss',patience=10,restore_best_weights=True)


In [87]:
#Train the model

history = model.fit(
    X_train,y_train,validation_data = (X_test,y_test),epochs=100,
    callbacks=[tensorflow_callback,early_stopping_callback]
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.5086 - loss: 0.6986 - val_accuracy: 0.4975 - val_loss: 0.6933
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5028 - loss: 0.6934 - val_accuracy: 0.4995 - val_loss: 0.6927
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5032 - loss: 0.6933 - val_accuracy: 0.5090 - val_loss: 0.6941
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5037 - loss: 0.6939 - val_accuracy: 0.5060 - val_loss: 0.6932
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5258 - loss: 0.6924 - val_accuracy: 0.4890 - val_loss: 0.6938
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4961 - loss: 0.6942 - val_accuracy: 0.5090 - val_loss: 0.6930
Epoch 7/100
[1m250/25

In [88]:
model.save('model.h5')



In [89]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
%tensorboard --logdir logs/fit


In [93]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle
import pandas as pd
import numpy as np

In [94]:
model = load_model('model.h5')

with open('onehot_encoder_edu.pkl','rb') as file:
    onehot_encoder_edu=pickle.load(file)

with open('scaler.pkl','rb') as file:
    scaler=pickle.load(file)



In [137]:
input_data = {
    'Education_Level':"PhD",
    'Experience_Years':30,
    'Skills_Score':0.99,
    'Certifications':30,
    'Previous_Companies':20,
    'Job_Match_Score':0.99
}

In [138]:
edu_encoded = onehot_encoder_edu.transform([[input_data['Education_Level']]]).toarray()
edu_encoded_df = pd.DataFrame(edu_encoded,columns=onehot_encoder_edu.get_feature_names_out(['Education_Level']))
edu_encoded_df



Unnamed: 0,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,0.0,0.0,1.0


In [139]:
input_df = pd.DataFrame([input_data])
input_df

Unnamed: 0,Education_Level,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score
0,PhD,30,0.99,30,20,0.99


In [140]:
input_data = pd.concat([input_df.reset_index(drop=True),edu_encoded_df],axis=1)
input_data

Unnamed: 0,Education_Level,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,PhD,30,0.99,30,20,0.99,0.0,0.0,1.0


In [141]:
input_df = pd.concat([input_df.drop("Education_Level",axis=1),edu_encoded_df],axis=1)

In [142]:
input_df

Unnamed: 0,Experience_Years,Skills_Score,Certifications,Previous_Companies,Job_Match_Score,Education_Level_Bachelor's,Education_Level_Master's,Education_Level_PhD
0,30,0.99,30,20,0.99,0.0,0.0,1.0


In [143]:
input_scaled = scaler.transform(input_df)
input_scaled

array([[ 0.85776867,  1.66930226,  7.89053884,  2.69607837,  1.65002814,
        -0.70936189, -0.7051852 ,  1.41487673]])

In [144]:
prediction = model.predict(input_scaled)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step


array([[0.50025487]], dtype=float32)

In [145]:
prediction_probability = prediction[0][0]
prediction_probability

0.50025487

In [146]:
if prediction_probability>0.5:
    print("You are suitable for job")
else:
    print("You are not suitable for job")

You are suitable for job
