In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, GRU, Dense, Bidirectional, Concatenate, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Read the training CSV into a DataFrame and display its column names.
# The path is kept in one named constant so it is easy to spot and change.
DATASET_PATH = '/content/Sample_dataset_for training.csv'
data = pd.read_csv(DATASET_PATH)
data.columns

Index(['Experience', 'Qualification', 'Tokenized_Job_Title',
       'Tokenized_Job_Description', 'Tokenized_Skills', 'Tokenized_Resume',
       'Resume_Score', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [5]:
# Drop the two empty `Unnamed` columns and every row that has a missing value.
# `errors='ignore'` keeps this cell re-runnable: `data` is overwritten here, so on a
# second execution the `Unnamed` columns are already gone and a plain drop would
# raise KeyError.
data = data.drop(columns=['Unnamed: 7', 'Unnamed: 8'], errors='ignore').dropna()

# Prepare text data for input (tokenized text columns)
text_columns = ['Tokenized_Job_Title', 'Tokenized_Job_Description', 'Tokenized_Skills', 'Tokenized_Resume']

# Combine the columns into a single space-separated text field per row.
# Values are cast to str first so that a cell pandas parsed as a number does not
# make ' '.join raise TypeError.
text_data = data[text_columns].astype(str).agg(' '.join, axis=1)

In [6]:
# Learn the vocabulary from the combined text, then encode each row as a list of word ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
sequences = tokenizer.texts_to_sequences(text_data)

# Post-pad every sequence up to the length of the longest one
max_len = max(map(len, sequences))
X_text = pad_sequences(sequences, padding='post', maxlen=max_len)

In [7]:
# Standardise `Experience`; it is turned into a single-column 2-D array for the scaler
experience_data = data['Experience'].to_numpy()[:, np.newaxis]
scaler = StandardScaler()
X_experience = scaler.fit_transform(experience_data)

# Integer-encode `Qualification`, then add a trailing axis so it is a column for the model input
label_encoder = LabelEncoder()
qualification_codes = label_encoder.fit_transform(data['Qualification'])
X_qualification = qualification_codes[:, np.newaxis]



In [8]:
# Regression target: the resume score of each row
y = data['Resume_Score'].to_numpy()

# Hold out 20% of the rows for testing; all four arrays are split with the same
# shuffle (fixed seed) so the rows stay aligned across inputs and target.
(
    X_text_train, X_text_test,
    X_exp_train, X_exp_test,
    X_qual_train, X_qual_test,
    y_train, y_test,
) = train_test_split(
    X_text,
    X_experience,
    X_qualification,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Model architecture with combined features
def build_combined_model(model_type='LSTM'):
    """Build and compile a three-input regression model for `Resume_Score`.

    The model has a text branch (embedding followed by a sequence encoder) and
    two single-value inputs (experience, qualification). The three are
    concatenated, passed through one hidden dense layer and reduced to a single
    linear output.

    Reads the notebook-level names `max_len` (padded sequence length) and
    `tokenizer` (its `word_index` sets the embedding vocabulary size).

    Parameters
    ----------
    model_type : str, default 'LSTM'
        How the embedded text is encoded:
        'LSTM' - bidirectional LSTM with 64 units per direction,
        'GRU'  - bidirectional GRU with 64 units per direction,
        'RNN'  - no recurrent layer; the embedding output is simply flattened.

    Returns
    -------
    A compiled Keras `Model` (Adam optimizer, MSE loss, MAE metric) whose inputs
    are ordered `[text_input, experience_input, qualification_input]`.

    Raises
    ------
    ValueError
        If `model_type` is not one of the supported options. Previously an
        unknown value left `text_branch` unbound and failed later with an
        UnboundLocalError.
    """
    # Validate before building any layers so a typo fails fast with a clear message.
    supported_types = ('LSTM', 'GRU', 'RNN')
    if model_type not in supported_types:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    # Text input branch
    text_input = Input(shape=(max_len,), name='text_input')
    # +1 because index 0 is not assigned to any word in `word_index`.
    # `input_length` is intentionally not passed: the sequence length is already
    # fixed by `text_input`, and the argument is deprecated in recent Keras.
    embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100)(text_input)

    if model_type == 'LSTM':
        text_branch = Bidirectional(LSTM(64))(embedding)
    elif model_type == 'GRU':
        text_branch = Bidirectional(GRU(64))(embedding)
    else:  # 'RNN' - baseline that flattens the embeddings instead of using a recurrent layer
        text_branch = Flatten()(embedding)

    # Numerical input branches
    experience_input = Input(shape=(1,), name='experience_input')
    qualification_input = Input(shape=(1,), name='qualification_input')

    # Concatenate all branches
    combined = Concatenate()([text_branch, experience_input, qualification_input])
    dense = Dense(64, activation='relu')(combined)
    output = Dense(1)(dense)  # Single output for Resume_Score

    model = Model(inputs=[text_input, experience_input, qualification_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    return model

In [10]:
# Candidate text encoders to compare, plus the running "best so far" trackers
models = ['LSTM', 'GRU', 'RNN']
best_model, best_mae = None, float('inf')

In [11]:
# Reset the "best so far" trackers in this cell as well, so re-running it does not
# compare freshly trained models against a stale `best_mae` from a previous run.
best_model = None
best_mae = float('inf')
best_model_type = None  # architecture name of the current best model
results = {}            # model_type -> {'mae': ..., 'mse': ...}

# NOTE(review): the winner is picked using the test split, so `best_mae` is an
# optimistic estimate of how the chosen model performs on unseen data.
for model_type in models:
    print(f"\nTraining {model_type} model...")
    model = build_combined_model(model_type=model_type)

    # Train the model (10% of the training split is held out for validation)
    model.fit(
        [X_text_train, X_exp_train, X_qual_train], y_train,
        epochs=10, batch_size=32, validation_split=0.1, verbose=1
    )

    # Evaluate the model on test data
    y_pred = model.predict([X_text_test, X_exp_test, X_qual_test])
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    results[model_type] = {'mae': mae, 'mse': mse}

    print(f"{model_type} Model - MAE: {mae}, MSE: {mse}")

    # Track the best model based on MAE, remembering which architecture it was
    if mae < best_mae:
        best_mae = mae
        best_model = model
        best_model_type = model_type



Training LSTM model...




Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - loss: 3293.8359 - mae: 56.0794 - val_loss: 2872.1426 - val_mae: 52.7044
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 3074.9766 - mae: 53.8878 - val_loss: 2201.9919 - val_mae: 45.9019
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - loss: 2233.2051 - mae: 45.6896 - val_loss: 1456.7887 - val_mae: 36.9044
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 1429.3483 - mae: 35.6281 - val_loss: 776.8044 - val_mae: 26.1171
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - loss: 810.5490 - mae: 25.4330 - val_loss: 323.3853 - val_mae: 15.1822
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - loss: 356.2679 - mae: 14.8220 - val_loss: 122.6413 - val_mae: 8.2752
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - loss: 3360.1853 - mae: 56.7080 - val_loss: 3034.7974 - val_mae: 54.2149
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - loss: 3251.5713 - mae: 55.5523 - val_loss: 2583.0244 - val_mae: 49.8675
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - loss: 2692.3027 - mae: 50.1341 - val_loss: 1539.4325 - val_mae: 37.9761
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - loss: 1587.9771 - mae: 37.6539 - val_loss: 731.2272 - val_mae: 25.2082
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - loss: 751.7014 - mae: 24.2793 - val_loss: 288.2708 - val_mae: 14.0475
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - loss: 360.8160 - mae: 14.9486 - val_loss: 109.2895 - val_mae: 7.9648
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step



[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 216ms/step - loss: 2227.0874 - mae: 42.2113 - val_loss: 445.2667 - val_mae: 19.2815
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 200ms/step - loss: 521.7360 - mae: 19.2968 - val_loss: 327.1376 - val_mae: 15.6212
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 203ms/step - loss: 296.3337 - mae: 14.1620 - val_loss: 204.2188 - val_mae: 11.6451
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 246ms/step - loss: 213.4550 - mae: 11.6464 - val_loss: 120.0467 - val_mae: 8.9433
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 173ms/step - loss: 141.4822 - mae: 9.2649 - val_loss: 78.0282 - val_mae: 7.0577
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 172ms/step - loss: 102.1929 - mae: 8.1416 - val_loss: 76.6586 - val_mae: 6.9332
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 215ms/st



[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 100ms/step



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
RNN Model - MAE: 8.120315074920654, MSE: 111.7302003038316


In [12]:
# Show the model object that reached the lowest test MAE.
# This prints the Keras object's repr, not the architecture name.
summary_prefix = "\nBest model based on MAE:"
print(summary_prefix, best_model)


Best model based on MAE: <Functional name=functional_2, built=True>
