In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle
import re

print("Libraries imported successfully ")


Libraries imported successfully 


In [1]:
print("TensorFlow version:")
print("Hello world")


TensorFlow version:
Hello world


In [3]:
bilstm_model = tf.keras.models.load_model("../models/bilstm_model.keras")
mlp_model = tf.keras.models.load_model("../models/mlp_model.keras")

print("Bi-LSTM model loaded ")
print("MLP decision model loaded ")


  saveable.load_own_variables(weights_store.get(inner_path))


Bi-LSTM model loaded 
MLP decision model loaded 


In [4]:
feature_extractor = tf.keras.Model(
    inputs=bilstm_model.input,
    outputs=bilstm_model.get_layer("bilstm_layer").output
)

print("Bi-LSTM feature extractor created")


Bi-LSTM feature extractor created


In [6]:
with open("../models/tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

scaler = pickle.load(open("models/aux_scaler.pkl", "rb")) if False else None

print("Tokenizer loaded successfully")


Tokenizer loaded successfully


In [7]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"<.*?>", " ", text)
    text = re.sub(r"http\S+", " ", text)
    text = re.sub(r"[^a-z\s]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text

print("Text cleaning function ready")


Text cleaning function ready


In [8]:
manual_job_post = """
Urgent Hiring Alert ‚Äì Immediate Joiners Needed!
Multiple roles available including Developer, Designer, Data Analyst.
Remote work with flexible hours.
Salary range from 12k to 95k per month.
Freshers and experienced candidates are welcome.
Apply immediately.
"""

print("Manual job post received ")


Manual job post received 


In [9]:
MAX_LEN = 300

cleaned_text = clean_text(manual_job_post)
sequence = tokenizer.texts_to_sequences([cleaned_text])
padded_text = tf.keras.preprocessing.sequence.pad_sequences(
    sequence, maxlen=MAX_LEN, padding="post", truncating="post"
)

print("Text preprocessing completed ‚úÖ")
print("Padded text shape:", padded_text.shape)


Text preprocessing completed ‚úÖ
Padded text shape: (1, 300)


In [10]:
bilstm_features = feature_extractor.predict(padded_text)

print("Bi-LSTM semantic features extracted")
print("Bi-LSTM feature vector shape:", bilstm_features.shape)


[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 540ms/step
Bi-LSTM semantic features extracted
Bi-LSTM feature vector shape: (1, 256)


In [11]:
aux_features_demo = np.array([
    [
        0,  # company_profile_present
        0,  # company_website_present
        0,  # contact_email_present
        1,  # salary_range_present
        10, # num_open_positions
        0,  # required_experience_years
        len(manual_job_post.split()),  # text_length
        1,  # has_logo (assumed)
        1   # telecommuting
    ]
])

print("Auxiliary features prepared")
print("Auxiliary feature shape:", aux_features_demo.shape)


Auxiliary features prepared
Auxiliary feature shape: (1, 9)


In [12]:
X_final_demo = np.concatenate([bilstm_features, aux_features_demo], axis=1)

print("Hybrid feature vector created")
print("Final input shape to MLP:", X_final_demo.shape)


Hybrid feature vector created
Final input shape to MLP: (1, 265)


In [13]:
final_prediction = mlp_model.predict(X_final_demo)[0][0]

print("Final prediction score:", final_prediction)


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 270, but received input with shape (1, 265)[0m

Arguments received by Sequential.call():
  ‚Ä¢ inputs=tf.Tensor(shape=(1, 265), dtype=float32)
  ‚Ä¢ training=False
  ‚Ä¢ mask=None
  ‚Ä¢ kwargs=<class 'inspect._empty'>

In [15]:
# Load full auxiliary feature matrix used during training
X_aux_full = np.load("../data/processed/X_aux.npy")

print("Auxiliary feature matrix loaded ‚úÖ")
print("Aux feature shape (training):", X_aux_full.shape)


Auxiliary feature matrix loaded ‚úÖ
Aux feature shape (training): (3000, 14)


In [16]:
# Use mean auxiliary feature values as a neutral template
aux_template = X_aux_full.mean(axis=0).reshape(1, -1)

print("Auxiliary feature template created ‚úÖ")
print("Aux template shape:", aux_template.shape)


Auxiliary feature template created ‚úÖ
Aux template shape: (1, 14)


In [17]:
X_final_demo = np.concatenate(
    [bilstm_features, aux_template],
    axis=1
)

print("Hybrid feature vector created ‚úÖ")
print("Final input shape to MLP:", X_final_demo.shape)


Hybrid feature vector created ‚úÖ
Final input shape to MLP: (1, 270)


In [18]:
final_prediction = mlp_model.predict(X_final_demo)[0][0]

print("Final prediction score:", final_prediction)

if final_prediction >= 0.5:
    print("üö® FINAL RESULT: Suspicious / Fake Job")
else:
    print("‚úÖ FINAL RESULT: Legitimate Job")

print(f"Confidence Score: {final_prediction:.2f}")


[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 107ms/step
Final prediction score: 2.435433e-06
‚úÖ FINAL RESULT: Legitimate Job
Confidence Score: 0.00


In [None]:
# manual_job_text = """
# HR Priyanka

# 5,721 followers

# 3d

# X

# + Follow

# #Urgent Hiring Alert - #Immediate Joiners Needed!

# #Tech_cloud urgently #Hiring for multiple roles at a reputed American tech company. If you're looking to start your career switch to a #Remote role, this is your chance!

# or

# Last Date:- 17/01/2026

# We welcome both #Freshers and #ExperiencedProfessionals.

# Open positions- Full Stack #Developer, #Android Developer, #React Native Developer, #Web Developer, #Backend Developer, #Frontend Developer, UI/UX #Designer, #Graphic Designer, Data #Analyst, #Data Entry

# #Experience: 0-4 years

# #Working hours: Flexible

# #Income: 12k - 95k / Monthly (Based on Interview Performance)

# #Location: Remote

# Work schedule: 5 days a week Training will be provided for #Freshers.

# Note: Please respond only to this post if you're a #Freshers.
# """


# MAX_LEN = 300

# cleaned_text = clean_text(manual_job_text)

# sequence = tokenizer.texts_to_sequences([cleaned_text])

# padded_sequence = tf.keras.preprocessing.sequence.pad_sequences(
#     sequence,
#     maxlen=MAX_LEN,
#     padding="post",
#     truncating="post"
# )


# prediction = bilstm_model.predict(padded_sequence)[0][0]

# print("Raw Prediction Score:", prediction)


# if prediction >= 0.5:
#     print("Prediction: üö® Suspicious / Fake Job")
# else:
#     print("Prediction: ‚úÖ Legitimate Job")

# print(f"Confidence Score: {prediction:.2f}")


[1m1/1[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 98ms/step
Raw Prediction Score: 0.009917326
Prediction: ‚úÖ Legitimate Job
Confidence Score: 0.01
