In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load Dataset
df = pd.read_csv("/workspaces/Project/Data_Job_Postings.csv")  # Update the file path



In [3]:
# Drop "Posted Date" as per request
df = df[['Job Title', 'Company', 'Rating', 'Reviews', 'Experience', 'Salary', 'Location', 'Description', 'Skills', 'Fradulent']]

# Handle missing values (Fill NA with a default value)
df.fillna({'Rating': 0, 'Reviews': 0, 'Experience': '0-1 yrs', 'Salary': 'Not Disclosed'}, inplace=True)

# Convert categorical data into numerical using Label Encoding
label_encoders = {}
for col in ['Job Title', 'Company', 'Experience', 'Salary', 'Location', 'Description', 'Skills']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))  # Convert to string and encode
    label_encoders[col] = le  # Store encoders for later use

# Define Features and Target


In [4]:
X = df.drop(columns=['Fradulent'])  # Features
y = df['Fradulent']  # Target Variable

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on Test Data
y_pred = rf_model.predict(X_test)

# Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Save the model for later use
joblib.dump(rf_model, "random_forest_naukri.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")


Model Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      3160
           1       1.00      1.00      1.00       620

    accuracy                           1.00      3780
   macro avg       1.00      1.00      1.00      3780
weighted avg       1.00      1.00      1.00      3780



['label_encoders.pkl']

In [5]:
# Load the saved model and encoders
rf_model = joblib.load("random_forest_naukri.pkl")
scaler = joblib.load("scaler.pkl")
label_encoders = joblib.load("label_encoders.pkl")

# Sample New Job Posting Data (without "Posted Date")
new_job = pd.DataFrame([{
    'Job Title': 'Software Engineer',
    'Company': 'XYZ Ltd',
    'Rating': 4.2,
    'Reviews': 120,
    'Experience': '2-5 yrs',
    'Salary': '10-15 LPA',
    'Location': 'Bangalore',
    'Description': 'Looking for experienced software engineers...',
    'Skills': 'Python, Machine Learning, AI'
}])

# Encode categorical columns
for col in ['Job Title', 'Company', 'Experience', 'Salary', 'Location', 'Description', 'Skills']:
    new_job[col] = label_encoders[col].transform(new_job[col].astype(str))  

# Scale features
new_job_scaled = scaler.transform(new_job)

# Predict
prediction = rf_model.predict(new_job_scaled)
print("Prediction:", "Fake Job" if prediction[0] == 1 else "Genuine Job")


ValueError: y contains previously unseen labels: 'XYZ Ltd'

In [7]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load Dataset (Replace with your dataset path)
df = pd.read_csv("/workspaces/Project/Data_Job_Postings.csv")

# Drop 'Posted Date' as per your requirement
df.drop(columns=['Posted Date'], inplace=True, errors='ignore')

# Handle missing values
df.fillna("Unknown", inplace=True)

# Encode categorical columns (Description, Experience, Salary)
categorical_cols = ['Description', 'Experience', 'Salary']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le  # Store encoders for future use

# Features and Target
X = df[['Rating', 'Reviews', 'Description', 'Experience', 'Salary']]  # Selected Features
y = df['Fradulent']  # Target column (1 = Fake, 0 = Genuine)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Make Predictions
y_pred = rf_model.predict(X_test_scaled)

# Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save Model, Scaler & Encoders
joblib.dump(rf_model, "random_forest_naukri.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Load the model for future predictions
rf_model = joblib.load("random_forest_naukri.pkl")
scaler = joblib.load("scaler.pkl")
label_encoders = joblib.load("label_encoders.pkl")

# Sample New Job Posting Data
new_job = pd.DataFrame([{
    'Rating': 4.5,
    'Reviews': 250,
    'Description': 'Looking for experienced AI developers...',
    'Experience': '2-5 yrs',
    'Salary': '10-15 LPA'
}])

# Encode categorical columns
for col in ['Description', 'Experience', 'Salary']:
    if new_job[col].iloc[0] in label_encoders[col].classes_:
        new_job[col] = label_encoders[col].transform(new_job[col].astype(str))
    else:
        new_job[col] = -1  # Handle unseen labels

# Scale Features
new_job_scaled = scaler.transform(new_job)

# Predict
prediction = rf_model.predict(new_job_scaled)
print("Prediction:", "Fake Job" if prediction[0] == 1 else "Genuine Job")


Model Accuracy: 1.00
Prediction: Fake Job


In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load Dataset (Replace with your dataset path)
df = pd.read_csv("/workspaces/Project/Data_Job_Postings.csv")

# Drop 'Posted Date' as per your requirement
df.drop(columns=['Posted Date'], inplace=True, errors='ignore')

# Handle missing values
df.fillna("Unknown", inplace=True)

# Encode categorical columns (Description, Experience, Salary)
categorical_cols = ['Description', 'Experience', 'Salary']
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le  # Store encoders for future use

# Features and Target
X = df[['Rating', 'Reviews', 'Description', 'Experience', 'Salary']]  # Selected Features
y = df['Fradulent']  # Target column (1 = Fake, 0 = Genuine)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Naïve Bayes Classifier
nb_model = GaussianNB()
nb_model.fit(X_train_scaled, y_train)

# Make Predictions
y_pred = nb_model.predict(X_test_scaled)

# Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Save Model, Scaler & Encoders
joblib.dump(nb_model, "naive_bayes_naukri.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")

# Load the model for future predictions
nb_model = joblib.load("naive_bayes_naukri.pkl")
scaler = joblib.load("scaler.pkl")
label_encoders = joblib.load("label_encoders.pkl")

# Sample New Job Posting Data
new_job = pd.DataFrame([{
    'Rating': 3,
    'Reviews': 450,
    'Description': 'Looking for experienced AI developers...',
    'Experience': '2-5 yrs',
    'Salary': '10-15 LPA'
}])

# Encode categorical columns
for col in ['Description', 'Experience', 'Salary']:
    if new_job[col].iloc[0] in label_encoders[col].classes_:
        new_job[col] = label_encoders[col].transform(new_job[col].astype(str))
    else:
        new_job[col] = -1  # Handle unseen labels

# Scale Features
new_job_scaled = scaler.transform(new_job)

# Predict
prediction = nb_model.predict(new_job_scaled)
print("Prediction:", "Fake Job" if prediction[0] == 1 else "Genuine Job")
