In [5]:
# ================================
# AI Resume Screening – ML Models
# ================================

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


# --------------------------------
# Load Dataset
# --------------------------------
# Relative path: the CSV must sit in the kernel's working directory,
# otherwise read_csv raises FileNotFoundError.
df = pd.read_csv("AI_Resume_Screening.csv")
df_raw = df.copy()   # keep raw data for NLP & rule-based models


# --------------------------------
# Encode Categorical Columns
# --------------------------------
categorical_cols = ["Education", "Certifications", "Job Role", "Recruiter Decision"]

# One fitted encoder per column. Refitting a single shared encoder would
# discard every mapping but the last one, making it impossible to decode
# predictions or encode a new candidate consistently.
encoders = {}
for col in categorical_cols:
    encoders[col] = LabelEncoder()
    # Always encode from the untouched raw frame so re-running this cell
    # yields the same codes and the encoder keeps the original string labels.
    df[col] = encoders[col].fit_transform(df_raw[col])

# Backward-compatible alias: the shared encoder used to end up fitted on the
# last column of the list, i.e. the target.
label_encoder = encoders["Recruiter Decision"]

FileNotFoundError: [Errno 2] No such file or directory: 'AI_Resume_Screening.csv'

In [None]:

# =========================================================
# 1️⃣ Random Forest – Hire / Reject Prediction
# =========================================================

# Features: everything except identifiers, the free-text skills column and
# the target itself.
X = df.drop(["Resume_ID", "Name", "Skills", "Recruiter Decision"], axis=1)
y = df["Recruiter Decision"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

print("Random Forest Accuracy:", rf_model.score(X_test, y_test))

# LabelEncoder numbers classes in sorted label order, so "Hire" is NOT
# guaranteed to be 1 (alphabetically "Hire" < "Reject", i.e. Hire -> 0).
# Rebuild the code -> label mapping from the encoded and raw frames, which
# share the same row order, instead of hard-coding which integer means "Hire".
decision_labels = dict(zip(df["Recruiter Decision"], df_raw["Recruiter Decision"]))


# --- Predict for a New Candidate ---
# Values are positional and must follow the order of X.columns; categorical
# entries are the integer codes produced by the encoding step.
new_candidate = pd.DataFrame(
    [[5, 1, 2, 0, 90000, 6, 88]],
    columns=X.columns
)

rf_prediction = rf_model.predict(new_candidate)
print("Hiring Decision:", decision_labels[int(rf_prediction[0])])

In [None]:

# =========================================================
# 2️⃣ Linear Regression – Salary Prediction
# =========================================================

X_salary = df[["Experience (Years)", "Projects Count", "AI Score (0-100)"]]
y_salary = df["Salary Expectation ($)"]

# Salary-specific split names: reusing X_train / X_test / y_train / y_test
# would silently overwrite the classifier's split from the previous cell and
# make results depend on cell execution order.
X_salary_train, X_salary_test, y_salary_train, y_salary_test = train_test_split(
    X_salary, y_salary, test_size=0.2, random_state=42
)

lr_model = LinearRegression()
lr_model.fit(X_salary_train, y_salary_train)

# Report fit quality on the held-out rows (LinearRegression.score is R^2).
print("Salary Model R^2:", lr_model.score(X_salary_test, y_salary_test))

# Predict with a DataFrame carrying the training column names; passing a bare
# list to a model fitted on named columns triggers a feature-name warning and
# hides which number belongs to which feature.
salary_candidate = pd.DataFrame([[5, 6, 88]], columns=X_salary.columns)
predicted_salary = lr_model.predict(salary_candidate)
print("Predicted Salary:", round(predicted_salary[0], 2))

In [None]:

# =========================================================
# 3️⃣ Decision Tree – Explainable Hire / Reject
# =========================================================

# Uses the raw (un-encoded) frame so the target can be mapped explicitly:
# 1 = Hire, 0 = Reject.
X_tree = df_raw[["Experience (Years)", "Projects Count", "AI Score (0-100)"]]
y_tree = df_raw["Recruiter Decision"].map({"Hire": 1, "Reject": 0})

# Series.map turns any label missing from the dict into NaN; fail early with
# a readable message instead of an opaque error from inside fit().
if y_tree.isna().any():
    unexpected = sorted(
        df_raw.loc[y_tree.isna(), "Recruiter Decision"].astype(str).unique()
    )
    raise ValueError(f"Unexpected 'Recruiter Decision' labels: {unexpected}")

# Shallow tree (max_depth=3) keeps the learned rules small enough to read.
dt_model = DecisionTreeClassifier(max_depth=3, random_state=42)
dt_model.fit(X_tree, y_tree)

# Predict with named columns to match how the tree was fitted (avoids the
# scikit-learn feature-name warning raised for bare lists).
dt_candidate = pd.DataFrame([[1, 2, 40]], columns=X_tree.columns)
dt_prediction = dt_model.predict(dt_candidate)
print("Decision Tree Prediction:", "Hire" if dt_prediction[0] == 1 else "Reject")

In [None]:


# =========================================================
# 4️⃣ NLP + ML – Predict Job Role from Skills
# =========================================================

# Raw text column as input, raw job-role strings as the target.
X_text, y_text = df_raw["Skills"], df_raw["Job Role"]

# Learn the vocabulary first, then turn the skills text into a
# bag-of-words count matrix.
vectorizer = CountVectorizer().fit(X_text)
X_text_vec = vectorizer.transform(X_text)

# Multinomial Naive Bayes trained on the token counts.
nb_model = MultinomialNB().fit(X_text_vec, y_text)

# Vectorize an unseen skills string with the fitted vocabulary and classify it.
test_skills = vectorizer.transform(["Python TensorFlow NLP"])
predicted_role = nb_model.predict(test_skills)[0]
print("Predicted Job Role:", predicted_role)