<a href="https://colab.research.google.com/github/Bharathi3009/Talent-Opportunity-Index/blob/main/TOI_Simple_Mathematical_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load dataset
df = pd.read_csv('/content/AI_Resume_Screening.csv')

# Display the first few rows of the DataFrame
display(df.head())

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

# Simple preprocessing
df['Experience (Years)'] = df['Experience (Years)'].fillna(df['Experience (Years)'].mean())
df['Education'] = df['Education'].fillna('Bachelor')
df['Skills'] = df['Skills'].fillna('')

# Feature engineering
df['num_skills'] = df['Skills'].apply(lambda x: 0 if x.strip() == '' else len(x.split(',')))
df.drop(columns=['Skills'], inplace=True)

edu_map = {'Bachelor': 0, 'Master': 1, 'PhD': 2}
df['education_code'] = df['Education'].map(edu_map)
df.drop(columns=['Education'], inplace=True)

scaler = StandardScaler()
df['experience_norm'] = scaler.fit_transform(df[['Experience (Years)']])
df.drop(columns=['Experience (Years)'], inplace=True)

X = df[['experience_norm', 'num_skills', 'education_code']]
df['Hire'] = df['Recruiter Decision'].map({'Hire': 1, 'Reject': 0})
y = df['Hire']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Define simple weights for features (these can be tuned manually)
w_experience = 0.5
w_skills = 0.3
w_education = 0.2

# Calculate TOI as weighted sum of normalized features
def compute_toi(X):
    return (
        X['experience_norm'] * w_experience +
        X['num_skills'] * w_skills +
        X['education_code'] * w_education
    )

# Compute TOI scores for test data
toi_scores = compute_toi(X_test)

# Normalize TOI scores to 0–1 range
toi_scores_norm = (toi_scores - np.min(toi_scores)) / (np.max(toi_scores) - np.min(toi_scores))

# Use threshold of 0.5 to classify Hire vs. Reject
y_pred = (toi_scores_norm >= 0.5).astype(int)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score

# Load dataset
df = pd.read_csv('/content/AI_Resume_Screening.csv')

# Simple preprocessing
df['Experience (Years)'] = df['Experience (Years)'].fillna(df['Experience (Years)'].mean())
df['Education'] = df['Education'].fillna('Bachelor')
df['Skills'] = df['Skills'].fillna('')

# Feature engineering
df['num_skills'] = df['Skills'].apply(lambda x: 0 if x.strip() == '' else len(x.split(',')))
edu_map = {'Bachelor': 0.6, 'Master': 0.8, 'PhD': 1.0}  # Smarter scores than 0/1/2
# Handle unmapped values by setting them to NaN, then fill NaN
df['education_score'] = df['Education'].map(edu_map, na_action='ignore') # Corrected na_action
df['education_score'] = df['education_score'].fillna(0) # Fill any remaining NaNs with 0

# Normalize experience and num_skills
scaler = MinMaxScaler()
df[['experience_norm', 'num_skills_norm']] = scaler.fit_transform(df[['Experience (Years)', 'num_skills']])
df.drop(columns=['Experience (Years)', 'Skills', 'Education'], inplace=True)

# Binary target
df['Hire'] = df['Recruiter Decision'].map({'Hire': 1, 'Reject': 0})
df.drop(columns=['Recruiter Decision'], inplace=True)


# Compute correlation of each feature with Hire
correlations = df[['experience_norm', 'num_skills_norm', 'education_score']].corrwith(df['Hire'])
print("Feature correlations with Hire:", correlations)

# Set weights proportional to correlation (normalized to sum to 1)
weights = correlations.abs() / correlations.abs().sum()
print("Computed weights:", weights)


def compute_improved_toi(X):
    return (
        X['experience_norm'] * weights['experience_norm'] +
        X['num_skills_norm'] * weights['num_skills_norm'] +
        X['education_score'] * weights['education_score']
    )

# Split data
X = df[['experience_norm', 'num_skills_norm', 'education_score']]
y = df['Hire']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Compute TOI
toi_scores = compute_improved_toi(X_test)

# Normalize TOI between 0 and 1
toi_scores_norm = (toi_scores - np.min(toi_scores)) / (np.max(toi_scores) - np.min(toi_scores))

# Use threshold (tunable) to classify
threshold = 0.5
y_pred = (toi_scores_norm >= threshold).astype(int)

print("Improved Rule-Based Model Evaluation:")
print("Accuracy  :", accuracy_score(y_test, y_pred))
print("Precision :", precision_score(y_test, y_pred))
print("Recall    :", recall_score(y_test, y_pred))
print("ROC AUC   :", roc_auc_score(y_test, toi_scores_norm))

print("\nSample improved TOI scores (first 5):", toi_scores_norm[:5])

Feature correlations with Hire: experience_norm    0.576235
num_skills_norm    0.086422
education_score    0.017183
dtype: float64
Computed weights: experience_norm    0.847604
num_skills_norm    0.127121
education_score    0.025276
dtype: float64
Improved Rule-Based Model Evaluation:
Accuracy  : 0.68
Precision : 1.0
Recall    : 0.5932203389830508
ROC AUC   : 0.9188294491525425

Sample improved TOI scores (first 5): 521    0.826404
737    0.508562
740    0.487362
660    0.678083
411    0.364317
dtype: float64
