In [1]:

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import sqlite3


In [11]:

# Load recruitment dataset
def load_data(file_path):
    if os.path.exists(r"C:\Users\user\Downloads\Kdf Reccuitment\kdf_recruitment_criteria.csv"):
        df = pd.read_csv(r"C:\Users\user\Downloads\Kdf Reccuitment\kdf_recruitment_criteria.csv")
        df.dropna(inplace=True)
        df['experience_years'] = df['experience'].apply(lambda x: int(x.split()[0]))
        df.drop(columns=['experience'], inplace=True)
        print("Recruitment data loaded successfully.")
        return df
    else:
        print(f"Error: File not found at {file_path}. Please check the path.")
        return None


In [12]:

# Load recruitment eligibility criteria
def load_criteria(criteria_path):
    if os.path.exists(criteria_path):
        criteria_df = pd.read_csv(criteria_path)
        print("Recruitment criteria loaded successfully.")
        return criteria_df
    else:
        print(f"Error: File not found at {criteria_path}. Please check the path.")
        return None


In [13]:

# Train ML Model
def train_model(df):
    if df is None:
        print("Error: No data available for training.")
        return None
    
    required_columns = ['age', 'education_level', 'physical_fitness_score', 'experience_years', 'selected']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Missing columns: {missing_columns}")
        return None

    X = df[['age', 'education_level', 'physical_fitness_score', 'experience_years']]
    y = df['selected']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    print("Training set size:", X_train.shape, y_train.shape)
    print("Test set size:", X_test.shape, y_test.shape)
    
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    
    return model


In [5]:

# Database setup
def init_db():
    conn = sqlite3.connect('recruitment.db')
    cursor = conn.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS candidates (
                        id INTEGER PRIMARY KEY, 
                        name TEXT, 
                        age INTEGER, 
                        education_level INTEGER, 
                        physical_fitness_score INTEGER, 
                        experience_years INTEGER, 
                        selected INTEGER)''')
    conn.commit()
    conn.close()
    print("Database initialized successfully.")


In [14]:

# Execute all functions in a Jupyter Notebook style
init_db()
data_path = r"C:\Users\user\Downloads\Kdf Reccuitment\recruitment_data.csv"
criteria_path = r"C:\Users\user\Downloads\Kdf Reccuitment\kdf_recruitment_criteria.csv"

df = load_data(data_path)
criteria_df = load_criteria(criteria_path)
model = train_model(df)


Database initialized successfully.


KeyError: 'experience'