In [5]:
import pandas as pd
import pickle

# Load model
# The pickle file is expected to hold a dict whose 'model' entry is the
# trained classifier used by the prediction code below.
# NOTE(review): pickle.load executes arbitrary code from the file - only
# load model files that come from a trusted source.
print("Loading model from pickle file...")
with open('../Selection_Algorithms/stackoverflow_model.pkl', 'rb') as f:
    data = pickle.load(f)

# Pull the classifier out of the unpickled payload once the file is closed.
final_model = data['model']
print("Model loaded successfully!\n")


def predict_batch_candidates(csv_path, model=None):
    """Score every candidate in a CSV file and rank them by match score.

    The CSV is read with pandas and must contain the feature columns
    'Reputation', 'Gold_Badges', 'Silver_Badges' and 'Bronze_Badges'
    (a missing column raises ``KeyError`` from the column selection).

    Args:
        csv_path: Path of the CSV file holding the candidates to score.
        model: Optional classifier exposing ``predict`` and
            ``predict_proba``. When omitted, the module-level
            ``final_model`` is used, which keeps existing callers working.

    Returns:
        The loaded DataFrame with two extra columns, sorted by
        'Match_Score' in descending order:

        * 'AI_Decision' - 'GOOD PICK' when the predicted label equals 1,
          otherwise 'BAD PICK'.
        * 'Match_Score' - probability of label 1 as a percentage, rounded
          to two decimals.

        Candidates with equal scores keep their original CSV order. A CSV
        with a header but no rows yields an empty frame that still has
        both extra columns.
    """
    df_unseen = pd.read_csv(csv_path)

    print(f"Loaded {len(df_unseen)} unseen candidates from {csv_path}")
    print("\n" + "="*80)

    # CORRECT FEATURES - NO USERNAME!
    features = ['Reputation', 'Gold_Badges', 'Silver_Badges', 'Bronze_Badges']
    X_unseen = df_unseen[features]

    # Nothing to score: return a correctly-shaped empty frame instead of
    # handing a zero-row matrix to the model.
    if df_unseen.empty:
        return df_unseen.assign(
            AI_Decision=pd.Series(dtype='object'),
            Match_Score=pd.Series(dtype='float64'),
        )

    if model is None:
        model = final_model

    predictions = model.predict(X_unseen)

    # Label 1 is the "good pick" class (see the comparison below), so take
    # the probability column that belongs to that label. When the model does
    # not expose `classes_`, or label 1 is not listed, fall back to column 1.
    class_labels = list(getattr(model, 'classes_', []))
    positive_col = class_labels.index(1) if 1 in class_labels else 1
    probabilities = model.predict_proba(X_unseen)[:, positive_col] * 100

    df_unseen['AI_Decision'] = [
        'GOOD PICK' if pred == 1 else 'BAD PICK' for pred in predictions
    ]
    df_unseen['Match_Score'] = probabilities.round(2)

    # A stable sort keeps tied scores in their original CSV order, so the
    # ranking is reproducible from run to run.
    df_unseen = df_unseen.sort_values(
        'Match_Score', ascending=False, kind='mergesort'
    )

    return df_unseen


# Predict all candidates
# Score the unseen candidates, print the ranked table and a short summary,
# then write the full result set to CSV.
print("BATCH PREDICTION - ALL CANDIDATES")
print("=" * 80)

results = predict_batch_candidates('../../Profile_Data/StackOverflow_Data_Unseen.csv')

# Only the columns relevant for a quick visual check are printed.
display_columns = ['Display_Name', 'Reputation', 'AI_Decision', 'Match_Score']
print("\nALL CANDIDATES (Sorted by Match Score):")
print(results[display_columns].to_string(index=False))

# Summing a boolean mask counts the rows that carry each decision label.
good_pick_count = (results['AI_Decision'] == 'GOOD PICK').sum()
bad_pick_count = (results['AI_Decision'] == 'BAD PICK').sum()

print("\n" + "=" * 80)
print("SUMMARY:")
print(f"Total Candidates: {results.shape[0]}")
print(f"Good Picks: {good_pick_count}")
print(f"Bad Picks: {bad_pick_count}")

# Persist every column (not just the displayed ones) without the index.
results.to_csv('../Shortlisted_candidates/StackOverflow_Predictions_Results(Unseen).csv', index=False)
print("\nResults saved!")

Loading model from pickle file...
Model loaded successfully!

BATCH PREDICTION - ALL CANDIDATES
Loaded 110 unseen candidates from ../../Profile_Data/StackOverflow_Data_Unseen.csv


ALL CANDIDATES (Sorted by Match Score):
             Display_Name  Reputation AI_Decision  Match_Score
                 anubhava      790847   GOOD PICK        100.0
                   Barmar      789273   GOOD PICK        100.0
                   marc_s      760370   GOOD PICK        100.0
         Jonathan Leffler      760152   GOOD PICK        100.0
   Ignacio Vazquez-Abrams      804202   GOOD PICK        100.0
              Lara Bailey      802638   GOOD PICK        100.0
              Felix Kling      820390   GOOD PICK        100.0
Christian C. Salvad&#243;      831602   GOOD PICK        100.0
      Sergey Kalinichenko      729337   GOOD PICK        100.0
                 JaredPar      759937   GOOD PICK        100.0
                   ccprog       22231    BAD PICK          0.0
        MaciejG&#243;rs