In [None]:
import pandas as pd
import pickle

# Restore the trained model from the pickled bundle on disk.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source.
print("Loading model from pickle file...")
with open('../Selection_Algorithms/github_model.pkl', 'rb') as f:
    data = pickle.load(f)
# The pickle holds a dict; the estimator itself lives under the 'model' key.
final_model = data['model']
print("Model loaded successfully!\n")


def predict_batch_candidates(csv_path):
    """Score every candidate listed in a CSV file with the loaded model.

    Reads the CSV at ``csv_path``, passes five GitHub profile columns
    (``public_repos``, ``followers``, ``following``, ``total_stars``,
    ``total_forks``) to the module-level ``final_model``, and returns the
    table with two added columns:

    * ``AI_Decision`` -- 'GOOD PICK' when the predicted label equals 1,
      otherwise 'BAD PICK'.
    * ``Match_Score`` -- column index 1 of ``predict_proba`` multiplied by
      100 and rounded to two decimals.

    The returned rows are ordered by ``Match_Score``, highest first. A short
    progress message is printed after the file is read.
    """
    def _verdict(label):
        # Only a predicted label of 1 counts as a positive pick.
        if label == 1:
            return 'GOOD PICK'
        return 'BAD PICK'

    candidates = pd.read_csv(csv_path)

    print(f"Loaded {len(candidates)} unseen candidates from {csv_path}")
    print("\n" + "="*80)

    # Columns handed to the model, in this order.
    feature_columns = [
        'public_repos',
        'followers',
        'following',
        'total_stars',
        'total_forks',
    ]
    model_input = candidates[feature_columns]

    predicted_labels = final_model.predict(model_input)
    # Second probability column, expressed as a percentage.
    score_pct = final_model.predict_proba(model_input)[:, 1] * 100

    candidates['AI_Decision'] = list(map(_verdict, predicted_labels))
    candidates['Match_Score'] = score_pct.round(2)

    return candidates.sort_values(by='Match_Score', ascending=False)


# Run the batch prediction over the unseen-candidates file, print the ranked
# table with a short summary, then write the full result set to CSV.
print("BATCH PREDICTION - ALL CANDIDATES")
print("="*80)

results = predict_batch_candidates('../../Profile_Data/GitHub_Data_Unseen.csv')

# Only a handful of columns are shown on screen; the saved CSV keeps them all.
display_columns = ['username', 'total_stars', 'followers', 'AI_Decision', 'Match_Score']
print("\nALL CANDIDATES (Sorted by Match Score):")
print(results[display_columns].to_string(index=False))

# Tally each verdict by counting the rows where the label matches.
good_pick_count = int((results['AI_Decision'] == 'GOOD PICK').sum())
bad_pick_count = int((results['AI_Decision'] == 'BAD PICK').sum())

print("\n" + "="*80)
print("SUMMARY:")
print(f"Total Candidates: {len(results)}")
print(f"Good Picks: {good_pick_count}")
print(f"Bad Picks: {bad_pick_count}")

results.to_csv('../Shortlisted_candidates/GitHub_Predictions_Results(Unseen).csv', index=False)
print("\nResults saved!")

Loading model from pickle file...
Model loaded successfully!

BATCH PREDICTION - ALL CANDIDATES
Loaded 307 unseen candidates from ../../Profile_Data/GitHub_Data_Unseen.csv


ALL CANDIDATES (Sorted by Match Score):
              username  total_stars  followers AI_Decision  Match_Score
             zenorocha        15051      11405   GOOD PICK        100.0
             yyx990803         3073     106879   GOOD PICK        100.0
              trekhleb       255134      17746   GOOD PICK        100.0
                tqchen          640      12455   GOOD PICK        100.0
                 tpope         4203      23121   GOOD PICK        100.0
             topjohnwu        65711      11503   GOOD PICK        100.0
             tomnomnom         6041      13576   GOOD PICK        100.0
             tjdevries         3359      13273   GOOD PICK        100.0
                    tj        15032      51530   GOOD PICK        100.0
              tiangolo         3646      30465   GOOD PICK        