#### Ranking with Gemini API

#### import libraries

#### Load dataset

In [47]:
import pandas as pd
import os
from google import genai
import re
import json


In [49]:
# Location of the candidates CSV. Set the POTENTIAL_TALENT_CSV environment
# variable to run this notebook on another machine; when it is unset the
# original local path is used.
file_path = os.getenv(
    "POTENTIAL_TALENT_CSV",
    r"C:\Users\USER\Documents\Potential_Talent\potential-talents - Aspiring human resources - seeking human resources.csv",
)
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,id,job_title,location,connection,fit
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,


In [5]:
# Print df's (rows, columns) shape tuple.
print(df.shape)

(104, 5)


#### Drop fit column

In [51]:
# Remove the 'fit' column. errors='ignore' keeps this cell re-runnable:
# without it, running the cell a second time raises KeyError because 'fit'
# has already been dropped from df.
df = df.drop(columns=['fit'], errors='ignore')
df.head()

Unnamed: 0,id,job_title,location,connection
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+


#### Create text column from df

In [53]:

# Build one descriptive string per candidate:
#   "<job title lower-cased>, <location lower-cased> (connections: <connection>)"
df["text"] = (
    df["job_title"].astype(str).str.lower()
    .str.cat(df["location"].astype(str).str.lower(), sep=", ")
    .str.cat(df["connection"].astype(str), sep=" (connections: ")
    + ")"
)

df.head()

Unnamed: 0,id,job_title,location,connection,text
0,1,2019 C.T. Bauer College of Business Graduate (...,"Houston, Texas",85,2019 c.t. bauer college of business graduate (...
1,2,Native English Teacher at EPIK (English Progra...,Kanada,500+,native english teacher at epik (english progra...
2,3,Aspiring Human Resources Professional,"Raleigh-Durham, North Carolina Area",44,"aspiring human resources professional, raleigh..."
3,4,People Development Coordinator at Ryan,"Denton, Texas",500+,"people development coordinator at ryan, denton..."
4,5,Advisory Board Member at Celal Bayar University,"İzmir, Türkiye",500+,advisory board member at celal bayar universit...


#### Extract text column from df and work with it

In [55]:
# Work with the combined text column on its own from here on.
text_series = df["text"]

# Preview the first five candidate strings, one per line.
for preview_line in text_series.iloc[:5]:
    print(preview_line)

2019 c.t. bauer college of business graduate (magna cum laude) and aspiring human resources professional, houston, texas (connections: 85)
native english teacher at epik (english program in korea), kanada (connections: 500+ )
aspiring human resources professional, raleigh-durham, north carolina area (connections: 44)
people development coordinator at ryan, denton, texas (connections: 500+ )
advisory board member at celal bayar university, i̇zmir, türkiye (connections: 500+ )


#### split the dataset into chunks with 13 rows per chunk

In [57]:
# Number of candidates sent to the model per request.
chunk_size = 13

# Cut text_series into consecutive positional slices of chunk_size rows;
# the final slice holds whatever is left over.
chunks = []
for start in range(0, len(text_series), chunk_size):
    chunks.append(text_series.iloc[start:start + chunk_size])

print(f"Number of chunks: {len(chunks)}")

# Dump every chunk so the split can be checked by eye.
for chunk_no, chunk_rows in enumerate(chunks, start=1):
    print(f"\nChunk {chunk_no} (size {len(chunk_rows)}):")
    for row_text in chunk_rows:
        print(row_text)

Number of chunks: 8

Chunk 1 (size 13):
2019 c.t. bauer college of business graduate (magna cum laude) and aspiring human resources professional, houston, texas (connections: 85)
native english teacher at epik (english program in korea), kanada (connections: 500+ )
aspiring human resources professional, raleigh-durham, north carolina area (connections: 44)
people development coordinator at ryan, denton, texas (connections: 500+ )
advisory board member at celal bayar university, i̇zmir, türkiye (connections: 500+ )
aspiring human resources specialist, greater new york city area (connections: 1)
student at humber college and aspiring human resources generalist, kanada (connections: 61)
hr senior specialist, san francisco bay area (connections: 500+ )
student at humber college and aspiring human resources generalist, kanada (connections: 61)
seeking human resources hris and generalist positions, greater philadelphia area (connections: 500+ )
student at chapman university, lake forest, cal

#### Define the keywords, set instructions and apply them to the candidates

In [59]:

keywords = "aspiring human resources OR seeking human resources"

# The instruction header is identical for every chunk, so it is built once.
# Doubled braces render as literal { } in the JSON example.
prompt_header = f"""
Rank each candidate for relevance to: "{keywords}"

IMPORTANT:
- Output ONLY a JSON array.
- Each item must have "index" and "score".
- Scores must be continuous values strictly **greater than 0 and less than 1** (e.g., 0.01 to 0.99).
- Do NOT use binary scoring (0 or 1).
- Include ALL candidates.
- Round scores to EXACTLY 2 decimals.
- Do NOT truncate output.

Example (do NOT copy scores):
[
  {{"index": 1, "score": <score>}},
  {{"index": 2, "score": <score>}}
]

Candidates:
"""

# One full prompt per chunk: the shared header followed by that chunk's
# candidates, numbered from 1 within the chunk.
chunk_instructions = []
for chunk_idx, chunk in enumerate(chunks, start=1):
    numbered_candidates = "".join(
        f"{position}. {candidate}\n"
        for position, candidate in enumerate(chunk, start=1)
    )
    instruction = prompt_header + numbered_candidates
    chunk_instructions.append(instruction)

    print(f"\n--- Chunk {chunk_idx} Instruction ---")
    print(instruction)


--- Chunk 1 Instruction ---

Rank each candidate for relevance to: "aspiring human resources OR seeking human resources"

IMPORTANT:
- Output ONLY a JSON array.
- Each item must have "index" and "score".
- Scores must be continuous values strictly **greater than 0 and less than 1** (e.g., 0.01 to 0.99).
- Do NOT use binary scoring (0 or 1).
- Include ALL candidates.
- Round scores to EXACTLY 2 decimals.
- Do NOT truncate output.

Example (do NOT copy scores):
[
  {"index": 1, "score": <score>},
  {"index": 2, "score": <score>}
]

Candidates:
1. 2019 c.t. bauer college of business graduate (magna cum laude) and aspiring human resources professional, houston, texas (connections: 85)
2. native english teacher at epik (english program in korea), kanada (connections: 500+ )
3. aspiring human resources professional, raleigh-durham, north carolina area (connections: 44)
4. people development coordinator at ryan, denton, texas (connections: 500+ )
5. advisory board member at celal bayar unive

#### Get the API key from environment variables

In [61]:
# Read the key from the environment so it never appears in the notebook.
api_key = os.environ.get("GEMINI_API_KEY")

# Report only whether a non-empty key was found, never the key itself.
key_is_set = bool(api_key)
print(key_is_set)

True


#### Configure the client

In [63]:
# Create the google-genai client, authenticated with the key read from GEMINI_API_KEY.
client = genai.Client(api_key=api_key)  

#### Specify the model

In [65]:

# Model identifier passed to client.models.generate_content for every chunk.
model_name = "models/gemini-2.5-flash"

# Confirmation message only; no request is sent here.
print(f"Client configured. Ready to use model: {model_name}")

Client configured. Ready to use model: models/gemini-2.5-flash


#### define the model's behavior, work to do and call it.

In [67]:
# Raw reply text from the model, one entry per chunk, in chunk order.
chunk_outputs = []

for chunk_idx, instruction in enumerate(chunk_instructions, start=1):

    # The role description and the per-chunk instruction are sent together
    # as a single text prompt.
    full_prompt = (
        "System: You are an expert HR ranking system. Output ONLY valid JSON.\n\n"
        "User:\n" + instruction
    )

    # Call Gemini; temperature 0.0 keeps the scoring as repeatable as possible.
    response = client.models.generate_content(
        model=model_name,
        contents=full_prompt,
        config={
            "max_output_tokens": 4000,
            "temperature": 0.0,
        }
    )

    # response.text can be None when the reply carries no text (for example a
    # blocked or cut-off response). Stop with a clear message here instead of
    # failing later with a TypeError when the text is sliced or parsed.
    model_output_json = response.text
    if not model_output_json:
        raise RuntimeError(
            f"Gemini returned no text for chunk {chunk_idx}; "
            "the response may have been blocked or cut off by max_output_tokens."
        )
    chunk_outputs.append(model_output_json)

    # Preview (capped at 2000 characters)
    print(f"\n--- RAW OUTPUT for Chunk {chunk_idx} ---")
    print(model_output_json[:2000])


--- RAW OUTPUT for Chunk 1 ---
```json
[
  {"index": 1, "score": 0.95},
  {"index": 2, "score": 0.01},
  {"index": 3, "score": 0.94},
  {"index": 4, "score": 0.50},
  {"index": 5, "score": 0.01},
  {"index": 6, "score": 0.93},
  {"index": 7, "score": 0.92},
  {"index": 8, "score": 0.60},
  {"index": 9, "score": 0.92},
  {"index": 10, "score": 0.96},
  {"index": 11, "score": 0.01},
  {"index": 12, "score": 0.70},
  {"index": 13, "score": 0.55}
]
```

--- RAW OUTPUT for Chunk 2 ---
```json
[
  {"index": 1, "score": 0.95},
  {"index": 2, "score": 0.95},
  {"index": 3, "score": 0.05},
  {"index": 4, "score": 0.95},
  {"index": 5, "score": 0.35},
  {"index": 6, "score": 0.95},
  {"index": 7, "score": 0.05},
  {"index": 8, "score": 0.95},
  {"index": 9, "score": 0.35},
  {"index": 10, "score": 0.05},
  {"index": 11, "score": 0.94},
  {"index": 12, "score": 0.94},
  {"index": 13, "score": 0.55}
]
```

--- RAW OUTPUT for Chunk 3 ---
```json
[
  {"index": 1, "score": 0.97},
  {"index": 2, "sco

#### Examine the raw output

In [69]:
# Show the last chunk's reply via repr() so code fences and newline escapes are visible.
print("RAW RESPONSE TEXT:\n", repr(chunk_outputs[-1]))

RAW RESPONSE TEXT:
 '```json\n[\n  {"index": 1, "score": 0.05},\n  {"index": 2, "score": 0.03},\n  {"index": 3, "score": 0.95},\n  {"index": 4, "score": 0.02},\n  {"index": 5, "score": 0.04},\n  {"index": 6, "score": 0.97},\n  {"index": 7, "score": 0.01},\n  {"index": 8, "score": 0.96},\n  {"index": 9, "score": 0.99},\n  {"index": 10, "score": 0.45},\n  {"index": 11, "score": 0.06},\n  {"index": 12, "score": 0.07},\n  {"index": 13, "score": 0.08}\n]\n```'


#### Extract clean json

In [71]:

def extract_json(raw_text):
    """Parse the JSON payload out of a model reply.

    Accepts bare JSON as well as JSON wrapped in a Markdown code fence
    (```json ... ```). The fence tag is matched case-insensitively and any
    prose before or after a complete fence is ignored.

    Args:
        raw_text: Reply text returned by the model.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If raw_text is None or blank, or if the payload is not
            valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    if raw_text is None or not raw_text.strip():
        raise ValueError("Model returned no text to parse as JSON")

    # Prefer the contents of the first complete fenced block.
    fenced = re.search(
        r"```(?:json)?\s*(.*?)```", raw_text, flags=re.DOTALL | re.IGNORECASE
    )
    if fenced:
        cleaned = fenced.group(1)
    else:
        # No complete fence (bare JSON, or an unclosed fence): drop any
        # stray fence markers and parse what remains.
        cleaned = re.sub(r"```(?:json)?", "", raw_text, flags=re.IGNORECASE)
    return json.loads(cleaned.strip())

#### parse the extracted json

In [73]:
# Decode each chunk's raw reply into Python objects.
parsed_chunks = []
for raw_reply in chunk_outputs:
    parsed_chunks.append(extract_json(raw_reply))

# Flatten into one list, in chunk order. Each "index" is still the
# candidate's number within its own chunk, not the dataset id.
all_scores = []
for chunk_items in parsed_chunks:
    all_scores.extend(chunk_items)

print(all_scores[:10])  # preview the first 10 entries

[{'index': 1, 'score': 0.95}, {'index': 2, 'score': 0.01}, {'index': 3, 'score': 0.94}, {'index': 4, 'score': 0.5}, {'index': 5, 'score': 0.01}, {'index': 6, 'score': 0.93}, {'index': 7, 'score': 0.92}, {'index': 8, 'score': 0.6}, {'index': 9, 'score': 0.92}, {'index': 10, 'score': 0.96}]


#### Convert the parsed json to pandas dataframe and add to original df

In [75]:
# Attach each score to the candidate it was given for.
# The "index" the model returns is the candidate's 1-based number inside its
# own chunk (every prompt numbers its candidates from 1), so it is translated
# back to the df row label through the chunk the candidate came from.
# Relying on list position alone would silently shift every later score if
# the model skipped or reordered a single candidate.
if len(parsed_chunks) != len(chunks):
    raise ValueError(
        f"Expected {len(chunks)} parsed replies, got {len(parsed_chunks)}"
    )

score_records = []
for chunk_rows, chunk_items in zip(chunks, parsed_chunks):
    for item in chunk_items:
        position = int(item["index"])
        if not 1 <= position <= len(chunk_rows):
            raise ValueError(
                f"Model returned index {position} for a chunk of {len(chunk_rows)} candidates"
            )
        score_records.append(
            {
                "index": item["index"],
                "score": item["score"],
                # Row label in df of the candidate at this position in the chunk
                "row": chunk_rows.index[position - 1],
            }
        )

scores_df = pd.DataFrame(score_records, columns=["index", "score", "row"])
if scores_df["row"].duplicated().any():
    raise ValueError("Model scored the same candidate more than once")

# Column assignment aligns on the row label; candidates the model left out
# get NaN instead of inheriting a neighbour's score.
df['score'] = scores_df.set_index("row")["score"]

# Print id, text, and score
print(df[['id', 'text', 'score']].head(50))

    id                                               text  score
0    1  2019 c.t. bauer college of business graduate (...   0.95
1    2  native english teacher at epik (english progra...   0.01
2    3  aspiring human resources professional, raleigh...   0.94
3    4  people development coordinator at ryan, denton...   0.50
4    5  advisory board member at celal bayar universit...   0.01
5    6  aspiring human resources specialist, greater n...   0.93
6    7  student at humber college and aspiring human r...   0.92
7    8  hr senior specialist, san francisco bay area (...   0.60
8    9  student at humber college and aspiring human r...   0.92
9   10  seeking human resources hris and generalist po...   0.96
10  11  student at chapman university, lake forest, ca...   0.01
11  12  svp, chro, marketing & communications, csr off...   0.70
12  13  human resources coordinator at intercontinenta...   0.55
13  14  2019 c.t. bauer college of business graduate (...   0.95
14  15  2019 c.t. bauer c

#### Sort the candidates in descending order of score

In [77]:
# Rank candidates from highest to lowest score and renumber the rows 0..n-1.
df_sorted = (
    df.sort_values(by='score', ascending=False)
    .reset_index(drop=True)
)

# Plain-text preview of the top 50 rows.
print(df_sorted.loc[:, ['id', 'text', 'score']].head(50))

     id                                               text  score
0   100  aspiring human resources manager | graduating ...   0.99
1    40  seeking human resources hris and generalist po...   0.98
2    53  seeking human resources hris and generalist po...   0.98
3    73  aspiring human resources manager, seeking inte...   0.98
4    62  seeking human resources hris and generalist po...   0.98
5    82  aspiring human resources professional | an ene...   0.98
6    29  aspiring human resources management student se...   0.97
7    27  aspiring human resources management student se...   0.97
8    44  2019 c.t. bauer college of business graduate (...   0.97
9    57  2019 c.t. bauer college of business graduate (...   0.97
10   58  aspiring human resources professional, raleigh...   0.97
11   97  aspiring human resources professional, kokomo,...   0.97
12   99  seeking human resources position, las vegas, n...   0.96
13   46  aspiring human resources professional, raleigh...   0.96
14   60  a

#### View the full text of the top 50 candidates

In [79]:
# Turn off column-width truncation so the complete text is displayed.
pd.options.display.max_colwidth = None

# Top 50 candidates by score.
df_sorted.loc[:, ["id", "text", "score"]].head(50)

Unnamed: 0,id,text,score
0,100,"aspiring human resources manager | graduating may 2020 | seeking an entry-level human resources position in st. louis, cape girardeau, missouri (connections: 103)",0.99
1,40,"seeking human resources hris and generalist positions, greater philadelphia area (connections: 500+ )",0.98
2,53,"seeking human resources hris and generalist positions, greater philadelphia area (connections: 500+ )",0.98
3,73,"aspiring human resources manager, seeking internship in human resources., houston, texas area (connections: 7)",0.98
4,62,"seeking human resources hris and generalist positions, greater philadelphia area (connections: 500+ )",0.98
5,82,"aspiring human resources professional | an energetic and team-focused leader, austin, texas area (connections: 174)",0.98
6,29,"aspiring human resources management student seeking an internship, houston, texas area (connections: 500+ )",0.97
7,27,"aspiring human resources management student seeking an internship, houston, texas area (connections: 500+ )",0.97
8,44,"2019 c.t. bauer college of business graduate (magna cum laude) and aspiring human resources professional, houston, texas (connections: 85)",0.97
9,57,"2019 c.t. bauer college of business graduate (magna cum laude) and aspiring human resources professional, houston, texas (connections: 85)",0.97


#### view the bottom 50

In [81]:
# Lowest-scoring end of the ranking: the last 50 rows of df_sorted.
df_sorted.loc[:, ["id", "text", "score"]].iloc[-50:]

Unnamed: 0,id,text,score
54,83,"hr manager at endemol shine north america, los angeles, california (connections: 268)",0.45
55,34,"people development coordinator at ryan, denton, texas (connections: 500+ )",0.45
56,101,"human resources generalist at loparex, raleigh-durham, north carolina area (connections: 500+ )",0.45
57,84,"human resources professional for the world leader in gis software, highland, california (connections: 50)",0.42
58,38,"hr senior specialist, san francisco bay area (connections: 500+ )",0.4
59,81,"senior human resources business partner at heil environmental, chattanooga, tennessee area (connections: 455)",0.4
60,67,"human resources, staffing and recruiting professional, jackson, mississippi area (connections: 500+ )",0.35
61,59,"people development coordinator at ryan, denton, texas (connections: 500+ )",0.35
62,47,"people development coordinator at ryan, denton, texas (connections: 500+ )",0.35
63,68,"human resources specialist at luxottica, greater new york city area (connections: 500+ )",0.35
