# Imports

In [2]:
from ucimlrepo import fetch_ucirepo 

import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score

from tqdm import tqdm

import ollama

# Parameters

In [4]:
# How many rows the LLMs classify (also the size of the hold-out sample drawn later)
n_rows = 5_000

In [5]:
# LLM whose generated feature columns are reused here (the best model found earlier)
best_model = 'mistral'

# Model names passed to ollama.generate, one classification run per entry
models = [
    'llama3.2:1b',
    'llama3.2:3b',
    'gemma3:1b',
    'gemma3:4b',
    'llama3.1',
    'dolphin3',
    'mistral',
    'deepseek-llm',
]

In [6]:
# Every ASCII letter is a stop sequence: generation halts as soon as the model
# starts writing text, because only a bare 0 or 1 is wanted.
stop_chars = [*"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"]

In [7]:
# Classical ML baselines as (display name, estimator) pairs.
# Seeded wherever the estimator accepts a random_state so results are repeatable.
ml_models = list({
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(random_state=42),
    'KNN': KNeighborsClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
}.items())

In [8]:
# CSV file names
# All three files share one date stamp; keeping it in a single place prevents
# accidentally mixing files from different export dates.
data_snapshot = "2025_04_03"
base_data_prep_name = f"tabular_data_preprocessed_{data_snapshot}.csv"
llm_data_raw_name = f"tabular_data_llm_raw_{data_snapshot}.csv"
llm_data_prep_name = f"tabular_data_llm_preprocessed_{data_snapshot}.csv"

In [9]:
# Force CUDA usage
# NOTE(review): these variables are set only in this notebook's own process.
# Nothing in this notebook starts the Ollama server, so a server that is already
# running as a separate process will not see them — confirm that they take
# effect, and that OLLAMA_BACKEND / OLLAMA_NUM_THREADS are recognised by the
# installed Ollama version.
os.environ["OLLAMA_BACKEND"] = "cuda"
os.environ["OLLAMA_NUM_THREADS"] = "16"

# Load Data

In [11]:
# Raw table: the base columns plus LLM-generated feature columns whose names are
# suffixed with the generating model (e.g. `..._mistral`, `..._deepseek_llm`)
llm_df = pd.read_csv(llm_data_raw_name)

In [12]:
# Preview the first rows to sanity-check the load
llm_df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,...,job_security_rating_mistral,cultural_integration_score_mistral,career_stage_classification_deepseek_llm,occupational_demand_outlook_deepseek_llm,education_roi_deepseek_llm,years_of_experience_deepseek_llm,socio_economic_mobility_deepseek_llm,job_vs_education_match_deepseek_llm,job_security_rating_deepseek_llm,cultural_integration_score_deepseek_llm
0,18,Private,423024,HS-grad,9,Never-married,Other-service,Not-in-family,White,Male,...,4,5,7,9,4,4,6,8,9,4
1,17,Private,178953,12th,8,Never-married,Sales,Own-child,White,Female,...,4,5,4,4,9,4,4,5,5,4
2,25,Local-gov,348986,HS-grad,9,Never-married,Handlers-cleaners,Other-relative,Black,Male,...,7,5,5,1,5,2,5,9,9,1
3,20,Private,218215,Some-college,10,Never-married,Sales,Own-child,White,Female,...,5,7,4,4,2,4,4,9,4,4
4,47,Private,244025,HS-grad,9,Never-married,Machine-op-inspct,Unmarried,Amer-Indian-Eskimo,Male,...,7,7,4,4,9,4,4,4,4,4


# Baseline LLM classification experiment
Classify whether people earn more or less than 50k dollars using just the LLM and an NLG description of the data.
We will classify again on 2 datasets:

1. Base data set
2. LLM data set, this is the set with LLM generated columns

### Base columns

In [15]:
# Columns of the original (non-LLM) data, plus the pre-built `nlg` sentence and the target
base_cols = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'sex',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'nlg', 'income',
]

### LLM generated columns

In [17]:
# Suffix that the raw CSV appends to every column generated by `best_model`.
# The CSV header stores the model 'deepseek-llm' as '_deepseek_llm', so hyphens
# must be normalised to underscores as well as colons; otherwise selecting the
# columns fails with a KeyError when `best_model` contains a '-'.
# NOTE(review): whether '.' in names such as 'llama3.2:1b' is also rewritten in
# the CSV is not visible here — confirm before switching to such a model.
best_model_suffix = "_" + best_model.replace(':', '_').replace('-', '_')

# Un-suffixed names of the eight LLM-generated feature columns
column_names_llm_generated = [
    "career_stage_classification",
    "occupational_demand_outlook",
    "education_roi",
    "years_of_experience",
    "socio_economic_mobility",
    "job_vs_education_match",
    "job_security_rating",
    "cultural_integration_score"
]

# Same names as they appear in the raw CSV for the chosen model
column_names_llm_generated_suffix = [name + best_model_suffix for name in column_names_llm_generated]

## Final column selection

In [19]:
# Keep the base columns plus the chosen model's generated columns, then strip
# the model suffix so the generated columns carry their plain names.
selected_columns = base_cols + column_names_llm_generated_suffix
llm_df = llm_df[selected_columns].rename(
    columns=lambda name: name.replace(best_model_suffix, '')
)
print(llm_df.columns)
llm_df.head()

Index(['age', 'workclass', 'fnlwgt', 'education', 'education_num',
       'marital_status', 'occupation', 'relationship', 'race', 'sex',
       'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
       'nlg', 'income', 'career_stage_classification',
       'occupational_demand_outlook', 'education_roi', 'years_of_experience',
       'socio_economic_mobility', 'job_vs_education_match',
       'job_security_rating', 'cultural_integration_score'],
      dtype='object')


Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,...,nlg,income,career_stage_classification,occupational_demand_outlook,education_roi,years_of_experience,socio_economic_mobility,job_vs_education_match,job_security_rating,cultural_integration_score
0,18,Private,423024,HS-grad,9,Never-married,Other-service,Not-in-family,White,Male,...,"18-year-old Male from United-States, Never-mar...",<=50K,1,6,3,1,4,3,4,5
1,17,Private,178953,12th,8,Never-married,Sales,Own-child,White,Female,...,"17-year-old Female from United-States, Never-m...",<=50K,1,5,6,1,5,4,4,5
2,25,Local-gov,348986,HS-grad,9,Never-married,Handlers-cleaners,Other-relative,Black,Male,...,"25-year-old Male from United-States, Never-mar...",<=50K,3,6,3,3,4,3,7,5
3,20,Private,218215,Some-college,10,Never-married,Sales,Own-child,White,Female,...,"20-year-old Female from United-States, Never-m...",<=50K,1,6,7,1,6,7,5,7
4,47,Private,244025,HS-grad,9,Never-married,Machine-op-inspct,Unmarried,Amer-Indian-Eskimo,Male,...,"47-year-old Male from Puerto-Rico, Never-marri...",<=50K,7,7,3,8,4,4,7,7


# NLG for LLM generated columns
This is done here and not in the preprocessing, because we just want to test the best feature set.
We will, however, run the classification with multiple LLMs to compare their classification error (more on that later).

In [21]:
# NLG template
# Filled with str.format using one row's values; every {placeholder} is a column name.
# The eight LLM-generated placeholders receive the descriptive phrases looked up
# in score_mappings (see format_row_to_sentence), not the raw 1-10 scores.
template = (
    # Base columns (row values are inserted as-is)
    "{age} year old {sex} from {native_country}, {marital_status}, "
    "works as a {occupation} in the {workclass} sector. "
    "With a {education} education, "
    "they identify as {race} and are {relationship} in their household. "
    "They work {hours_per_week} hours weekly, with a capital gain this year of {capital_gain} dollars "
    "and a capital loss this year of {capital_loss} dollars. "
    
    # LLM generated columns (scores are replaced by phrases before formatting)
    "Currently in the {career_stage_classification} stage of their career, "
    "they work in a field with {occupational_demand_outlook} job prospects. "
    "Their education has provided {education_roi} return on investment, "
    "and they have {years_of_experience} years of professional experience. "
    "Their career offers {socio_economic_mobility} socioeconomic mobility potential, "
    "with {job_vs_education_match} alignment between their education and current job. "
    "They enjoy {job_security_rating} job security "
    "and report {cultural_integration_score} cultural integration in their workplace."
)

### Mapping for NLG

In [23]:
# Descriptive phrase for each score, listed in score order: the first phrase
# belongs to score 1 and the last to score 10.
_phrases_by_score = {
    "career_stage_classification": [
        "early/entry-level", "early/entry-level", "early/entry-level",
        "developing", "developing",
        "mid-career", "mid-career",
        "advanced", "advanced",
        "expert/leadership",
    ],
    "occupational_demand_outlook": [
        "very poor", "poor", "below average", "slightly below average", "average",
        "slightly above average", "good", "very good", "excellent", "outstanding",
    ],
    "education_roi": [
        "very low", "low", "below average", "slightly below average", "moderate",
        "slightly above average", "good", "very good", "excellent", "exceptional",
    ],
    "years_of_experience": [
        "0-2", "2-4", "4-6", "6-8", "8-10",
        "10-12", "12-15", "15-20", "20-25", "25+",
    ],
    "socio_economic_mobility": [
        "very limited", "limited", "below average", "some", "moderate",
        "above average", "good", "very good", "excellent", "exceptional",
    ],
    "job_vs_education_match": [
        "very poor", "poor", "below average", "somewhat below average", "moderate",
        "somewhat above average", "good", "very good", "excellent", "perfect",
    ],
    "job_security_rating": [
        "very low", "low", "below average", "somewhat low", "average",
        "somewhat high", "high", "very high", "extremely high", "guaranteed",
    ],
    "cultural_integration_score": [
        "very poor", "poor", "below average", "somewhat poor", "average",
        "somewhat good", "good", "very good", "excellent", "perfect",
    ],
}

# Per-column lookup {score (1..10): phrase} used when building the NLG sentence
score_mappings = {
    column: dict(enumerate(phrases, start=1))
    for column, phrases in _phrases_by_score.items()
}

### NLG Function

In [25]:
def format_row_to_sentence(row):
    """Render one data row as an NLG sentence using the module-level ``template``.

    Every column that has an entry in ``score_mappings`` and is present in
    ``row`` is replaced by its descriptive phrase before formatting; all other
    values are inserted unchanged. The input row is not modified.

    Raises ``KeyError`` if a score has no phrase in ``score_mappings`` or if the
    row lacks a field that the template references.
    """
    values = row.copy()

    # Only translate the score columns that this row actually carries
    scored_columns = (name for name in score_mappings if name in row)
    for name in scored_columns:
        values[name] = score_mappings[name][row[name]]

    return template.format(**values)

In [26]:
# Build the extended description (base + LLM-generated columns) for every row
llm_df["nlg_llm_cols"] = llm_df.apply(format_row_to_sentence, axis=1)

# Show the first few sentences, each followed by a blank line
for sentence in llm_df["nlg_llm_cols"].head():
    print(sentence, end="\n\n")

18 year old Male from United-States, Never-married, works as a Other-service in the Private sector. With a HS-grad education, they identify as White and are Not-in-family in their household. They work 20 hours weekly, with a capital gain this year of 0 dollars and a capital loss this year of 0 dollars. Currently in the early/entry-level stage of their career, they work in a field with slightly above average job prospects. Their education has provided below average return on investment, and they have 0-2 years of professional experience. Their career offers some socioeconomic mobility potential, with below average alignment between their education and current job. They enjoy somewhat low job security and report average cultural integration in their workplace.

17 year old Female from United-States, Never-married, works as a Sales in the Private sector. With a 12th education, they identify as White and are Own-child in their household. They work 20 hours weekly, with a capital gain this 

# Preprocessing

### From here on we use only the NLG columns and the target variable

In [29]:
# Keep only the two NLG descriptions and the target.
# Take an explicit copy: later cells assign new columns into this frame, and
# assigning into a column subset of another frame triggers pandas'
# SettingWithCopyWarning.
llm_df = llm_df[['nlg', 'nlg_llm_cols', 'income']].copy()
llm_df.head()

Unnamed: 0,nlg,nlg_llm_cols,income
0,"18-year-old Male from United-States, Never-mar...","18 year old Male from United-States, Never-mar...",<=50K
1,"17-year-old Female from United-States, Never-m...","17 year old Female from United-States, Never-m...",<=50K
2,"25-year-old Male from United-States, Never-mar...","25 year old Male from United-States, Never-mar...",<=50K
3,"20-year-old Female from United-States, Never-m...","20 year old Female from United-States, Never-m...",<=50K
4,"47-year-old Male from Puerto-Rico, Never-marri...","47 year old Male from Puerto-Rico, Never-marri...",<=50K


### Encode target variable

In [31]:
# Clean the income column by removing periods and stripping surrounding spaces
income_labels = llm_df['income'].str.replace('.', '', regex=False).str.strip()

# Encode '>50K' as 1 and '<=50K' as 0
income_encoded = income_labels.map({'>50K': 1, '<=50K': 0})

# Series.map turns any label missing from the dict into NaN without complaint;
# fail here with a clear message instead of deep inside the metric functions.
unmapped_rows = income_encoded.isna()
if unmapped_rows.any():
    raise ValueError(
        "Unexpected income labels: "
        f"{llm_df.loc[unmapped_rows, 'income'].unique().tolist()}"
    )

# assign() builds a new frame, so no value is written into a possible slice
llm_df = llm_df.assign(income=income_encoded.astype('int64'))

# Classify based on NLG
We will classify both on the base and LLM extended (columns) data set

In [33]:
# Classification prompt; {nlg} is replaced with one person's NLG description.
# The model is asked for a bare 0/1 because the reply is parsed with int().
prompt = '''Task: Classify if the person's annual income exceeds 50k dollars based on their profile.
Return 1 if > 50k.
Return 0 if <= 50k.

Return only 0 or 1 with no extra text, explanations, or formatting.

Profile:
{nlg}'''

In [34]:
# Upper bound on attempts per prompt, so that an unreachable Ollama server or a
# model that never produces a bare 0/1 cannot stall the notebook forever.
max_attempts_per_prompt = 100


def classify_with_retries(model, prompt_text, max_attempts=max_attempts_per_prompt):
    """Ask ``model`` for a 0/1 label, retrying until the reply is valid.

    Parameters
    ----------
    model : str
        Model name passed to ``ollama.generate``.
    prompt_text : str
        Fully formatted prompt.
    max_attempts : int
        Maximum number of generation attempts before giving up.

    Returns
    -------
    tuple[int, int]
        ``(label, failed_attempts)`` where ``label`` is 0 or 1 and
        ``failed_attempts`` counts the invalid replies/errors before success.

    Raises
    ------
    RuntimeError
        If no valid label was obtained within ``max_attempts``; the last
        underlying error is chained as the cause.
    """
    last_error = None
    for failed_attempts in range(max_attempts):
        try:
            # Generation stops at the first letter, so a valid reply is just a digit
            reply = ollama.generate(model=model, prompt=prompt_text, options={"stop": stop_chars})['response']
            label = int(reply)

            # Explicit check instead of `assert`, which is removed under `python -O`
            if label not in (0, 1):
                raise ValueError(f"expected 0 or 1, got {label!r}")

            return label, failed_attempts
        except Exception as error:  # any failure counts as one failed attempt, then retry
            last_error = error

    raise RuntimeError(
        f"No valid 0/1 prediction from model {model!r} after {max_attempts} attempts"
    ) from last_error


# For every model
for model in models:
    print("Current Model: " + model)

    # Instantiate classification columns
    base_column = 'nlg_classification' + "_" + model
    llm_column = 'nlg_llm_cols_classification' + "_" + model
    llm_df[base_column] = None
    llm_df[llm_column] = None

    # Number of failed attempts per prompt variant
    errors_base = 0
    errors_llm = 0

    # For every row - with progress bar
    for index, row in tqdm(llm_df.iterrows(), total=len(llm_df), desc=f"Processing rows for {model}"):
        # Prediction from the base-data description
        label, failed = classify_with_retries(model, prompt.format(nlg=row['nlg']))
        llm_df.at[index, base_column] = label
        errors_base += failed

        # Prediction from the description extended with LLM-generated columns
        label, failed = classify_with_retries(model, prompt.format(nlg=row['nlg_llm_cols']))
        llm_df.at[index, llm_column] = label
        errors_llm += failed

    # Failed attempts per 100 classified rows. The denominator is the actual
    # number of rows processed (guarded against an empty frame), not n_rows.
    row_count = len(llm_df) or 1
    print("Error rate base cols: ", round(errors_base / row_count * 100, 2))
    print("Error rate llm cols: ", round(errors_llm / row_count * 100, 2))

Current Model: llama3.2:1b


Processing rows for llama3.2:1b: 100%|█████████████████████████████████████████████| 5000/5000 [18:05<00:00,  4.61it/s]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: llama3.2:3b


Processing rows for llama3.2:3b: 100%|███████████████████████████████████████████| 5000/5000 [1:04:22<00:00,  1.29it/s]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: gemma3:1b


Processing rows for gemma3:1b: 100%|███████████████████████████████████████████████| 5000/5000 [23:40<00:00,  3.52it/s]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: gemma3:4b


Processing rows for gemma3:4b: 100%|█████████████████████████████████████████████| 5000/5000 [1:33:48<00:00,  1.13s/it]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: llama3.1


Processing rows for llama3.1: 100%|██████████████████████████████████████████████| 5000/5000 [1:36:41<00:00,  1.16s/it]


Error rate base cols:  0.88
Error rate llm cols:  0.0
Current Model: dolphin3


Processing rows for dolphin3: 100%|██████████████████████████████████████████████| 5000/5000 [1:35:42<00:00,  1.15s/it]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: mistral


Processing rows for mistral: 100%|███████████████████████████████████████████████| 5000/5000 [1:05:44<00:00,  1.27it/s]


Error rate base cols:  0.0
Error rate llm cols:  0.0
Current Model: deepseek-llm


Processing rows for deepseek-llm: 100%|██████████████████████████████████████████| 5000/5000 [1:49:49<00:00,  1.32s/it]

Error rate base cols:  4.4
Error rate llm cols:  33.62





## Evaluate

In [36]:
# Function to score the LLM predictions with multiple metrics
def evaluate_models_with_metrics(models, df):
    """Score each LLM's 0/1 predictions against the true income labels.

    NOTE: a later cell defines another function with this same name but a
    different signature (for the classical ML models). Once that cell has run,
    this version is replaced, so this cell must be re-run before calling it again.

    Parameters
    ----------
    models : iterable of str
        Model names; for each one ``df`` must contain the columns
        ``'nlg_classification_<model>'`` and ``'nlg_llm_cols_classification_<model>'``.
    df : pandas.DataFrame
        Frame holding the prediction columns and the encoded ``'income'`` target.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per model with accuracy, precision, recall and F1 for the
        base-data prompt and for the LLM-extended prompt.
    """
    y_true = df['income']
    results = []

    for model in models:
        # The prediction columns are filled cell by cell starting from None, so
        # convert them to numeric here. The conversion is done on local Series so
        # that evaluating does not rewrite the caller's DataFrame.
        base_pred = pd.to_numeric(df['nlg_classification_' + model])
        llm_pred = pd.to_numeric(df['nlg_llm_cols_classification_' + model])

        # Store results for the model
        results.append({
            # Model name
            'Model': model,

            # NLG base data
            'Accuracy NLG base': accuracy_score(y_true, base_pred),
            'Test Precision NLG base': precision_score(y_true, base_pred),
            'Test Recall NLG base': recall_score(y_true, base_pred),
            'Test F1-Score NLG base': f1_score(y_true, base_pred),

            # NLG LLM data
            'Accuracy NLG LLM': accuracy_score(y_true, llm_pred),
            'Precision NLG LLM': precision_score(y_true, llm_pred),
            'Recall NLG LLM': recall_score(y_true, llm_pred),
            'F1-Score NLG LLM': f1_score(y_true, llm_pred),
        })

    # Convert results to a pandas DataFrame
    return pd.DataFrame(results)

In [37]:
# Score every LLM's predictions for both prompt variants against the income labels
eval_df = evaluate_models_with_metrics(models, llm_df)
display(eval_df)

Unnamed: 0,Model,Accuracy NLG base,Test Precision NLG base,Test Recall NLG base,Test F1-Score NLG base,Accuracy NLG LLM,Precision NLG LLM,Recall NLG LLM,F1-Score NLG LLM
0,llama3.2:1b,0.2802,0.238425,0.933108,0.379804,0.5628,0.190006,0.260796,0.219843
1,llama3.2:3b,0.2606,0.240832,0.989839,0.387407,0.45,0.26075,0.723963,0.383408
2,gemma3:1b,0.7872,0.653543,0.210838,0.318822,0.79,0.684507,0.205758,0.316406
3,gemma3:4b,0.34,0.260673,0.977138,0.411555,0.3488,0.25956,0.948349,0.407569
4,llama3.1,0.27,0.24372,0.994073,0.391464,0.3302,0.246965,0.895851,0.387191
5,dolphin3,0.5718,0.311321,0.670618,0.425235,0.6636,0.335306,0.431837,0.377498
6,mistral,0.3224,0.248347,0.9221,0.391304,0.482,0.276136,0.735817,0.401571
7,deepseek-llm,0.2418,0.236895,0.99492,0.382674,0.2654,0.236017,0.943268,0.377563


## (Fair) Comparison with ML models
The base dataset:
1.  We take the same test set as the set we classified with the LLMs. This will be referred to as the hold-out set.
2.  We take the training data which is all the data excluding the hold-out set.

The LLM dataset:
1. We have limited samples to train on for this dataset, since ML models must be trained with the LLM generated columns.
2. We will partition the data that the LLM classified into a train part and a test part.
3. This will inherently cause a mismatch between the sizes of the test sets used for ML and LLM classification. However, we will try to generate sufficient samples for a fair comparison.

## Base data set 

In [40]:
# Read the preprocessed base data (all rows)
original_base_data = pd.read_csv(base_data_prep_name)

# Re-draw the hold-out rows; the fixed seed makes this sample reproducible.
# NOTE(review): assumes the LLM-classified rows were drawn from the same file
# with this same call — confirm against the preprocessing notebook.
hold_out_set = original_base_data.sample(n=n_rows, random_state=42)

# Every row outside the hold-out sample is available for training
training_set = original_base_data.drop(index=hold_out_set.index)

# Separate features (X) from the target (y)
training_set_X, training_set_y = training_set.drop(columns='income'), training_set['income']
hold_out_set_X, hold_out_set_y = hold_out_set.drop(columns='income'), hold_out_set['income']

# Report the shapes of both partitions
print(f"Training Set: X_train shape = {training_set_X.shape}, y_train shape = {training_set_y.shape}")
print(f"Test Set: X_test shape = {hold_out_set_X.shape}, y_test shape = {hold_out_set_y.shape}")

Training Set: X_train shape = (43842, 24), y_train shape = (43842,)
Test Set: X_test shape = (5000, 24), y_test shape = (5000,)


In [41]:
# Function to train and evaluate models with multiple metrics
def evaluate_models_with_metrics(models, X_train, y_train, X_test, y_test):
    """Fit each classifier on the training split and score it on the test split.

    NOTE: this definition reuses the name of the LLM-evaluation helper defined
    earlier in the notebook (which takes ``(models, df)``) and replaces it once
    this cell has run.

    Parameters
    ----------
    models : iterable of (str, estimator)
        Display name and estimator pairs; each estimator is fitted in place.
    X_train, y_train
        Training features and labels passed to ``fit``.
    X_test, y_test
        Test features passed to ``predict`` and the labels to score against.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns 'Model', 'Test Accuracy',
        'Test Precision', 'Test Recall' and 'Test F1-Score'.
    """
    # Output column name -> metric function, in the order the columns appear
    scorers = {
        'Test Accuracy': accuracy_score,
        'Test Precision': precision_score,
        'Test Recall': recall_score,
        'Test F1-Score': f1_score,
    }

    rows = []
    for label, estimator in models:
        estimator.fit(X_train, y_train)
        predicted = estimator.predict(X_test)

        row = {'Model': label}
        for column, scorer in scorers.items():
            row[column] = scorer(y_test, predicted)
        rows.append(row)

    return pd.DataFrame(rows)

In [42]:
# Train the ML baselines on the training set, score them on the hold-out set,
# and show them next to the LLM results for the base-data prompt.
# (This call uses the ML version of evaluate_models_with_metrics defined in the cell above.)
base_data_results = evaluate_models_with_metrics(ml_models, training_set_X, training_set_y, hold_out_set_X, hold_out_set_y)
display(base_data_results)
display(eval_df[['Model', 'Accuracy NLG base', 'Test Precision NLG base', 'Test Recall NLG base', 'Test F1-Score NLG base']])

Unnamed: 0,Model,Test Accuracy,Test Precision,Test Recall,Test F1-Score
0,Logistic Regression,0.8404,0.713966,0.541067,0.615607
1,Random Forest,0.8502,0.717742,0.602879,0.655315
2,SVM,0.8442,0.743341,0.519898,0.611858
3,KNN,0.8296,0.667345,0.555461,0.606285
4,Gradient Boosting,0.8662,0.783186,0.599492,0.679137


Unnamed: 0,Model,Accuracy NLG base,Test Precision NLG base,Test Recall NLG base,Test F1-Score NLG base
0,llama3.2:1b,0.2802,0.238425,0.933108,0.379804
1,llama3.2:3b,0.2606,0.240832,0.989839,0.387407
2,gemma3:1b,0.7872,0.653543,0.210838,0.318822
3,gemma3:4b,0.34,0.260673,0.977138,0.411555
4,llama3.1,0.27,0.24372,0.994073,0.391464
5,dolphin3,0.5718,0.311321,0.670618,0.425235
6,mistral,0.3224,0.248347,0.9221,0.391304
7,deepseek-llm,0.2418,0.236895,0.99492,0.382674


## LLM data set 

In [44]:
# Preprocessed version of the rows the LLMs classified, in a form the ML models
# can consume; the free-text `nlg` column is dropped.
llm_data_prep = pd.read_csv(llm_data_prep_name)
llm_data_prep = llm_data_prep.drop(columns="nlg")

In [45]:
# Separate features and target, then hold out 20% of the rows for testing
llm_features = llm_data_prep.drop(columns='income')
llm_target = llm_data_prep['income']
X_train_llm, X_test_llm, y_train_llm, y_test_llm = train_test_split(
    llm_features,
    llm_target,
    test_size=0.2,
    random_state=42,
)

# Report the shapes of both partitions
print(f"Training Set: X_train shape = {X_train_llm.shape}, y_train shape = {y_train_llm.shape}")
print(f"Test Set: X_test shape = {X_test_llm.shape}, y_test shape = {y_test_llm.shape}")

Training Set: X_train shape = (4000, 88), y_train shape = (4000,)
Test Set: X_test shape = (1000, 88), y_test shape = (1000,)


In [46]:
# Train and score the ML baselines on the LLM-extended data, and show them next
# to the LLM results for the LLM-extended prompt.
# Note: the ML models are scored on the 20% test split only, whereas the LLM
# metrics in eval_df were computed on all classified rows.
llm_data_prep_results = evaluate_models_with_metrics(ml_models, X_train_llm, y_train_llm, X_test_llm, y_test_llm)
display(llm_data_prep_results)
display(eval_df[['Model', 'Accuracy NLG LLM', 'Precision NLG LLM', 'Recall NLG LLM', 'F1-Score NLG LLM']])

Unnamed: 0,Model,Test Accuracy,Test Precision,Test Recall,Test F1-Score
0,Logistic Regression,0.838,0.732323,0.570866,0.641593
1,Random Forest,0.837,0.763006,0.519685,0.618267
2,SVM,0.843,0.759358,0.559055,0.643991
3,KNN,0.809,0.622568,0.629921,0.626223
4,Gradient Boosting,0.847,0.751244,0.594488,0.663736


Unnamed: 0,Model,Accuracy NLG LLM,Precision NLG LLM,Recall NLG LLM,F1-Score NLG LLM
0,llama3.2:1b,0.5628,0.190006,0.260796,0.219843
1,llama3.2:3b,0.45,0.26075,0.723963,0.383408
2,gemma3:1b,0.79,0.684507,0.205758,0.316406
3,gemma3:4b,0.3488,0.25956,0.948349,0.407569
4,llama3.1,0.3302,0.246965,0.895851,0.387191
5,dolphin3,0.6636,0.335306,0.431837,0.377498
6,mistral,0.482,0.276136,0.735817,0.401571
7,deepseek-llm,0.2654,0.236017,0.943268,0.377563
