In [None]:
## Initialise parameters and model endpoints
import vertexai
from vertexai.generative_models import GenerativeModel
from vertexai.tuning import sft


# Project and region passed to vertexai.init() and reused to build the
# tuning-job resource name below.
PROJECT_ID = "tech-talent-4-2bed493d"
location = "europe-west1"

# ID of the supervised tuning job (the `tuningJobs/...` segment of its
# resource name).
TUNING_JOB_ID = "6174178765476724736"
# Original name for the same value, kept so any existing reference still
# resolves. Despite the name, it is the tuning-job ID, not a project ID.
project = TUNING_JOB_ID


vertexai.init(project=PROJECT_ID, location=location)

# Assemble the resource name from the constants above instead of repeating
# them in a literal, so changing the project, region or job ID in one place
# cannot leave this path pointing somewhere else.
sft_tuning_job = sft.SupervisedTuningJob(
    f"projects/{PROJECT_ID}/locations/{location}/tuningJobs/{TUNING_JOB_ID}"
)
# Model served from the endpoint reported by the tuning job.
tuned_model = GenerativeModel(sft_tuning_job.tuned_model_endpoint_name)


# Untuned model that the tuned model is compared against.
model = GenerativeModel("gemini-2.0-flash")



In [None]:
##Extract data
import pandas as pd

# Read the evaluation set from the CSV file in the working directory.
test_data = pd.read_csv('test_data_trial.csv')

# Print a preview of the first five rows (head()'s default) as plain text.
print(test_data.head(5))

# Trailing expression: the notebook displays the number of rows loaded.
test_data.shape[0]

                                            dialogue      label
0  caller: Hello, is this John? I'm calling from ...      Fraud
1  caller: Hello, is this John? receiver: No, thi...  Not Fraud
2  caller: Hello, is this Mr. Johnson?  receiver:...      Fraud
3  caller: Hello, is this Mr. Johnson? receiver: ...      Fraud
4  caller: Hello, this is John from Microsoft Tec...      Fraud


100

In [None]:
## Query tuned model
## NB: requesting every row in one run can hit the API query limit. If that
## happens, run the loop over slices of the data or add a backoff between calls.
# Iterate over the rows actually present rather than a hard-coded 100, so the
# cell works for any dataset size and writes each guess back to the row it
# was read from.
for row_label, dialogue in test_data['dialogue'].items():
    response = tuned_model.generate_content(
        [
            dialogue,
            "please respond exclusively with either 'Fraud' or 'Not Fraud'",
        ]
    )
    answer = response.text.strip()
    # Keep only the text up to and including the first 'Fraud', which turns a
    # reply such as "Not Fraud ..." into "Not Fraud". A reply that never
    # mentions 'Fraud' is stored as-is instead of having 'Fraud' appended, so
    # unparseable replies stay visible in the results.
    if 'Fraud' in answer:
        answer = answer.split('Fraud')[0] + 'Fraud'
    test_data.loc[row_label, "guess_tuned"] = answer

In [None]:
## Query base model
# Iterate over the rows actually present rather than a hard-coded 100, so the
# cell works for any dataset size and writes each guess back to the row it
# was read from.
for row_label, dialogue in test_data['dialogue'].items():
    response = model.generate_content(
        [
            dialogue,
            "please respond exclusively with either 'Fraud' or 'Not Fraud'",
        ]
    )
    answer = response.text.strip()
    # Truncate at the first 'Fraud', as is done for the tuned model's replies,
    # so both models are scored on the same parsed label and the accuracy
    # comparison is not skewed by trailing text. Replies that never mention
    # 'Fraud' are stored unchanged.
    if 'Fraud' in answer:
        answer = answer.split('Fraud')[0] + 'Fraud'
    test_data.loc[row_label, "guess_base"] = answer

In [None]:
## Calculate accuracies
# Side-by-side view of the true label and each model's guess.
results_df = test_data[['label', 'guess_tuned', 'guess_base']]
print(results_df)


# Accuracy is the fraction of rows whose guess equals the label exactly
# (the comparison is case- and whitespace-sensitive). Taking the mean of the
# boolean match mask gives that fraction in one vectorized step, and yields
# NaN rather than a ZeroDivisionError if the frame is empty.
tuned_accuracy = test_data["label"].eq(test_data["guess_tuned"]).mean()
base_accuracy = test_data["label"].eq(test_data["guess_base"]).mean()

print(f"Tuned model accuracy: {tuned_accuracy}")
print(f"Base model accuracy: {base_accuracy}")

# List the distinct tuned-model guesses to spot any reply that is not one of
# the two expected labels.
print(test_data["guess_tuned"].unique())

        label guess_tuned guess_base
0       Fraud       Fraud      Fraud
1   Not Fraud   Not Fraud  Not Fraud
2       Fraud       Fraud      Fraud
3       Fraud       Fraud      Fraud
4       Fraud       Fraud      Fraud
..        ...         ...        ...
95  Not Fraud   Not Fraud  Not Fraud
96      Fraud       Fraud      Fraud
97  Not Fraud   Not Fraud  Not Fraud
98      Fraud       Fraud      Fraud
99  Not Fraud   Not Fraud  Not Fraud

[100 rows x 3 columns]
Tuned model accuracy: 0.99
Base model accuracy: 0.95
['Fraud' 'Not Fraud']
