In [None]:
import json
import os
from pathlib import Path

import numpy as np
from tqdm import tqdm

from lib.database.database_connector import DatabaseConnector
from lib.gemini.prompt_generator import PromptGenerator

In [None]:
# Build the connector for ./data/database.db and keep its `cursor` attribute,
# which the query cells below call `execute` on.
database_connector = DatabaseConnector("./data/database.db")
cursor = database_connector.cursor

In [None]:
# Chronological split on filing_date: rows before 2020-01-01 are the training
# set, rows on or after that date are the test set.
train_query = "SELECT ticker, sentences, filing_date, label FROM embeddings WHERE filing_date < '2020-01-01';"
test_query = "SELECT ticker, sentences, filing_date, label FROM embeddings WHERE filing_date >= '2020-01-01';"

train_data = cursor.execute(train_query).fetchall()
test_data = cursor.execute(test_query).fetchall()

In [None]:
def _join_sentences(rows, desc):
    """Decode each row's JSON `sentences` field and join its items with spaces.

    Args:
        rows: Iterable of (ticker, sentences, filing_date, label) tuples, where
            `sentences` is JSON text that decodes to a sequence of strings.
        desc: Label shown on the tqdm progress bar.

    Returns:
        list: (ticker, joined_text, filing_date, label) tuples in input order.
    """
    joined_rows = []
    for ticker, sentences, filing_date, label in tqdm(rows, desc=desc):
        joined_text = " ".join(json.loads(sentences))
        joined_rows.append((ticker, joined_text, filing_date, label))
    return joined_rows


train_data = _join_sentences(train_data, "Training")
test_data = _join_sentences(test_data, "Testing")

In [None]:
# Show the prompt template that PromptGenerator exposes as TEMPLATE.
print(PromptGenerator.TEMPLATE)

In [None]:
def _build_prompts(rows, desc):
    """Turn (ticker, mda, filing_date, label) rows into (prompt, label) pairs.

    Args:
        rows: Iterable of (ticker, mda, filing_date, label) tuples.
        desc: Label shown on the tqdm progress bar.

    Returns:
        list: (prompt, label) tuples in input order.
    """
    pairs = []
    for ticker, mda, filing_date, label in tqdm(rows, desc=desc):
        # generate_prompt takes the date before the text, unlike the row layout.
        prompt = PromptGenerator.generate_prompt(ticker, filing_date, mda)
        pairs.append((prompt, label))
    return pairs


train_prompts = _build_prompts(train_data, "Training")

test_prompts = _build_prompts(test_data, "Testing")

In [None]:
# test_prompts = test_prompts[:10]

# Gemini Predictions

In [None]:
from lib.gemini.gemini_chat import GeminiChat

In [None]:
def check_errors(gemini_answers):
    """Count the answers that represent a failed prediction.

    An answer is a failure when it is ``None`` or the string ``"ERROR"``.

    Args:
        gemini_answers: Iterable of answers to inspect.

    Returns:
        int: Number of failed answers (0 for an empty iterable).
    """
    # `is None` is an identity test; `== None` would dispatch to the answer's
    # own __eq__, which may be overridden.
    return sum(1 for answer in gemini_answers if answer is None or answer == "ERROR")

In [None]:
# Read the API key from the environment so the secret never lives in the notebook.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# and should be revoked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError("Set the GEMINI_API_KEY environment variable before running this notebook.")

gemini_gym = GeminiChat(
    api_key=GEMINI_API_KEY,
    model_version="gemini-2.5-pro",
    system_prompt=PromptGenerator.get_system_prompt(),
    # NOTE(review): -1 presumably requests a dynamic thinking budget; confirm against GeminiChat.
    thinking_budget=-1,
)

In [None]:
gemini_answers = []
y_true = []
# Running tally of failed answers, so the whole list is not rescanned on every iteration.
error_count = 0

pbar = tqdm(test_prompts, desc="Testing", total=len(test_prompts))

for prompt, label in pbar:
    gemini_answer = gemini_gym.predict(prompt)
    gemini_answers.append(gemini_answer)
    y_true.append(label)

    # Update the error count with the newest answer only; check_errors stays
    # the single definition of what counts as a failure.
    error_count += check_errors([gemini_answer])
    # Show the count next to the progress bar via the postfix.
    pbar.set_postfix(errors=error_count)

In [None]:
# Number of collected answers that are None or "ERROR".
check_errors(gemini_answers)

In [None]:
# Total number of collected answers, failed ones included.
len(gemini_answers)

In [None]:
import json

# Write the collected answers to my_list.json (UTF-8, non-ASCII characters kept
# as-is, 2-space indentation).
with open("my_list.json", "w", encoding="utf-8") as f:
    json.dump(gemini_answers, f, ensure_ascii=False, indent=2)

In [None]:
# with open("my_list.json", "r", encoding="utf-8") as f:
#     loaded_list = json.load(f)

In [None]:
# Snapshot of the answers taken before the retry cell overwrites failed entries
# in place. Later cells read `tmp`; with this assignment commented out they
# raise NameError on a fresh kernel. A shallow copy keeps the snapshot separate
# from later slot replacements.
tmp = list(gemini_answers)

In [None]:
len(tmp)

In [None]:
len(gemini_answers)

In [None]:
tmp

In [None]:
# Upper bound on retry passes, so a prompt that fails every time cannot keep
# the notebook (and the API quota) spinning forever.
MAX_RETRY_ROUNDS = 5

for retry_round in range(1, MAX_RETRY_ROUNDS + 1):
    if check_errors(gemini_answers) == 0:
        break
    print(f"Loop {retry_round}")
    for idx, (prompt, _label) in enumerate(tqdm(test_prompts, desc="Retrying ERROR prompts")):
        # Re-query only the slots that still hold a failure marker.
        if gemini_answers[idx] is None or gemini_answers[idx] == "ERROR":
            gemini_answers[idx] = gemini_gym.predict(prompt)

remaining_errors = check_errors(gemini_answers)
if remaining_errors:
    print(f"Stopped after {MAX_RETRY_ROUNDS} rounds with {remaining_errors} unresolved answers")
else:
    print("Done !!")

In [None]:
# Spot-check a single answer. 283 is a hardcoded position and raises IndexError
# when fewer than 284 answers were collected.
gemini_answers[283]

In [None]:
# Collect predictions and their matching labels, skipping answers that are
# still failure markers.
y_pred = []
y_label = []
for idx, gemini_answer in enumerate(gemini_answers):
    if gemini_answer is None or gemini_answer == "ERROR":
        continue
    y_pred.append(gemini_answer['prediction'])
    y_label.append(y_true[idx])
    # Attach the label to the answer dict in place so it is stored next to the
    # prediction when the answers are written out.
    gemini_answer['label'] = y_true[idx]

# Save Results

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
# Classification metrics for the answers that resolved to a prediction.
# The heading names the model actually evaluated in this notebook.
print("=== Gemini 2.5 Pro ===")
print("Accuracy:       ", accuracy_score(y_label, y_pred))
print("Precision:      ", precision_score(y_label, y_pred))
print("Recall:         ", recall_score(y_label, y_pred))
print("F1-score:       ", f1_score(y_label, y_pred))
# For a binary problem ravel() flattens the 2x2 matrix as tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_label, y_pred).ravel()
# These are counts, so print them as integers instead of with three decimals.
print(f"TP: {tp}")
print(f"TN: {tn}")
print(f"FP: {fp}")
print(f"FN: {fn}")

In [None]:
import json
from typing import List, Dict

def save_dict_list_to_json(data: List[Dict], filename: str) -> None:
    """
    Save a list of dictionaries to a file in JSON format.

    Missing parent directories of ``filename`` are created first, so writing to
    a nested path does not fail with FileNotFoundError. The file is written as
    UTF-8 with non-ASCII characters kept as-is and 2-space indentation.

    Args:
        data (List[Dict]): List of dictionaries to save.
        filename (str): Name of the JSON file to create (including extension).
    """
    target = Path(filename)
    # For a bare file name the parent is "." and exist_ok makes this a no-op.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

In [None]:
# Write the answers list to the results JSON file.
save_dict_list_to_json(gemini_answers, "outputs/gemini/gemini_pro_without_sft_dyn_re.json")

In [None]:
import pandas as pd

# Load the saved results file with pandas; the next cell reads its
# "prediction" and "label" columns.
result = pd.read_json("outputs/gemini/gemini_pro_without_sft_dyn_re.json")

In [None]:
# Take predictions and labels from the reloaded results. This rebinds `y_pred`
# and `y_true`, which held plain lists earlier in the notebook.
y_pred, y_true = result["prediction"], result["label"]

In [None]:
# Classification metrics computed from the current `y_true` / `y_pred`.
# The heading names the model actually evaluated in this notebook.
print("=== Gemini 2.5 Pro ===")
print("Accuracy:       ", accuracy_score(y_true, y_pred))
print("Precision:      ", precision_score(y_true, y_pred))
print("Recall:         ", recall_score(y_true, y_pred))
print("F1-score:       ", f1_score(y_true, y_pred))
# For a binary problem ravel() flattens the 2x2 matrix as tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
# These are counts, so print them as integers instead of with three decimals.
print(f"TP: {tp}")
print(f"TN: {tn}")
print(f"FP: {fp}")
print(f"FN: {fn}")

In [None]:
# NOTE(review): this cell recomputes the same metrics from `y_true` / `y_pred`
# a second time; consider keeping only one metrics cell.
# The heading names the model actually evaluated in this notebook.
print("=== Gemini 2.5 Pro ===")
print("Accuracy:       ", accuracy_score(y_true, y_pred))
print("Precision:      ", precision_score(y_true, y_pred))
print("Recall:         ", recall_score(y_true, y_pred))
print("F1-score:       ", f1_score(y_true, y_pred))
# For a binary problem ravel() flattens the 2x2 matrix as tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
# These are counts, so print them as integers instead of with three decimals.
print(f"TP: {tp}")
print(f"TN: {tn}")
print(f"FP: {fp}")
print(f"FN: {fn}")

In [None]:
# Scratch check unrelated to the evaluation above: `i += 1` only rebinds the
# loop variable, so the list itself is unchanged and this prints [1, 2, 3].
x = [1,2,3]

for i in x:
    i += 1

print(x)