# Imports

In [None]:
import json
import google.generativeai as genai
import pandas as pd

In [None]:
import sys

# Make the project root importable from the folder the notebook runs in
ROOT = '../'
sys.path.append(ROOT)

# First import: registers every project module in sys.modules
from config import *
from utils.config import *
from utils.utils import *
from utils.gemini_utils import *

# Reload each module (same order as the imports) so the notebook does not
# keep using a stale copy
from importlib import reload
for module_name in ('config', 'utils.config', 'utils.utils', 'utils.gemini_utils'):
    reload(sys.modules[module_name])

# Second import: rebind the names exported by the reloaded modules
from config import *
from utils.config import *
from utils.utils import *
from utils.gemini_utils import *

# Model configurations

In [None]:
# Load the Gemini API credentials from secrets/gemini.json
# (JSON is read as UTF-8 explicitly instead of relying on the locale default)
with open(os.path.join(ROOT, 'secrets/gemini.json'), encoding='utf-8') as file:
    creds = json.load(file)

# Select the Google API key
google_api_key = creds['GOOGLE_API_KEY_2']

# Set up the API key
genai.configure(api_key=google_api_key)

In [None]:
# Fetch the generation and safety settings used for the Gemini model
generation_config, safety_settings = gemini_configurations()

# Instantiate the model named by MODEL_NAME with those settings
model = genai.GenerativeModel(
    model_name=MODEL_NAME,
    generation_config=generation_config,
    safety_settings=safety_settings,
)

## Setup datasets

In [None]:
# Granularity of the dataset to annotate: "daily" | "hourly"
DATASET_TYPE = "daily"
# File name of the annotations produced by the model
ANNOTATED_DATASET_NAME = f"{DATASET_TYPE}_{MODEL_NAME}_opinion.csv"

# Input dataset, annotation checkpoint and the two merged outputs
ORIGINAL_DATASET_PATH = os.path.join(ROOT, MERGED_DATASET_PATH, f"merged_{DATASET_TYPE}.csv")
OPINION_DATASET_PATH = os.path.join(ROOT, ANNOTATED_DATASET_PATH, ANNOTATED_DATASET_NAME)
OUTPUT_DATASET_PATH = os.path.join(ROOT, ANNOTATED_DATASET_PATH, f"merged_{ANNOTATED_DATASET_NAME}")
OUTPUT_NO_TEXT_DATASET_PATH = os.path.join(ROOT, ANNOTATED_DATASET_PATH, f"merged_no_text_{ANNOTATED_DATASET_NAME}")

In [None]:
# Read the merged dataset (daily or hourly, depending on DATASET_TYPE) from the file
original_dataset = pd.read_csv(ORIGINAL_DATASET_PATH)
# Display the dataset in the cell output
original_dataset

In [None]:
# Check if the dataset exists
if os.path.exists(OPINION_DATASET_PATH):
    print(f"Loading the {OPINION_DATASET_PATH} dataset...")
    # Load the opinion_df dataset
    opinion_df = pd.read_csv(OPINION_DATASET_PATH)

    # Substitute the NaN values with the 'None' sentinel string used to mark
    # cells that are not annotated yet
    opinion_df = opinion_df.fillna('None')
else:
    print(f"Creating the {OPINION_DATASET_PATH} dataset...")
    # Create a new dataset that only shares the index of the original dataset
    # (building it from the index avoids copying every column just to drop them all)
    opinion_df = pd.DataFrame(index=original_dataset.index)
    # Add the annotation columns, all of them not annotated yet
    opinion_df['reasoning_text'] = 'None'
    opinion_df['sentiment_class'] = 'None'
    opinion_df['action_class'] = 'None'
    opinion_df['action_score'] = 'None'

# Select the rows that are and are not None. This is done for both branches so
# that the later cells always receive DataFrames holding the real counts
# (a freshly created dataset has every row empty, not zero rows).
non_empty_rows = opinion_df[opinion_df['reasoning_text'] != 'None']
empty_rows = opinion_df[opinion_df['reasoning_text'] == 'None']

# Display the number of rows that are and are not None
print(f"Number of rows that are not None: {non_empty_rows.shape[0]}")
print(f"Number of rows that are None: {empty_rows.shape[0]}")
opinion_df

In [None]:
# Define the func_kwargs path
func_kwargs_path = os.path.join(ROOT, ANNOTATED_DATASET_PATH, FUNC_KWARGS_FOLDER_NAME, f"{FUNC_KWARGS_FOLDER_NAME}_{MODEL_NAME}.json")

# Set to True for a test run: the generated func_kwargs are then not cached on disk
TEST = False

# Check if the func_kwargs file exists
if os.path.exists(func_kwargs_path):
    print(f"Loading the {func_kwargs_path} file...")
    # Load the cached func_kwargs instead of building them again
    with open(func_kwargs_path, 'r', encoding='utf-8') as f:
        func_kwargs = json.load(f)
else:
    print(f"Creating the {func_kwargs_path} file...")
    # Create a new func_kwargs
    # For each row in the dataset, populate the func_kwargs list with the input text and the index of each row
    func_kwargs = populate_func_kwargs(
        model_name=MODEL_NAME,
        merged_dataset=original_dataset,
        opinion_dataset=opinion_df,
        max_tokens=INPUT_TOKENS,
        instructions=INSTRUCTIONS,
        model_tokenizer=None,  # No tokenizer is needed
        test=TEST,
    )

    if not TEST:
        # Make sure the cache folder exists, otherwise the freshly computed
        # func_kwargs would be lost to a FileNotFoundError
        os.makedirs(os.path.dirname(func_kwargs_path), exist_ok=True)
        # Save the func_kwargs to the file
        with open(func_kwargs_path, 'w', encoding='utf-8') as f:
            json.dump(func_kwargs, f)

In [None]:
# func_kwargs[0]

In [None]:
# Select only the queries that are not annotated
func_kwargs = [query for query in func_kwargs if opinion_df.loc[query['index'], 'reasoning_text'] == 'None']

# Count the annotated rows directly from opinion_df: deriving the number from
# len(opinion_df) - len(func_kwargs) would also count rows that simply have no
# query (e.g. when func_kwargs only covers a subset of the rows)
annotated_count = int((opinion_df['reasoning_text'] != 'None').sum())
print(f"Number of queries that are already annotated: {annotated_count}")
print(f"Number of queries that are not annotated: {len(func_kwargs)}")

# Call Gemini API using RateNinja

In [None]:
def _parse_gemini_response(text):
    """Parse the raw text of a model response into a Python object.

    A Python literal is tried first. If that fails, a surrounding Markdown
    code fence is removed and the text is parsed as JSON (which also accepts
    ``true``/``false``/``null``), then once more as a Python literal.

    Raises:
        ValueError or SyntaxError: If the text cannot be parsed either way.
    """
    import ast
    import json

    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError):
        pass  # Not a plain Python literal: try the more lenient path below

    cleaned = text.strip()
    if cleaned.startswith("```") and cleaned.endswith("```"):
        cleaned = cleaned[3:-3]
        # Drop an optional language tag (e.g. "json") on the opening fence line
        first_line, separator, remainder = cleaned.partition("\n")
        if separator and not first_line.strip().startswith(("{", "[")):
            cleaned = remainder
        cleaned = cleaned.strip()

    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        return ast.literal_eval(cleaned)


def call_gemini_api(index, input_text):
    """Annotate one row of ``opinion_df`` with the model response to a prompt.

    The prompt is sent to the module-level ``model``; the response text is
    parsed and passed to ``check_response``; the four resulting values are
    written into row ``index`` of the module-level ``opinion_df``; the whole
    frame is then written to ``OPINION_DATASET_PATH`` as a checkpoint.

    Args:
        index: Label of the ``opinion_df`` row to update.
        input_text: Prompt passed to ``model.generate_content``.

    Returns:
        The ``index`` that was annotated.

    Raises:
        Exception: If any step fails. The message contains the failing index
            and the original error is attached as the cause.
    """
    try:
        # Generate the reasoning and sentiment
        response = model.generate_content(input_text)
        response_json = _parse_gemini_response(response.text)

        # Check the response
        reasoning_text, sentiment_class, action_class, action_score = check_response(response_json)

        # Update the sentiment dataset
        opinion_df.loc[index, 'reasoning_text'] = reasoning_text
        opinion_df.loc[index, 'sentiment_class'] = sentiment_class
        opinion_df.loc[index, 'action_class'] = action_class
        opinion_df.loc[index, 'action_score'] = action_score

        # Save temporary results so that an interrupted run can be resumed
        opinion_df.to_csv(OPINION_DATASET_PATH, index=False)

        return index
    except Exception as e:
        print(f"Error: {e}")
        # Chain the original error explicitly so its traceback is not lost
        raise Exception(f"Error: {e} at index {index}") from e

In [None]:
# Execute the API call: RATENINJA drives call_gemini_api over the pending queries.
# NOTE(review): each entry of func_kwargs is presumably passed as keyword
# arguments (index, input_text) — confirm against the RATENINJA helper.
results, errors = RATENINJA(call_gemini_api, func_args=None, func_kwargs=func_kwargs)

In [None]:
# Display the first value returned by RATENINJA
results

In [None]:
# Display the second value returned by RATENINJA
errors

In [None]:
# Display the opinion dataset after the API calls
opinion_df

## Check and save results

In [None]:
# Keep the split computed before the API calls to compare it with the new one
previous_empty_rows = empty_rows.copy()
previous_non_empty_rows = non_empty_rows.copy()

# Show the previous number of empty and non-empty rows
print(f"Previous number of empty rows: {len(previous_empty_rows)}")
print(f"Previous number of non-empty rows: {len(previous_non_empty_rows)}")

# A row is empty as soon as one of its annotation columns still holds 'None';
# every other row is non-empty
annotation_columns = ['reasoning_text', 'sentiment_class', 'action_class', 'action_score']
has_missing_annotation = (opinion_df[annotation_columns] == 'None').any(axis=1)
empty_rows = opinion_df[has_missing_annotation]
non_empty_rows = opinion_df[~has_missing_annotation]

# Show the new number of empty and non-empty rows
print(f"New number of empty rows: {len(empty_rows)}")
print(f"New number of non-empty rows: {len(non_empty_rows)}")

In [None]:
# Display the rows that still miss at least one annotation
empty_rows

In [None]:
# Display the rows that are fully annotated
non_empty_rows

In [None]:
# Print the distinct values found in each categorical/score annotation column
for column in ('sentiment_class', 'action_class', 'action_score'):
    print(f"Unique {column} values: {opinion_df[column].unique()}")

In [None]:
# Check the type of the columns (displayed as the cell output)
opinion_df.dtypes

In [None]:
# Put the opinion_df columns next to the original dataset columns.
# pd.concat with axis=1 aligns the two frames on their index, so both must
# share the same row labels for the annotations to land on the right rows.
original_dataset_with_opinion = pd.concat([original_dataset, opinion_df], axis=1)
# Display the merged dataset
original_dataset_with_opinion

In [None]:
# Create a dataset from the merged one without the text columns
# (i.e., without the cointelegraph, bitcoin_news, reddit and reasoning_text columns)
original_dataset_without_text = original_dataset_with_opinion.drop(columns=['cointelegraph', 'bitcoin_news', 'reddit', 'reasoning_text'])

In [None]:
# Display the merged dataset without the text columns
original_dataset_without_text

In [None]:
# Save the opinion dataset (daily or hourly, depending on DATASET_TYPE),
# using the same path constant the dataset is loaded from
opinion_df.to_csv(OPINION_DATASET_PATH, index=False)

In [None]:
# Save the merged dataset without the text columns (the row index is not written)
original_dataset_without_text.to_csv(OUTPUT_NO_TEXT_DATASET_PATH, index=False)

In [None]:
# Save the merged dataset including the opinion columns (the row index is not written)
original_dataset_with_opinion.to_csv(OUTPUT_DATASET_PATH, index=False)