# Local LLama notebook

Current notebook utilizes local LLama model installed on a local machine.\
Requirements:

In [1]:
# Import Libraries
import os
import sys
import time

project_dir = os
# Determine the project directory from the current working directory
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../..'))
source_code_dir = os.path.join(project_dir, '10_Source_Code')
# Add the path to the directory containing your module to the system path
sys.path.append(source_code_dir)
import llama_setup as ls
import data_setup as ds

In [2]:
# Global Variables
CATEGORIES = [
        "Finance",
        "Production",
        "Reserves / Exploration / Acquisitions / Mergers / Divestments",
        "Environment / Regulatory / Geopolitics",
        "Alternative Energy / Lower Carbon",
        "Oil Price / Natural Gas Price / Gasoline Price"]

SENTIMENT_RESULTS_FILE_PATH = 'Full_data_LLama_model_sentiment_analysis_results.csv'

ROWS_TO_DROP = ['PQ-2840736837']

In [3]:
text_df = ds.load_cleaned_data()
text_df = ds.drop_unprocessable_rows(text_df, ROWS_TO_DROP)
#print(f"Dropped rows: {ROWS_TO_DROP}")

In [4]:
# Check if sentiment analysis results file exists
file_exists = ds.check_file_exists(SENTIMENT_RESULTS_FILE_PATH)

if file_exists:
    print(f"The file exists in the current directory.")
else:
    print(f"The file does not exist in the current directory.")
    empty_sentiment_df = ds.create_empty_sentiment_df(text_df, CATEGORIES)
    ds.save_dataframe_to_csv(empty_sentiment_df, SENTIMENT_RESULTS_FILE_PATH)
    print(f"Created and saved an empty sentiment analysis DataFrame to {SENTIMENT_RESULTS_FILE_PATH}")

The file exists in the current directory.


In [5]:
# Find the first unique ID with empty values
unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)
print(unique_id)

None


In [6]:
# Get LLama inputs
company, source, headline, text = ds.get_model_inputs(text_df, unique_id)
# print(f"Company: {company}\n")
# print(f"Source: {source}\n")
# print(f"Headline: {headline}\n")
# print(f"Text:\n{text}")

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Define the template
TEMPLATE = """<s>Classify the following article into categories with sentiment (Positive, Neutral, Negative, N/A if not applicable or not mentioned) and provide the output in the specified dictionary format.
Example:
Article: ExxonMobil announced a significant increase in quarterly profits due to rising oil prices and increased production levels.
Output: {{"Finance": "Positive", 'Production': "Positive", "Reserves / Exploration / Acquisitions / Mergers / Divestments": 'Neutral', "Environment / Regulatory / Geopolitics": 'Neutral', "Alternative Energy / Lower Carbon": 'Neutral', "Oil Price / Natural Gas Price / Gasoline Price": "Positive"}}

Example:
Article: Chevron plans to invest heavily in renewable energy projects, aiming to reduce its carbon footprint over the next decade.
Output: {{'Finance': 'Neutral', 'Production': 'Neutral', "Reserves / Exploration / Acquisitions / Mergers / Divestments": 'Neutral', "Environment / Regulatory / Geopolitics": "Positive", "Alternative Energy / Lower Carbon": "Positive", "Oil Price / Natural Gas Price / Gasoline Price": 'Neutral'}}

Example:
Article: BP faced regulatory challenges in its latest drilling project, delaying operations and increasing costs.
Output: {{'Finance': 'Negative', "Production": 'Negative', "Reserves / Exploration / Acquisitions / Mergers / Divestments": 'Negative', "Environment / Regulatory / Geopolitics": 'Negative', "Alternative Energy / Lower Carbon": 'Neutral', "Oil Price / Natural Gas Price / Gasoline Price": 'Neutral'}}

Article: {article}

Output only the EXACT dictionary format:
{{"Finance": '[Sentiment]', "Production": '[Sentiment]', "Reserves / Exploration / Acquisitions / Mergers / Divestments": '[Sentiment]', "Environment / Regulatory / Geopolitics":: '[Sentiment]', "Alternative Energy / Lower Carbon": '[Sentiment]', "Oil Price / Natural Gas Price / Gasoline Price": '[Sentiment]'}}

Do not use any other format or additional information. Please provide the output in the specified format only.</s>"""


In [None]:
response = ls.get_ollama_response(text, TEMPLATE)
print(response)

Error: Prompt length exceeds maximum context length of 8192 tokens


In [None]:
response = ls.get_ollama_response(text, TEMPLATE)
print(response)
sentiment_dict = ds.extract_and_convert_to_dict(response)
print(sentiment_dict)
# Check if sentiment_dict is a dictionary before updating the CSV
# Check if sentiment_dict is a dictionary before updating the CSV
if isinstance(sentiment_dict, dict):
    ds.update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict)
else:
    print("Error: Sentiment dictionary not found. Skipping update.")
    # Create a dictionary with 'No JSON found' for each category
    sentiment_dict = {category: "No JSON found" for category in CATEGORIES}
    ds.update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict)

Error: Prompt length exceeds maximum context length of 8192 tokens
No JSON object found in the response.
Error: Sentiment dictionary not found. Skipping update.
Row with Unique_ID 'SEC-119312519043841' has been updated.


In [None]:
import pandas as pd 
review_df = pd.read_csv(SENTIMENT_RESULTS_FILE_PATH)
review_df.query('Unique_ID == "IR-1"')

Unnamed: 0,Source,Unique_ID,Ticker,Date,URL,Finance,Production,Reserves / Exploration / Acquisitions / Mergers / Divestments,Environment / Regulatory / Geopolitics,Alternative Energy / Lower Carbon,Oil Price / Natural Gas Price / Gasoline Price,Reserves / Exploration / Acquisitions / Mergers / Divestitures
0,Investment Research,IR-1,MRO,2024-05-16,,Neutral,Positive,Neutral,Neutral,Not applicable,Neutral,


In [None]:
# Main Loop
start_time = time.time()
unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)
count = 0
max_tries = 5

while unique_id:
    retries = 0
    success = False

    while retries < max_tries and not success:
        try:
            company, source, headline, text = ds.get_model_inputs(text_df, unique_id)
            response = ls.get_ollama_response(text, TEMPLATE)
            sentiment_dict = ds.extract_and_convert_to_dict(response)

            if isinstance(sentiment_dict, dict):
                ds.update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict)
                success = True
            else:
                print("Error: Sentiment dictionary not found. Skipping update.")
                retries += 1
        except Exception as e:
            print(f"Error: {e}")
            retries += 1
    if not success:
        print(f"Max retries reached for Unique_ID '{unique_id}'. Inserting 'No JSON found' for each category.")
        sentiment_dict = {category: "No JSON found" for category in CATEGORIES}
        ds.update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict, CATEGORIES)


    count += 1
    if count % 10 == 0:
        elapsed_time = time.time() - start_time
        minutes, seconds = divmod(elapsed_time, 60)
        print(f"Iteration: {count}, Elapsed Time: {int(minutes)} minutes and {seconds:.2f} seconds")

    unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)

Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Max retries reached for Unique_ID 'SEC-119312519134334'. Inserting 'No JSON found' for each category.


TypeError: update_csv() takes 3 positional arguments but 4 were given

In [None]:
# Main Loop
start_time = time.time()
unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)
count = 0
MAX_TRIES = 5

while unique_id:
    retries = 0
    success = False

    while retries < MAX_TRIES and not success:
        try:
            company, source, headline, text = ds.get_model_inputs(text_df, unique_id)
            response = ls.get_ollama_response(text, TEMPLATE)
            sentiment_dict = ds.extract_and_convert_to_dict(response)

            if isinstance(sentiment_dict, dict):
                update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict, CATEGORIES)
                success = True
            else:
                print("Error: Sentiment dictionary not found. Skipping update.")
                retries += 1
        except Exception as e:
            print(f"Error: {e}")
            retries += 1

    if not success:
        print(f"Max retries reached for Unique_ID '{unique_id}'. Inserting 'No JSON found' for each category.")
        sentiment_dict = {category: "No JSON found" for category in CATEGORIES}
        ds.update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict, CATEGORIES)

    count += 1
    if count % 10 == 0:
        elapsed_time = time.time() - start_time
        minutes, seconds = divmod(elapsed_time, 60)
        print(f"Iteration: {count}, Elapsed Time: {int(minutes)} minutes and {seconds:.2f} seconds")

    unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)


Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Error: Sentiment dictionary not found. Skipping update.
Max retries reached for Unique_ID 'SEC-119312519134334'. Inserting 'No JSON found' for each category.


TypeError: update_csv() takes 3 positional arguments but 4 were given

In [7]:
def update_csv(file_path, unique_id, sentiment_dict, categories):
    """
    Updates the columns of a CSV file based on the unique ID and sentiment dictionary.

    Args:
        file_path (str): The path to the CSV file.
        unique_id (str): The unique ID of the row to be updated.
        sentiment_dict (dict): A dictionary with categories as keys and their corresponding sentiments as values.
        categories (list): List of all possible categories.

    Returns:
        None
    """
    df = pd.read_csv(file_path)
    row_index = df[df['Unique_ID'] == unique_id].index
    for category in categories:
        sentiment = sentiment_dict.get(category, "No JSON found")
        if sentiment == "N/A":
            sentiment = "Neutral"
        df.loc[row_index, category] = sentiment
    df.to_csv(file_path, index=False)
    print(f"Row with Unique_ID '{unique_id}' has been updated.")

# Main Loop
start_time = time.time()
unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)
count = 0

while unique_id:
    retries = 0
    success = False

    while retries < MAX_TRIES and not success:
        try:
            company, source, headline, text = ds.get_model_inputs(text_df, unique_id)
            response = ls.get_ollama_response(text, TEMPLATE)
            sentiment_dict = ds.extract_and_convert_to_dict(response)

            if isinstance(sentiment_dict, dict):
                update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict, CATEGORIES)
                success = True
            else:
                print("Error: Sentiment dictionary not found. Skipping update.")
                retries += 1
        except Exception as e:
            print(f"Error: {e}")
            retries += 1

    if not success:
        print(f"Max retries reached for Unique_ID '{unique_id}'. Inserting 'No JSON found' for each category.")
        sentiment_dict = {category: "No JSON found" for category in CATEGORIES}
        update_csv(SENTIMENT_RESULTS_FILE_PATH, unique_id, sentiment_dict, CATEGORIES)

    count += 1
    if count % 10 == 0:
        elapsed_time = time.time() - start_time
        minutes, seconds = divmod(elapsed_time, 60)
        print(f"Iteration: {count}, Elapsed Time: {int(minutes)} minutes and {seconds:.2f} seconds")

    unique_id = ds.find_first_unique_id_with_empty_values(SENTIMENT_RESULTS_FILE_PATH, CATEGORIES)
