In [2]:
import time
import csv
import openai
import logging
import re
import plotly.graph_objects as go
import os
from dotenv import load_dotenv

In [3]:
# Load environment variables from .env
load_dotenv()

# Retrieve the OpenAI API key from the environment
openai.api_key = os.getenv("OPENAI_API_KEY")

# Configure logging
logging.basicConfig(filename='evaluation_debug.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')


In [4]:
# Configure logging
logging.basicConfig(filename='logs/evaluation_debug.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def get_evaluation(comment):
    prompt = f"Evaluate the following comment: \"{comment}\" Here are the scores for each dimension:\nDiscrimination: 9/10\nPolitical Inclination: 8/10\nSource Credibility: 2/10\nPublic Perception: 5/10\nEmotional Appeal: 10/10"
    while True:
        try:
            logging.info(f"Evaluating comment: {comment}")
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an assistant who evaluates comments on various dimensions."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=100
            )
            logging.info("Evaluation successful")
            return response['choices'][0]['message']['content'].strip()
        except openai.error.RateLimitError:
            logging.warning("Rate limit reached. Retrying in 60 seconds...")
            time.sleep(60)  # Wait for 60 seconds before retrying
        except Exception as e:
            logging.error(f"An error occurred: {e}")
            return "Error in evaluation"

In [5]:
# File paths
input_file_path = '/Users/shiruizhou/Desktop/RA_qualtrics_openAI/data/misinformation_comments.csv'
output_file_path = '/Users/shiruizhou/Desktop/RA_qualtrics_openAI/data/output_file.csv'

In [6]:
try:
    # Read the CSV file and write the results to a new CSV file
    with open(input_file_path, mode='r', newline='', encoding='utf-8') as input_file:
        csv_reader = csv.DictReader(input_file)
        fieldnames = csv_reader.fieldnames + ['Evaluation']

        with open(output_file_path, mode='w', newline='', encoding='utf-8') as output_file:
            csv_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
            csv_writer.writeheader()

            for row in csv_reader:
                try:
                    # Get evaluation for each comment
                    evaluation = get_evaluation(row['Comment'])
                    row['Evaluation'] = evaluation
                    csv_writer.writerow(row)
                except Exception as row_error:
                    logging.error(f"Error processing row {row['Comment']}: {row_error}")
                    row['Evaluation'] = 'Evaluation Error'
                    csv_writer.writerow(row)

    logging.info(f"Evaluation results written to {output_file_path}")
    print(f"Evaluation results written to {output_file_path}")

except Exception as e:
    logging.critical(f"Critical error in processing the CSV file: {e}")
    print(f"An error occurred: {e}")


Evaluation results written to /Users/shiruizhou/Desktop/RA_qualtrics_openAI/data/output_file.csv


## Experimentation - radar graph comparision 

In [7]:
# Function to evaluate comments
def get_evaluation(comment):
    # Create the evaluation prompt
    prompt = (
        f"Evaluate the following comment: \"{comment}\" "
        "and provide an estimate for each dimension:\n"
        "Discrimination (on a scale of 1 to 10):\n"
        "Political Inclination (on a scale of 1 to 10):\n"
        "Source Credibility (on a scale of 1 to 10):\n"
        "Public Perception (on a scale of 1 to 10):\n"
        "Emotional Appeal (on a scale of 1 to 10):"
    )

    # Loop for retrying in case of rate limit
    retries = 0
    max_retries = 5

    while retries < max_retries:
        try:
            # Log the evaluation request
            logging.info(f"Evaluating comment: {comment}")

            # Make the API call to OpenAI
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an assistant who evaluates comments on various dimensions."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=100
            )

            # Extract and return the response content
            evaluation_result = response['choices'][0]['message']['content'].strip()
            logging.info("Evaluation successful")
            return evaluation_result

        except openai.error.RateLimitError:
            retries += 1
            logging.warning(f"Rate limit reached. Retrying in 60 seconds... (Attempt {retries}/{max_retries})")
            time.sleep(60)  # Wait before retrying

        except openai.error.InvalidRequestError as e:
            logging.error(f"Invalid request error: {e}")
            return "Error: Invalid request"

        except openai.error.AuthenticationError:
            logging.error("Authentication error: Invalid API key.")
            return "Error: Authentication failed"

        except Exception as e:
            logging.error(f"An unexpected error occurred: {e}")
            return "Error in evaluation"

    logging.error("Max retries reached. Evaluation failed.")
    return "Error: Max retries reached"

# Example usage
comment = "The policies are biased towards certain groups."
result = get_evaluation(comment)
print(result)


Evaluation of the comment "The policies are biased towards certain groups":

Discrimination: 8/10 - The comment suggests potential bias in policies, which could indicate discrimination against specific groups.
Political Inclination: 7/10 - The mention of bias in policies can often be associated with a perception of political inclination.
Source Credibility: 6/10 - The comment is subjective and lacks specific details or evidence to establish high credibility.
Public Perception: 6/10 - Depending on


In [15]:
import time
import openai
import logging
import re
import plotly.graph_objects as go

# Function to evaluate comments
def get_evaluation(comment):
    # Create the evaluation prompt
    prompt = (
        f"Evaluate the following comment: \"{comment}\" "
        "and provide an estimate for each dimension:\n"
        "Discrimination (on a scale of 1 to 10):\n"
        "Political Inclination (on a scale of 1 to 10):\n"
        "Source Credibility (on a scale of 1 to 10):\n"
        "Public Perception (on a scale of 1 to 10):\n"
        "Emotional Appeal (on a scale of 1 to 10):"
    )

    retries = 0
    max_retries = 5

    while retries < max_retries:
        try:
            logging.info(f"Evaluating comment: {comment}")
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an assistant who evaluates comments on various dimensions."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=100
            )

            evaluation_result = response['choices'][0]['message']['content'].strip()
            logging.info("Evaluation successful")

            # Debug: Print raw evaluation result to see the format
            print(f"Raw evaluation result for comment '{comment}':\n{evaluation_result}")
            return evaluation_result

        except openai.error.RateLimitError:
            retries += 1
            logging.warning(f"Rate limit reached. Retrying in 60 seconds... (Attempt {retries}/{max_retries})")
            time.sleep(60)

        except openai.error.InvalidRequestError as e:
            logging.error(f"Invalid request error: {e}")
            return "Error: Invalid request"

        except openai.error.AuthenticationError:
            logging.error("Authentication error: Invalid API key.")
            return "Error: Authentication failed"

        except Exception as e:
            logging.error(f"An unexpected error occurred: {e}")
            return "Error in evaluation"

    logging.error("Max retries reached. Evaluation failed.")
    return "Error: Max retries reached"

# Function to extract numerical scores from the evaluation result
def extract_scores(evaluation_text):
    # Debug: Print the evaluation text to inspect the format
    print(f"Extracting scores from evaluation text:\n{evaluation_text}")

    # Regex pattern to match "Dimension: X/10" where X is a number
    # This will match both bullet point format and inline text format
    pattern = re.compile(r'(Discrimination|Political Inclination|Source Credibility|Public Perception|Emotional Appeal)[^\d]*(\d+)/10')

    scores = {match[0]: int(match[1]) for match in pattern.findall(evaluation_text)}

    # Debug: Print the extracted scores
    print(f"Extracted Scores: {scores}")
    
    return scores

# Function to plot radar chart for comparison of two comments
def plot_radar(comment1_scores, comment2_scores):
    dimensions = list(comment1_scores.keys())
    comment1_values = list(comment1_scores.values())
    comment2_values = list(comment2_scores.values())

    fig = go.Figure()

    # Add first comment to radar plot
    fig.add_trace(go.Scatterpolar(
        r=comment1_values,
        theta=dimensions,
        fill='toself',
        name='Comment 1'
    ))

    # Add second comment to radar plot
    fig.add_trace(go.Scatterpolar(
        r=comment2_values,
        theta=dimensions,
        fill='toself',
        name='Comment 2'
    ))

    # Update layout
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 10])
        ),
        showlegend=True,
        title="Radar Chart Comparison of Two Comments"
    )

    # Show the radar chart
    fig.show()
    fig.save()

# Function to compare two comments and plot radar chart
def compare_two_comments(comment1, comment2):
    # Evaluate both comments
    eval1 = get_evaluation(comment1)
    eval2 = get_evaluation(comment2)

    # Extract the scores
    scores1 = extract_scores(eval1)
    scores2 = extract_scores(eval2)

    # Print extracted scores for debugging
    print("Comment 1 Scores:", scores1)
    print("Comment 2 Scores:", scores2)

    # Plot the radar chart for comparison
    plot_radar(scores1, scores2)


In [11]:
# Example comments to compare
comment1 = "The new tax law unfairly targets small businesses, making it harder for them to compete."
comment2 = "The tax reform will promote economic growth by giving large corporations more flexibility to innovate."


# Compare the two comments
compare_two_comments(comment1, comment2)

Raw evaluation result for comment 'The new tax law unfairly targets small businesses, making it harder for them to compete.':
Evaluation of the comment:
- Discrimination: 7/10 - The comment suggests that small businesses are being unfairly targeted, which implies some level of discrimination in the new tax law.
- Political Inclination: 8/10 - The comment takes a stance against a specific policy (tax law), indicating a potential bias or political inclination.
- Source Credibility: 6/10 - Without knowing the expertise or background of the person making the comment, it's challenging to assess the credibility
Raw evaluation result for comment 'The tax reform will promote economic growth by giving large corporations more flexibility to innovate.':
Discrimination: 3/10 - The comment does not show overt discrimination.
Political Inclination: 7/10 - The statement indicates support for tax reform benefiting large corporations, which is often associated with conservative political inclinations.
