In [None]:
!pip install openai==0.28

In [None]:
import os
import openai
import sys
import csv
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
from google.colab import userdata
from collections import Counter
from sklearn.metrics import precision_recall_curve, average_precision_score, accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error

# Configure the OpenAI client credentials.
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# written into the notebook; if it is not set, fall back to the manual
# placeholder below, which must then be replaced by hand.
openai.api_key = os.environ.get("OPENAI_API_KEY", "assign your OpenAI key here")

In [None]:
# Read the validation sentences that the model will be asked to annotate.
# Adjust the file name to match the CSV that was actually uploaded.
# Bytes that are not valid UTF-8 are dropped silently (errors='ignore').
with open("creative_validation.csv", encoding='utf-8', errors='ignore') as file:
    input_data = file.read()

# Annotation guidelines; they form the first part of the system message.
rules = """For this task, you'll be asked to annotate album review sentences from the Pitchfork website. Before describing the task, let's have a quick look at Pitchfork. Pitchfork is a website publishing music reviews of mainly rock, but also folk, heavy metal, electronic music and hip-hop albums.
For each sentence group, follow these instructions :
Carefully read the text of the review, paying close attention to details. Be careful not to use shortcuts: for example, a positive phrase does not necessarily mean creativity.
Classify the phrase group as creative (1), uncreative (2) or indifferent (3)
Phrases should be coded as CREATIVE when they claim that the music of the artist or band is creative. This includes references to innovation within a music genre, bridging between several music genres, risk-taking, artistic openness, and technical innovation.
Phrases should be coded as UNCREATIVE when they evoke a lack of creativity on the part of the artist or group. This includes mentions of a blatant lack of innovation. Mentions of a lack of depth in lyrics or production. And finally, mentions of an impression of copying or déjà-vu.
Sentence groups should be coded as INDIFFERENT when they don't fit into any of these categories.
"""

# Conversation history shared by the functions below. It starts with a single
# system message: the guidelines, one space, then the raw CSV text.
context = [{'role': 'system', 'content': " ".join((rules, input_data))}]

# Function to fetch messages from the OpenAI Chat model
def fetch_messages(messages, model="gpt-4", temperature=0):
    """Send a chat history to the OpenAI chat endpoint and return the reply text.

    Args:
        messages: List of ``{'role': ..., 'content': ...}`` dicts forming the
            conversation so far.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    request = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

# Function to refresh and update the conversation context based on user input
def refresh_conversation(chat):
    """Send one user message to the model and record the exchange.

    The user message and the model's reply are appended to the module-level
    ``context`` list, but only after the API call has succeeded. If
    ``fetch_messages`` raises, ``context`` is left untouched, so a retry does
    not send the same prompt twice.

    Args:
        chat: The user message; it is formatted to a string before sending.

    Returns:
        The model's reply text.
    """
    user_turn = {'role': 'user', 'content': f"{chat}"}
    # Query with a temporary history so a failed call cannot leave an
    # unanswered user message behind in the shared context.
    # temperature=0.7 overrides the default of 0 declared by fetch_messages.
    response = fetch_messages(context + [user_turn], temperature=0.7)
    assistant_turn = {'role': 'assistant', 'content': f"{response}"}
    context.extend([user_turn, assistant_turn])
    return response

# Entry point: query the model three times, majority-vote the label of each ID, and score the result against the ground truth
def main():
    """Label the sentences three times, majority-vote, and score the result.

    Steps:
      1. Ask the model for labels three times and collect the answers per
         sentence ID.
      2. Write the per-run labels to ``output_all_runs.csv``.
      3. Write the majority label of each ID to ``output.csv``.
      4. Compare the majority labels with
         ``creative_validation_ground_truth.csv`` and print accuracy,
         macro precision/recall/F1 and a confusion table.

    Ground truth and predictions are matched by ID, not by line position, so
    a missing or reordered prediction cannot shift every later pair.
    Ground-truth IDs without a prediction are reported and left out of the
    metrics.

    Raises:
        ValueError: If the ground-truth file contains a malformed row, or if
            no ground-truth ID has a matching prediction.
    """
    n_runs = 3
    prompt = "You were provided with a database, and your task is to classify each sentence as creative, uncreative, or indifferent. Please provide the id of the sentence along with the corresponding label."

    # results maps ID -> [ID, label of run 1, label of run 2, label of run 3];
    # a run that gave no label for an ID leaves an empty string in its slot.
    results = {}
    for run in range(n_runs):
        # NOTE(review): refresh_conversation() appends every exchange to the
        # shared context, so runs after the first can see the earlier
        # answers and are not fully independent votes.
        reply = refresh_conversation(prompt)
        for sentence_id, label in _parse_labels(reply):
            row = results.setdefault(sentence_id, [sentence_id] + [''] * n_runs)
            row[run + 1] = label

    # Per-run labels.
    with open('output_all_runs.csv', 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(['ID'] + [f'Label {k}' for k in range(1, n_runs + 1)])
        csvwriter.writerows(results.values())

    # Majority vote, computed from the in-memory rows so the CSV header can
    # never be mistaken for a data row. On a tie the label seen first wins.
    predictions = {}
    for sentence_id, row in results.items():
        votes = [label for label in row[1:] if label]
        if votes:
            predictions[sentence_id] = Counter(votes).most_common(1)[0][0]

    with open('output.csv', 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(['ID', 'Label'])
        csvwriter.writerows(predictions.items())

    # Align ground truth and predictions on the sentence ID.
    ground_truth = _read_label_csv("creative_validation_ground_truth.csv")
    scored_ids = [sentence_id for sentence_id in ground_truth if sentence_id in predictions]
    if not scored_ids:
        raise ValueError(
            "No ground-truth ID has a matching prediction; "
            "check the ID format in the model output."
        )
    missing = len(ground_truth) - len(scored_ids)
    if missing:
        print(f"Warning: {missing} ground-truth IDs have no prediction and are excluded from the metrics.")

    ground_truth_labels = [ground_truth[sentence_id] for sentence_id in scored_ids]
    predicted_labels = [predictions[sentence_id] for sentence_id in scored_ids]

    # Compute evaluation metrics
    accuracy = accuracy_score(ground_truth_labels, predicted_labels)
    precision = precision_score(ground_truth_labels, predicted_labels, average='macro')
    recall = recall_score(ground_truth_labels, predicted_labels, average='macro')
    f1 = f1_score(ground_truth_labels, predicted_labels, average='macro')
    cross_tab = pd.crosstab(
        ground_truth_labels,
        predicted_labels,
        rownames=['Ground truth'],
        colnames=['Predicted'],
    )

    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1-score:", f1)
    print(cross_tab)


def _parse_labels(reply):
    """Return the ``(id, label)`` pairs found in one model reply.

    A line counts as a pair when it holds exactly two non-empty
    comma-separated fields; whitespace around each field is ignored, so both
    ``"12, creative"`` and ``"12,creative"`` are accepted. Every other line
    is skipped.
    """
    pairs = []
    for line in reply.split('\n'):
        fields = [field.strip() for field in line.split(',')]
        if len(fields) == 2 and all(fields):
            pairs.append((fields[0], fields[1]))
    return pairs


def _read_label_csv(path):
    """Read a two-column ``ID,Label`` CSV into an ``{id: label}`` dict.

    The first row is treated as a header and skipped; blank lines are ignored.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields.
    """
    labels = {}
    with open(path, newline='', encoding='utf-8') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                continue
            if len(row) != 2:
                raise ValueError(f"{path}: expected 'ID,Label' rows, got {row!r}")
            sentence_id, label = (field.strip() for field in row)
            labels[sentence_id] = label
    return labels

# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()