In [160]:
import plotly.graph_objects as go
from openai import OpenAI
import pandas as pd
import json
import os

# Read the API key from the environment so no secret is hardcoded in the notebook.
# os.environ.get returns None when the variable is not set.
OPENAI_API_KEY=os.environ.get("OPENAI_API_KEY")
# Single shared client; generate_completion and generate_moderation both use it.
client = OpenAI(api_key=OPENAI_API_KEY)

# MODERATION

In [161]:
def generate_completion(input):
    """Ask the chat model to rate a message as red / orange / green.

    The system prompt instructs the model to act as a moderator and to answer
    with a JSON object holding a confidence score per category.

    Args:
        input: The user message to classify; sent as the "user" chat message.

    Returns:
        The text content of the first choice returned by the chat completion
        call. With JSON mode enabled this is expected to be a JSON object
        string that `json.loads` can decode.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        # JSON mode: without it the model is free to answer in prose or wrap
        # the object in Markdown fences, which breaks downstream json.loads.
        # The system prompt already mentions "json", as JSON mode requires.
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content":
                '''
                You are a moderator, your goal is to find if a message is offensing or not. 
                You have to categorize them in 3 categories.
                red : Insulting message, hate, sexual, harrassment, or any other kind of offensing message. It had to be very outraging.
                orange : Message givin an opinion about potential sensitive subject but not offensing anyone or insulting.
                green : a message without any disrepectfull word and not talking about potentital sensitive subject.
                you have to return the result as a json object with a score of confidence for each of the catogory.
                '''
            },
            {
                "role": "user",
                "content": input
            }
        ]
    )
    return completion.choices[0].message.content

def parse_completion(completion):
    """Decode the JSON payload of a chat completion.

    Chat models frequently surround JSON with whitespace or a Markdown code
    fence (three backticks, optionally followed by a language tag such as
    ``json``). Both are removed before decoding so that such replies do not
    fail.

    Args:
        completion: JSON document as ``str`` (or ``bytes``/``bytearray``,
            which are handed to ``json.loads`` untouched).

    Returns:
        The decoded Python object (a dict for a JSON object).

    Raises:
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    text = completion
    if isinstance(text, str):
        text = text.strip()
        if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
            inner = text[3:-3]
            # The opening fence line may carry a language tag ("json"); drop
            # that line unless the JSON itself already starts on it.
            first_line, newline, remainder = inner.partition("\n")
            if newline and not first_line.lstrip().startswith(("{", "[")):
                inner = remainder
            text = inner.strip()
    return json.loads(text)

In [230]:
def generate_moderation(input):
    """Submit `input` to the moderation endpoint and return the response as-is.

    Args:
        input: The content to moderate; forwarded unchanged as the `input`
            argument of `client.moderations.create`.

    Returns:
        The response object produced by `client.moderations.create`.
    """
    return client.moderations.create(input=input)

def parse_moderation(moderation):
    """Convert the first result of a moderation response into plain dicts.

    Only `moderation.results[0]` is read.

    Args:
        moderation: Object exposing `results[0].flagged`,
            `results[0].categories.<name>` and
            `results[0].category_scores.<name>` for each category listed
            below.

    Returns:
        A dict of the form
        ``{'flagged': <overall flag>, 'categories': {<label>: {'flagged': ..., 'score': ...}}}``
        where each label is the attribute name with underscores replaced by
        spaces (e.g. ``self_harm_intent`` -> ``'self harm intent'``).
    """
    first_result = moderation.results[0]
    overall_flag = first_result.flagged

    # Attribute names on `categories` / `category_scores`; the order here is
    # the key order of the resulting 'categories' dict.
    attribute_names = (
        'harassment',
        'harassment_threatening',
        'hate',
        'hate_threatening',
        'self_harm',
        'self_harm_instructions',
        'self_harm_intent',
        'sexual',
        'sexual_minors',
        'violence',
        'violence_graphic',
    )

    per_category = {}
    for attribute in attribute_names:
        label = attribute.replace('_', ' ')
        per_category[label] = {
            'flagged': getattr(first_result.categories, attribute),
            'score': getattr(first_result.category_scores, attribute),
        }

    return {'flagged': overall_flag, 'categories': per_category}

# GRAPH

In [231]:
def plot_parsed_moderation(parsed_moderation):
    """Show a radar chart of the flagged categories of one parsed moderation.

    Args:
        parsed_moderation: Dict with a 'categories' mapping of
            category name -> {'flagged': ..., 'score': ...}
            (the structure built by `parse_moderation`).

    Only categories whose 'flagged' value is truthy are plotted, each at the
    radius given by its 'score'. The figure is displayed with `fig.show()`;
    nothing is returned.
    """
    # Keep (name, score) pairs together so the two plot axes cannot drift apart.
    flagged = [
        (category, data['score'])
        for category, data in parsed_moderation['categories'].items()
        if data['flagged']
    ]
    categories = [category for category, _ in flagged]
    scores = [score for _, score in flagged]

    fig = go.Figure(
        data=go.Scatterpolar(
            r=scores,
            theta=categories,
            fill='toself',
            mode='none',
        ),
    )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                # The previous range=[0, 'auto'] used a non-numeric endpoint,
                # which is not a valid axis range. State the intent explicitly:
                # start at zero and let Plotly choose the upper bound.
                autorange=True,
                rangemode='tozero',
                visible=False,
            ),
        ),
        showlegend=False,
        title="Moderation categories",
    )

    fig.show()

In [232]:
def plot_multiple_parsed_moderation(list_of_moderation):
    """Show a radar chart of average scores across several parsed moderations.

    Args:
        list_of_moderation: Iterable of dicts, each with a 'categories'
            mapping of category name -> {'flagged': ..., 'score': ...}.

    A category is plotted when at least one entry flags it. Its radius is the
    mean of that category's score over every entry that contains the
    category, flagged or not. The figure is displayed with `fig.show()`;
    nothing is returned.
    """
    scores_by_category = {}
    flagged_categories = set()

    for parsed in list_of_moderation:
        for category, data in parsed['categories'].items():
            scores_by_category.setdefault(category, []).append(data['score'])
            if data['flagged']:
                flagged_categories.add(category)

    # Iterate over scores_by_category (not the set) to keep first-seen order.
    categories = [
        category for category in scores_by_category
        if category in flagged_categories
    ]
    averaged_scores = [
        sum(scores_by_category[category]) / len(scores_by_category[category])
        for category in categories
    ]

    fig = go.Figure(
        data=go.Scatterpolar(
            r=averaged_scores,
            theta=categories,
            fill='toself',
            mode='none',
        )
    )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=False,
                # The previous range=[0, 'auto'] used a non-numeric endpoint,
                # which is not a valid axis range. State the intent explicitly:
                # start at zero and let Plotly choose the upper bound.
                autorange=True,
                rangemode='tozero',
            ),
        ),
        showlegend=False,
        title="Moderation categories",
    )

    fig.show()

In [242]:
def plot_flagged_vs_nonflagged(list_of_moderation):
    """Show a two-bar chart counting flagged versus non-flagged messages.

    Args:
        list_of_moderation: Iterable of dicts, each carrying a top-level
            'flagged' entry whose truthiness decides which bar it counts
            toward.

    The figure is displayed with `fig.show()`; nothing is returned.
    """
    # One pass over the input; True counts as 1 when summed.
    statuses = [bool(entry['flagged']) for entry in list_of_moderation]
    flagged_counts = sum(statuses)
    non_flagged_counts = len(statuses) - flagged_counts

    bars = [
        go.Bar(name='Non-Flagged', x=['Non-Flagged'], y=[non_flagged_counts]),
        go.Bar(name='Flagged', x=['Flagged'], y=[flagged_counts]),
    ]
    fig = go.Figure(data=bars)

    fig.update_layout(
        barmode='group',
        title='Messages Flagged vs Non-Flagged',
        xaxis_title='Flagged Status',
        yaxis_title='Count',
    )

    fig.show()

# TEST

In [235]:
# Classify one sample message with the chat-based moderator.
completion_1 = generate_completion("va crever")
# Last expression of the cell: the decoded JSON is displayed as the cell output.
parse_completion(completion_1)

{'red': 0.9, 'orange': 0.1, 'green': 0}

In [236]:
# Run three sample messages through the moderation endpoint and keep both the
# raw response (moderation_N) and its dict form (parsed_moderation_N).
moderation_1 = generate_moderation("Nina tu es vraiment chiante je vais te faire l'amour et te tuer apres.")
parsed_moderation_1 = parse_moderation(moderation_1)

# Near-empty input.
moderation_2 = generate_moderation(" ?")
parsed_moderation_2 = parse_moderation(moderation_2)

moderation_3 = generate_moderation("va crever t'es nul mais tres sexy va te suicider")
parsed_moderation_3 = parse_moderation(moderation_3)

In [237]:
# Collect the parsed results; the multi-message plots take this list as input.
list_of_parsed_moderation = [parsed_moderation_1, parsed_moderation_2, parsed_moderation_3]
# For each message, print every flagged category with its score.
for res in list_of_parsed_moderation :
    # print('\n') emits two newlines, separating the messages in the output.
    print('\n')
    for category, data in res['categories'].items():
        if data['flagged']:
            print(category + ' : ', data['score'])



harassment :  0.6455599665641785
harassment threatening :  0.8196607232093811
sexual :  0.6600853204727173
violence :  0.9653856158256531




harassment :  0.8461693525314331
harassment threatening :  0.7307544350624084
self harm :  0.8242273330688477
self harm instructions :  0.781873345375061
self harm intent :  0.7638778686523438


In [241]:
# Radar chart of the flagged categories of the first sample message.
plot_parsed_moderation(parsed_moderation_1)

In [239]:
# Radar chart of the average score per category flagged in at least one sample message.
plot_multiple_parsed_moderation(list_of_parsed_moderation)

In [243]:
# Bar chart counting flagged versus non-flagged sample messages.
plot_flagged_vs_nonflagged(list_of_parsed_moderation)