# Install CodeCarbon package
Read the [documentation](https://mlco2.github.io/codecarbon/) about the library if necessary. Remember that we provide a [CodeCarbon notebook](https://colab.research.google.com/drive/1boavnGOir0urui8qktbZaOmOV2pS5cn6?usp=sharing) with the example in its specific use in our competition.


In [None]:
!pip install codecarbon
!pip install dotenv

# Import libraries

In [None]:
import requests, zipfile, io
from requests.adapters import HTTPAdapter, Retry
from typing import List, Dict
import random
import json
import os
from dotenv import load_dotenv
import pandas as pd
from codecarbon import EmissionsTracker

# Endpoints
These URL addresses are necessary for the connection to the server.

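**IMPORTANT:** Define the variables `SERVER_URL` (the URL of the server) and `ACCESS_TOKEN` (your user token) in your environment or in a `.env` file; the next cell reads them with `load_dotenv()` and `os.getenv`.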

In [None]:
# Read the server address and the user token from the environment (or a .env file).
load_dotenv()
URL = os.getenv("SERVER_URL")
TOKEN = os.getenv("ACCESS_TOKEN")
if not URL:
    # Fail with a clear message instead of a TypeError on `None + str` below.
    raise RuntimeError("SERVER_URL is not set; define it in the environment or in a .env file")
# Never echo the token itself: cell outputs are stored and shared with the notebook.
print(URL, "(token loaded)" if TOKEN else "(ACCESS_TOKEN is not set)")

# Each endpoint is a template; {TASK}, {TOKEN} and {RUN} are filled in with str.format.
# Download endpoints
ENDPOINT_DOWNLOAD_TRIAL = URL + "/{TASK}/download_trial/{TOKEN}"
ENDPOINT_DOWNLOAD_TRAIN = URL + "/{TASK}/download_train/{TOKEN}"

# Trial endpoints
ENDPOINT_GET_MESSAGES_TRIAL = URL + "/{TASK}/getmessages_trial/{TOKEN}"
ENDPOINT_SUBMIT_DECISIONS_TRIAL = URL + "/{TASK}/submit_trial/{TOKEN}/{RUN}"

# Test endpoints
ENDPOINT_GET_MESSAGES = URL + "/{TASK}/getmessages/{TOKEN}"
ENDPOINT_SUBMIT_DECISIONS = URL + "/{TASK}/submit/{TOKEN}/{RUN}"

# Download Data
To download the data, you can make use of the **function defined in the following**.

The following function downloads the trial data. To adapt it to download the train and test data, follow the instructions given on the [website of the competition](https://sites.google.com/view/mentalriskes2024/evaluation).

In [None]:
def download_messages_trial(task: str, token: str):
    """Download the trial data of a task and unpack it into ``./data/<task>/trial/``.

    Args:
        task (str): task from which the data is to be retrieved
        token (str): authentication token

    When the server does not answer with status 200, the status code and the
    response body are printed and nothing is written to disk.
    """
    response = requests.get(ENDPOINT_DOWNLOAD_TRIAL.format(TASK=task, TOKEN=token))

    if response.status_code != 200:
        print("Trial - Status Code " + task + ": " + str(response.status_code) + " - Error: " + str(response.text))
        return

    target_dir = "./data/{task}/trial/".format(task=task)
    # exist_ok=True keeps the cell re-runnable: without it a second download
    # raises FileExistsError (download_messages_train already does this).
    os.makedirs(target_dir, exist_ok=True)
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(target_dir)

In [None]:
def download_messages_train(task: str, token: str):
    """Fetch the train archive of a task and unpack it into ``./data/<task>/train/``.

    Args:
        task (str): task from which the data is to be retrieved
        token (str): authentication token

    A response whose status is not 200 is reported on stdout and nothing is
    extracted.
    """
    endpoint = ENDPOINT_DOWNLOAD_TRAIN.format(TASK=task, TOKEN=token)
    response = requests.get(endpoint)

    # Guard clause: report the failure and stop before touching the disk.
    if response.status_code != 200:
        print("Train - Status Code " + task + ": " + str(response.status_code) + " - Error: " + str(response.text))
        return

    destination = "./data/{task}/train/".format(task=task)
    archive = zipfile.ZipFile(io.BytesIO(response.content))
    os.makedirs(destination, exist_ok=True)
    archive.extractall(destination)

# Main

In [None]:
def download_data(task: str, token: str):
    """Download the data of ``task`` with the given ``token``.

    Only the train split is fetched; the trial download is left disabled.
    """
    # download_messages_trial(task, token)
    download_messages_train(task=task, token=token)

def get_post_data(task: str, token: str):
    """Run the prediction period for ``task`` under a CodeCarbon emissions tracker.

    Args:
        task (str): task name handed to the client
        token (str): authentication token handed to the client
    """
    number_runs = 3  # Max: 3

    # Emissions Tracker Config
    tracker = EmissionsTracker(
        save_to_file=True,
        log_level="WARNING",
        tracking_mode="process",
        output_dir=".",
        allow_multiple_runs=True,
    )

    # Prediction period
    # NOTE(review): Client_task1_2 is not defined in the visible part of this
    # file; confirm where it comes from before enabling this function.
    Client_task1_2(task, token, number_runs, tracker).run_task1_2(5, 0.1)

Be careful! In this specific example we use the name of the task1 to do the get, knowing that it is the same data for both task 1 and task 2. In addition, the data upload is performed for both tasks.

In [None]:
if __name__ == "__main__":
    # Only the data download is active; the prediction run below stays disabled.
    download_data("task2", TOKEN)
    # get_post_data("task1",TOKEN)

In [None]:
!pip install groq

Collecting groq
  Downloading groq-0.22.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.22.0-py3-none-any.whl (126 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.7/126.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.22.0


In [None]:
from groq import Groq
import re
import os


# Task 1 (LLM)

In this section, we use a large language model (LLaMA 3 with 70 billion parameters) to classify users based on the content of their messages. The model is prompted to assess whether there are signs of a gambling disorder in the user’s language. Each user is assigned a binary label:

*   0 if there is no indication of a gambling disorder,

*  1 if there is any indication suggesting the presence of such a disorder.



In [None]:
# Read the data
import json
import os
import datetime
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


data_path = Path("data/")

# Gold annotations of task 1: the Subject/Risk columns are renamed to user/label.
annotation_path = data_path / "task1" / "train" / "gold_task1.txt"
df = pd.read_csv(annotation_path)
labels_df = df.rename(columns={"Subject": "user", "Risk": "label"})

# One JSON file per user; the file name (without ".json") is the user id.
user_messages_path = data_path / "task1" / "train" / "subjects"
user_messages = list(user_messages_path.glob("*.json"))
if not user_messages:
    # Fail early with a clear message instead of an opaque error further down.
    raise FileNotFoundError(f"No user message files (*.json) found in {user_messages_path}")
messages_users = [s.name.replace(".json", "") for s in user_messages]

# Load every user's messages into one long dataframe tagged with the user id.
all_messages = []
for user_file in user_messages:
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding (assumes the JSON files are UTF-8 — TODO confirm).
    with open(user_file, "r", encoding="utf-8") as file:
        messages = json.load(file)
    messages_df = pd.DataFrame(messages)
    messages_df["date"] = pd.to_datetime(messages_df["date"], format="%Y-%m-%d %H:%M:%S%z")
    messages_df["user"] = user_file.name.replace(".json", "")
    all_messages.append(messages_df)

all_messages_df = pd.concat(all_messages, ignore_index=True)
all_messages_df.set_index("date", inplace=True)

In [None]:
# Map every user id to the list of that user's messages
all_messages_df.head()

user_messages = {
    user: list(texts)
    for user, texts in all_messages_df.groupby('user')['message']
}


In [None]:
def predict_with_llama1(messages):
    """
    Uses the LLaMA 3 70B language model via the Groq API to analyze a user's messages and determine
    whether they show signs of gambling disorder.

    The function sends a prompt to the model that includes specific criteria for identifying signs
    of compulsive gambling behavior (e.g., frequent references to betting, inability to stop,
    financial loss, etc.).

    Based on the model's interpretation, it returns:
    - 1 if there are multiple clear and direct indications of gambling addiction,
    - 0 if there is insufficient or no strong evidence of such behavior.

    Parameters:
        messages (str | list[str]): The user's messages. The value is interpolated into the
            prompt as-is, so a list of strings (which is what the prediction loop passes)
            appears as its Python list representation.

    Returns:
        int: 1 if gambling disorder is detected, 0 otherwise.

    Raises:
        ValueError: If the model's answer contains no standalone 0 or 1.
    """
    client = Groq(
        api_key=os.getenv("GROQ_API_KEY"),
    )

    prompt = f"""
    A continuación se te dará una serie de mensajes escritos por una persona. Tu tarea es analizar el contenido y determinar si hay indicios claros y repetidos de comportamientos relacionados con la ludopatía (adicción al juego).

    Entendemos por indicios claros:
    - Referencias frecuentes a apuestas, juegos de azar, o trading compulsivo.
    - Expresiones de pérdida de control, ansiedad por jugar, o necesidad urgente de recuperar dinero.
    - Menciones a pérdidas económicas significativas por estos juegos o conductas similares.
    - Dificultad para dejar de jugar o pensar en el juego constantemente.

    No debes interpretar todos los comentarios sobre apostar, jugar a juegos de azar o realizar trading como muestra de adicción ya que el objetivo es únicamente detectar los casos de ludopatía, adicción y pérdida de control.

    Tu respuesta debe ser exclusivamente un número:
    - 1 si hay varios indicios claros y directos de ludopatía.
    - 0 si no hay suficientes evidencias claras (incluso si hay menciones vagas o aisladas).

    Ahora evalúa los siguientes mensajes:
    {messages}
    """

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-70b-8192",
    )

    raw_answer = chat_completion.choices[0].message.content or ""

    # The model does not always reply with a bare digit ("1.", "Respuesta: 1", ...),
    # so take the first standalone 0/1 instead of calling int() on the whole reply.
    label_match = re.search(r"\b[01]\b", raw_answer)
    if label_match is None:
        raise ValueError(f"Expected a 0/1 answer from the model, got: {raw_answer!r}")

    return int(label_match.group())


Now, we iterate over every user, call the predictive function on that user's messages and save the results in a new dataframe.

In [None]:
# Classify every user with the LLM, saving a backup CSV every 10 users.
pred_rows = []
users = sorted(user_messages.keys())
for i, user in enumerate(users):
    print(f"Iteration {i + 1} of {len(users)}")
    try:
        # The API call is the step that can actually fail (network, rate limit,
        # unparsable answer), so it has to sit inside the try block.
        label = predict_with_llama1(user_messages[user])
    except Exception as e:
        # Best effort: report the failure and go on with the next user.
        print(f"Error while processing: {e}")
        continue

    pred_rows.append({
        'user': user,
        'label': label,
    })

    if i % 10 == 0 and i != 0:
        temp_df = pd.DataFrame(pred_rows)
        temp_df.to_csv("predictions_backup2.csv", index=False)
        print(f"Saved up to iteration {i}")

predictions = pd.DataFrame(pred_rows)

# Written under the name that the evaluation cell reads ('./predictions_task1.csv').
predictions.to_csv("predictions_task1.csv", index=False)

Finally, run the comparisons

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df_pred = pd.read_csv('./predictions_task1.csv')  # File with the predictions


# After the merge, label_x is the gold label and label_y is the predicted one.
df = pd.merge(labels_df, df_pred, on='user')

accuracy = accuracy_score(df['label_x'], df['label_y'])  # Compare the label columns
conf_matrix = confusion_matrix(df['label_x'], df['label_y'])
class_report = classification_report(df['label_x'], df['label_y'])

print("Accuracy:", accuracy)
print("\nConfusion matrix:\n", conf_matrix)
print("\nReport:\n", class_report)

# scikit-learn puts true labels on the rows and predictions on the columns, so
# for the binary labels 0/1 ravel() yields tn, fp, fn, tp — in that order.
TN, FP, FN, TP = conf_matrix.ravel()

true_positive_rate = TP / (TP + FN)
false_positive_rate = FP / (FP + TN)
true_negative_rate = TN / (TN + FP)
false_negative_rate = FN / (FN + TP)

print(f"True Positive Rate (TPR): {true_positive_rate:.2f}")
print(f"False Positive Rate (FPR): {false_positive_rate:.2f}")
print(f"True Negative Rate (TNR): {true_negative_rate:.2f}")
print(f"False Negative Rate (FNR): {false_negative_rate:.2f}")


Accuracy: 0.5485714285714286

Confusion matrix:
 [[143  35]
 [123  49]]

Report:
               precision    recall  f1-score   support

           0       0.54      0.80      0.64       178
           1       0.58      0.28      0.38       172

    accuracy                           0.55       350
   macro avg       0.56      0.54      0.51       350
weighted avg       0.56      0.55      0.52       350

True Positive Rate (TPR): 0.54
False Positive Rate (FPR): 0.42
True Negative Rate (TNR): 0.58
False Negative Rate (FNR): 0.46


The model's performance, with an accuracy of 55%, is relatively low, indicating that it struggles to make reliable predictions. The confusion matrix reveals that while the model is fairly good at identifying negative cases (143 of 178, a True Negative Rate of 80%), it has significant difficulty in detecting positive cases. The True Positive Rate (TPR) is only 28% (49 of 172), meaning the model fails to identify a large portion of individuals who may actually have gambling disorders. This is further reflected in the F1-score for class 1 (gambling disorder), which is quite low at 0.38.

This shows that the nature of gambling disorder might be complex, with subtle and diverse linguistic cues that are difficult for a pre-trained language model to detect.


# Task 2

In this section, we use a large language model (LLaMA 3 with 70 billion parameters) to classify users based on the content of their messages, specifically identifying the type of gambling disorder behavior they may exhibit. The model is prompted to assess the user's language and categorize their behavior into one of four specific types of gambling addiction:

* betting : Addiction to traditional betting activities such as sports betting, horse racing, or casino games (physical or online).  
* lootboxes : Addiction to purchasing random virtual rewards in video games, such as loot boxes or in-game item purchases.  
* onlinegaming : Addiction to online multiplayer video games, where compulsive playing occurs without direct monetary transactions.  
* trading : Addiction to financial asset trading (cryptocurrencies, stocks, forex), characterized by compulsive, high-risk trading behavior.

Each user's behavior is classified based on these categories, with the model assigning one of the four labels to their messages.


In [None]:
# Attach the labels to every message, then collect each user's messages in a list
merged_df = pd.merge(all_messages_df, labels_df, on="user", how="left")
user_messages2 = {
    user: list(texts)
    for user, texts in merged_df.groupby('user')['message']
}

LLM Function

In [None]:
def predict_with_llama2(messages):
    """
    Uses the LLaMA 3 70B language model via the Groq API to analyze a user's messages and classify
    their gambling disorder behavior into specific categories.

    The function classifies the user's behavior into one of the following categories based on the
    content of their messages:
    - 'betting': Addiction to traditional betting, such as sports betting, horse racing, or casino
      games (physical or online).
    - 'onlinegaming': Addiction to online multiplayer video games, where compulsive playing occurs
      without direct monetary transactions.
    - 'lootboxes': Addiction to random reward systems in video games, where users purchase "boxes"
      for virtual items, similar to slot machines.
    - 'trading': Addiction to financial asset trading (cryptocurrencies, stocks, forex), involving
      compulsive trading behavior and high-risk decisions for immediate gratification.

    The model's answer is normalised before it is returned: surrounding whitespace, quotes and
    a trailing period are removed and the text is lower-cased. If the answer is longer than a
    single category name, the category mentioned first is returned.

    Parameters:
        messages (str | list[str]): The user's messages. The value is interpolated into the
            prompt as-is, so a list of strings (which is what the prediction loop passes)
            appears as its Python list representation.

    Returns:
        str: 'betting', 'onlinegaming', 'lootboxes' or 'trading'. If the answer mentions none of
        them, the stripped answer is returned unchanged so the caller can inspect it.
    """

    client = Groq(
        api_key=os.getenv("GROQ_API_KEY"),
    )

    prompt = f"""
    A continuación se te dará una serie de mensajes escritos por una persona que sufre algún tipo de ludopatía (adicción al juego). El objetivo es clasificar la condición según el tipo específico de comportamiento adictivo. Existen las siguientes categorías:

    - 'betting': Hace referencia a la adicción a las apuestas tradicionales, como las deportivas, carreras de caballos o juegos de azar en casinos físicos o en línea (ruleta, póker, etc.).
    - 'onlinegaming': Se refiere a la adicción a los videojuegos en línea multijugador, donde la persona juega compulsivamente sin que haya necesariamente una transacción monetaria directa.
    - 'lootboxes': Es la adicción a sistemas de recompensas aleatorias dentro de videojuegos, donde se compra una "caja" con dinero real para obtener ítems virtuales, generando un comportamiento similar al de las máquinas tragamonedas.
    - 'trading': Engloba la adicción al comercio de activos financieros (como criptomonedas, acciones o forex), en la que el individuo realiza operaciones compulsivas buscando gratificación inmediata, asumiendo altos riesgos.

    Tu respuesta debe ser exclusivamente una de estas palabras, sin frases adicionales, sin comillas y sin explicaciones:

    betting
    onlinegaming
    lootboxes
    trading

    Ahora evalúa según los siguientes mensajes:
    {messages}
    """

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-70b-8192",
    )

    raw_answer = chat_completion.choices[0].message.content or ""
    categories = ("betting", "onlinegaming", "lootboxes", "trading")

    # Stray whitespace, quotes or capitalisation would otherwise become extra,
    # spurious classes when the predictions are compared with the gold labels.
    cleaned = raw_answer.strip().strip("'\"`.").lower()
    if cleaned in categories:
        return cleaned

    # Longer answer: pick the category that is mentioned first, if any.
    lowered = raw_answer.lower()
    positions = {name: lowered.find(name) for name in categories if name in lowered}
    if positions:
        return min(positions, key=positions.get)

    return raw_answer.strip()

Iterate over every user

In [None]:
# Classify every user with the LLM, saving a backup CSV every 10 users.
pred_rows = []
for i, user in enumerate(sorted(user_messages2.keys())):
    try:
        # The API call is the step that can actually fail (network, rate limit),
        # so it has to sit inside the try block.
        label = predict_with_llama2(user_messages2[user])
    except Exception as e:
        # Best effort: report the failure and go on with the next user.
        print(f"Error while processing: {e}")
        continue

    pred_rows.append({
        'user': user,
        'label': label,
    })

    if i % 10 == 0 and i != 0:
        temp_df = pd.DataFrame(pred_rows)
        temp_df.to_csv("predictions_backup_task2.csv", index=False)
        print(f"Saved up to iteration {i}")

predictions = pd.DataFrame(pred_rows)

predictions.to_csv("predictions_task2.csv", index=False)

Load the actual results for task 2.

In [None]:
# Gold annotations of task 2: the Subject/Type columns are renamed to user/label.
annotation_path = data_path.joinpath("task2", "train", "gold_task2.txt")
df = pd.read_csv(annotation_path)
annotation_users = df["Subject"].values
df.head()

column_names = {"Subject": "user", "Type": "label"}
labels_df = df.rename(columns=column_names)
labels_df.head()


Unnamed: 0,user,label
0,user1036,betting
1,user1037,betting
2,user1150,betting
3,user124,betting
4,user1301,betting


Evaluate the result

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df_pred = pd.read_csv('./predictions_task2.csv')  # File with the predictions

# After the merge, label_x is the gold label and label_y is the predicted one.
df = labels_df.merge(df_pred, on='user')
y_true, y_pred = df['label_x'], df['label_y']

accuracy = accuracy_score(y_true, y_pred)  # Compare the label columns
conf_matrix = confusion_matrix(y_true, y_pred)
class_report = classification_report(y_true, y_pred)

print("Accuracy:", accuracy)
print("\nConfusion matrix:\n", conf_matrix)
print("\nReport:\n", class_report)


Accuracy: 0.6862170087976539

Confusion matrix:
 [[ 85   0   0   0]
 [  8   8  10   0]
 [ 86   3  15   0]
 [  0   0   0 126]]

Report:
               precision    recall  f1-score   support

     betting       0.47      1.00      0.64        85
   lootboxes       0.73      0.31      0.43        26
onlinegaming       0.60      0.14      0.23       104
     trading       1.00      1.00      1.00       126

    accuracy                           0.69       341
   macro avg       0.70      0.61      0.58       341
weighted avg       0.73      0.69      0.63       341



The model's performance in Task 2, with an accuracy of 69%, is relatively stronger than in Task 1 but still exhibits notable room for improvement. The confusion matrix highlights that the model is particularly effective at identifying the "trading" category, with a perfect recall of 1.00 and precision of 1.00, which indicates the model can consistently classify this behavior. However, for the other categories, the performance is more uneven.

Specifically, the "betting" class shows a high recall of 1.00, meaning the model can accurately detect all instances of this behavior, but it has low precision (0.47), which suggests that many of the predictions for "betting" are false positives.

Again, these results show that it is hard for a pre-trained model to detect every language cue within the messages.