In [1]:
import os
import json
import pandas as pd

In [2]:
def generate_prompt(label):
    if label== 1:
        return "Teniendo en cuenta que el usuario está deprimido, justifica los motivos de esto con base en sus publicaciones"
    elif label==0:
        return "Teniendo en cuenta que el usuario no está deprimido, justifica los motivos de esto con base en sus publicaciones"
    elif label==-1:
        return "Trata de inferir si el usuario, con base en la descripción textual de sus publicaciones, presenta síntomas de depresión o no"

In [3]:
def process_mentalrisk_data():
    users_dirs = ["Datasets/MentalRisk/subjects_train", "Datasets/MentalRisk/subjects_trial"]
    usuarios = []

    # Read All User Data
    for dir in users_dirs:
        user_files = [os.path.join(dir,file) for file in os.listdir(dir)]
        
        for user_file in user_files:
            filename,extension = os.path.splitext(user_file)
            
            with open(user_file,"r") as f:
                content=f.read()
                user=json.loads(content)
                usuarios.append({"username" : filename.split("/")[-1], "posts" : user})


    # Read All Labels to generate prompt
    train_labels = pd.read_csv("Datasets/MentalRisk/gold_train_task2a.csv")
    trial_labels = pd.read_csv("Datasets/MentalRisk/gold_trial_task2a.csv")
    labels = pd.concat([train_labels,trial_labels])
    labels.set_index("Subject",inplace=True)
    finallabels = labels.to_dict(orient='index')

    # Transform Data into a new format
    final_users = []

    for i,user in enumerate(usuarios):
        label = finallabels[user["username"]]["label"]
        prompt = generate_prompt(label)

        user_template = {
            "username": user["username"],
            "depressed": label,
            "response": "",
            "posts": [],
            "prompt" : prompt
        }
        
        for j,post in enumerate(user["posts"]):
            user_template["posts"].append({
                "id": j,
                "text": post["message"],
                "image_path": "None",
                "image_description": "None"
            })

        final_users.append(user_template)
            
    return final_users

In [6]:
import shutil


def process_reddit_data():
    root_dir = "Datasets/Reddit"
    users_dir = os.listdir(root_dir)
    text_extension = ['.txt']
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']


    users_data = []

    
    for user_dir in users_dir:
        posts_files=os.listdir(os.path.join(root_dir,user_dir))

        user_template = {
            "username": f"{user_dir}",
            "depressed": -1,
            "response": "",
            "posts": [],
            "prompt" : generate_prompt(-1)
        }

        for j in range(len(posts_files)):
            # Obtiene el texto de la publicación
            post_metadata = os.path.splitext(posts_files[j])[0].split("_")
            text = post_metadata[3:]
            text = " ".join(text)
            
            # Comprueba si es imagen o texto
            file_extension = os.path.splitext(posts_files[j])[1].lower()
            
            if file_extension in text_extension:
                image_path = "None"
            elif file_extension in image_extensions:
                image_path = posts_files[j]
                shutil.move(os.path.join(root_dir,user_dir,posts_files[j]),"UserImages")
            
            # Añade la publicación a la lista
            user_template["posts"].append({
                "id": j,
                "text": text,
                "image_path": image_path,
                "image_description": "None"
            })
            
        users_data.append(user_template)

    return users_data

reddit_data = process_reddit_data()
reddit_data

[{'username': 'Healthy-Decision-157',
  'depressed': -1,
  'response': '',
  'posts': [{'id': 0,
    'text': 'i feel like im misdiagnosed',
    'image_path': 'None',
    'image_description': 'None'},
   {'id': 1,
    'text': 'can i be depressed and still feel happy for the',
    'image_path': 'None',
    'image_description': 'None'},
   {'id': 2,
    'text': 'achievements missing',
    'image_path': 'None',
    'image_description': 'None'}],
  'prompt': 'Trata de inferir si el usuario, con base en la descripción textual de sus publicaciones, presenta síntomas de depresión o no'},
 {'username': 'lostinthematrix',
  'depressed': -1,
  'response': '',
  'posts': [{'id': 0,
    'text': 'mornings are the worst',
    'image_path': 'None',
    'image_description': 'None'},
   {'id': 1,
    'text': 'everyone always leaves',
    'image_path': 'None',
    'image_description': 'None'}],
  'prompt': 'Trata de inferir si el usuario, con base en la descripción textual de sus publicaciones, presenta 

In [8]:
users_path = "UnlabeledImages"

#reddit_data = process_reddit_data()
mentalrisk_users = process_mentalrisk_data()
fulldata = reddit_data+mentalrisk_users

for i in range(len(fulldata)):
    with open(os.path.join(users_path,f"user_{i}.json"),"w",encoding="utf8") as f:
        fulldata[i]["username"]=f"user_{i}"
        content=json.dumps(fulldata[i],ensure_ascii=False)
        f.write(content)

In [9]:
import os
import json

source_dir = "LabeledImages"
destdir = "PostPrompts"

for i,file in enumerate(os.listdir(source_dir)):
    with open(os.path.join(source_dir,file),"r") as user_data:

        userobj = json.loads(user_data.read())
        userprompt = userobj["prompt"] + "\n\n"
        postxt ="\n"

        for post in userobj["posts"]:
            postxt+=post["text"]+"\nimage content : "+post["image_description"]+"\n"

        userprompt+=postxt
        print(userprompt)

        with open(os.path.join(destdir,f"{file}.txt"),"w") as prompt:
            prompt.write(userprompt)

FileNotFoundError: [Errno 2] No such file or directory: 'LabeledImages'