In [None]:
from firebase_utils import get_data
import pandas as pd
import os
import json
import glob
import psycopg2
from dotenv import load_dotenv

In [2]:
# Fetch the survey responses via firebase_utils.get_data(); later cells iterate
# over the result and read each item's 'answers' mapping.
survey_data = get_data()

In [74]:
def clean_datetime(obj):
    """Return a string form of ``obj`` suitable as a ``json.dump`` ``default`` hook.

    Objects exposing an ``isoformat`` attribute (e.g. ``datetime`` / ``date``)
    are rendered through ``obj.isoformat()``; everything else falls back to
    ``str(obj)``.
    """
    supports_iso = hasattr(obj, 'isoformat')
    return obj.isoformat() if supports_iso else str(obj)

In [75]:
# with open('answers.json', 'w', encoding='utf-8') as f:
#     json.dump(survey_data, f, ensure_ascii=False, indent=2, default=clean_datetime)

In [None]:
# Load variables from a .env file (if any) into the process environment
# before reading the database settings below.
load_dotenv()

# Connection settings, keyed by the keyword names passed on to
# psycopg2.connect(**db_config). A variable that is not set yields None.
db_config = {
    connect_kwarg: os.getenv(env_var)
    for connect_kwarg, env_var in (
        ("dbname", "DB_NAME"),
        ("user", "DB_USER"),
        ("password", "DB_PASSWORD"),
        ("host", "DB_HOST"),
        ("port", "DB_PORT"),
    )
}


In [None]:
# Regroup the survey answers per source CSV. Each key of an 'answers' mapping
# is split on "/" and read as "<csv_name>/<date>"; every inner entry is
# re-keyed as "<date> <inner key>" and merged into that CSV's dictionary.
structured_data = {}

for record in survey_data:
    for key, values in record['answers'].items():
        key_parts = key.split("/")
        csv_name = key_parts[0]
        date = key_parts[1]
        prefixed_values = {f"{date} {name}": val for name, val in values.items()}
        # Later records for the same CSV overwrite earlier ones on key clashes.
        structured_data.setdefault(csv_name, {}).update(prefixed_values)
        
        
# Parameterized statement that mirrors, in the `datasets` table, the same row
# selection applied to the DataFrame below.
_UPDATE_ENGAGEMENT_SQL = """
    UPDATE datasets
    SET engagement = %s
    WHERE datetime = %s AND face = %s
        AND face_bbox_x1 = %s AND face_bbox_y1 = %s
        AND face_bbox_x2 = %s AND face_bbox_y2 = %s
        AND name = %s
"""


def _parse_face_key(info):
    """Decode one structured_data key into the values used to locate its row.

    The key is split on single spaces and read as
    ``"<date> <time> face<N>-<x1>_<y1>_<x2>_<y2>.jpg"``.

    Returns:
        tuple: ``(datetime_str, face, x1, y1, x2, y2)``. ``datetime_str`` is
        ``"<date> <time>"`` with every "-" in the time part replaced by ":";
        the remaining items are ints.

    Raises:
        IndexError, ValueError: if the key does not follow that layout.
    """
    parts = info.split(" ")
    datetime_str = parts[0] + " " + parts[1].replace("-", ":")
    image_name = parts[2]
    face = int(image_name.split("-")[0].replace("face", ""))
    x1, y1, x2, y2 = map(
        int, image_name.replace(".jpg", "").split("-")[1].split("_")
    )
    return datetime_str, face, x1, y1, x2, y2


conn = psycopg2.connect(**db_config)
try:
    with conn.cursor() as cur:
        for csv_name, values in structured_data.items():
            csv_path = f"survey_datasets/{csv_name}.csv"
            csv_name_db = csv_name + ".csv"
            df = pd.read_csv(csv_path, low_memory=False)
            if 'engagement' not in df.columns:
                df['engagement'] = None  # add the empty column
            # Keep the column as object dtype: an existing column may have been
            # inferred as float by read_csv, and writing string labels into a
            # float column is deprecated in recent pandas versions.
            df['engagement'] = df['engagement'].astype('object')

            print(f"Processing {csv_name}.")

            # Write each engagement value into the DataFrame and the database.
            for info, engagement in values.items():
                (
                    datetime_column, face,
                    face_bbox_x1, face_bbox_y1, face_bbox_x2, face_bbox_y2,
                ) = _parse_face_key(info)
                filter_condition = (
                    (df['datetime'] == datetime_column)
                    & (df['face'] == face)
                    & (df['face_bbox_x1'] == face_bbox_x1)
                    & (df['face_bbox_y1'] == face_bbox_y1)
                    & (df['face_bbox_x2'] == face_bbox_x2)
                    & (df['face_bbox_y2'] == face_bbox_y2)
                )
                df.loc[filter_condition, "engagement"] = engagement

                cur.execute(_UPDATE_ENGAGEMENT_SQL, (
                    engagement, datetime_column, face,
                    face_bbox_x1, face_bbox_y1, face_bbox_x2, face_bbox_y2,
                    csv_name_db
                ))

            df.to_csv(csv_path, index=False)
            # psycopg2 does not autocommit: without this the UPDATEs above are
            # discarded when the connection is closed. Committing only after
            # the CSV is written keeps the file and the table in step.
            conn.commit()
            print(f"Updated {csv_path} with engagement data.")
finally:
    # Always release the connection; any uncommitted work is rolled back.
    conn.close()

Processing NEXI - Apresentação-20250213_123528-Meeting Recording.
Processing Zoom_Class_Meeting_Downing_Soc_220_2-18-2021-clean.
Processing 2021-05-24_-_Club_Meeting_-_Gallery_View-clean.
Processing TMU_-_History_102_-_ZOOM_Class_Meeting_-_March_25th,_2020-clean.
Processing concerta_2024-09-30_atualizado_baixo.


In [None]:
        # print(f"CSV: {csv_name}, Date: {datetime_column}, Face: {face}, Bbox: ({face_bbox_x1}, {face_bbox_y1}, {face_bbox_x2}, {face_bbox_y2}), Engagement: {engagement}")
        # values_found = df.loc[filter_condition]
        # if not values_found.empty:
            # print("OI")
            # print(f"Warning: No matching row found for {csv_name} with datetime {datetime_column}, face {face}, bbox ({face_bbox_x1}, {face_bbox_y1}, {face_bbox_x2}, {face_bbox_y2})")

In [None]:
# Collect every CSV file in the survey_datasets folder.
csv_files = glob.glob("survey_datasets/*.csv")

# Strip the 'engagement' column from each file, rewriting the file in place.
for csv_file in csv_files:
    print(f"Removendo coluna 'engagement' de: {csv_file}")
    df = pd.read_csv(csv_file, low_memory=False)

    # Nothing to remove: leave the file untouched.
    if 'engagement' not in df.columns:
        print("Coluna 'engagement' não existe neste arquivo.")
        continue

    df = df.drop(columns=['engagement'])
    df.to_csv(csv_file, index=False)
    print("Coluna removida com sucesso.\n")

Removendo coluna 'engagement' de: survey_datasets\2021-05-24_-_Club_Meeting_-_Gallery_View-clean.csv
Coluna removida com sucesso.

Removendo coluna 'engagement' de: survey_datasets\concerta_2024-09-30_atualizado_baixo.csv
Coluna removida com sucesso.

Removendo coluna 'engagement' de: survey_datasets\NEXI - Apresentação-20250213_123528-Meeting Recording.csv


  df = pd.read_csv(file)


Coluna removida com sucesso.

Removendo coluna 'engagement' de: survey_datasets\PE Meetup Recording with Gallery View-clean.csv
Coluna 'engagement' não existe neste arquivo.
Removendo coluna 'engagement' de: survey_datasets\TMU_-_History_102_-_ZOOM_Class_Meeting_-_March_25th,_2020-clean.csv


  df = pd.read_csv(file)


Coluna removida com sucesso.

Removendo coluna 'engagement' de: survey_datasets\Zoom_Class_Meeting_Downing_Soc_220_2-18-2021-clean.csv


  df = pd.read_csv(file)


Coluna removida com sucesso.



In [4]:
# List the distinct values of the 'engagement' column in the NEXI recording CSV.
dataset_csv = "survey_datasets/NEXI - Apresentação-20250213_123528-Meeting Recording.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

array([nan, '5', '4', '8', '2', '6', '3', '9', '7', 'No context image',
       '10'], dtype=object)

In [10]:
# List the distinct values of the 'engagement' column in the concerta CSV.
dataset_csv = "survey_datasets/concerta_2024-09-30_atualizado_baixo.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

array([nan,  5.,  6.,  7.,  4.])

In [6]:
# List the distinct values of the 'engagement' column in the Soc 220 Zoom class CSV.
dataset_csv = "survey_datasets/Zoom_Class_Meeting_Downing_Soc_220_2-18-2021-clean.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

array([nan, '2', '8', '9', '3', 'Imagem sem contexto'], dtype=object)

In [7]:
# List the distinct values of the 'engagement' column in the TMU History 102 CSV.
dataset_csv = "survey_datasets/TMU_-_History_102_-_ZOOM_Class_Meeting_-_March_25th,_2020-clean.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

array([nan, '1', '6', '9', '3', '7', '2', 'No context image', '10', '4',
       '8', '5'], dtype=object)

In [None]:
# List the distinct values of the 'engagement' column in the PE Meetup CSV.
dataset_csv = "survey_datasets/PE Meetup Recording with Gallery View-clean.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

In [9]:
# List the distinct values of the 'engagement' column in the Club Meeting CSV.
dataset_csv = "survey_datasets/2021-05-24_-_Club_Meeting_-_Gallery_View-clean.csv"
df = pd.read_csv(dataset_csv, low_memory=False)
df['engagement'].unique()

array([nan])