In [5]:
import json
import pandas as pd
from datetime import datetime


In [None]:

# Build the cleaned roster JSON from the raw CSV export.
#
# Steps: load the CSV, split the birth date into numeric parts, normalise text
# columns to lowercase, turn the hobby string into a short list, compute each
# person's age, derive a profile-picture path, and write everything out as
# pretty-printed UTF-8 JSON.
classic_df = pd.read_csv('../src/data/data.csv')

# 'NASCIMENTO' is split on '/' into three parts.
# NOTE(review): the target column names imply a DD/MM/YYYY layout - confirm
# against the data; the assignment fails if the split does not yield exactly
# three columns.
classic_df[['DIA', 'MES', 'ANO']] = classic_df['NASCIMENTO'].str.split('/', expand=True)

# Convert columns to numeric, coercing unparseable values to NaN.
for numeric_col in ['ALTURA', 'DIA', 'MES', 'ANO']:
    classic_df[numeric_col] = pd.to_numeric(classic_df[numeric_col], errors='coerce')

# Lowercase all string columns except 'HOBBY' and 'FOTOS'.
for col in classic_df.select_dtypes(include='object').columns:
    if col not in ['HOBBY', 'FOTOS']:
        classic_df[col] = classic_df[col].str.lower()

# Split hobbies on ';', trim and lowercase each one, and keep at most the
# first 3 items. Missing values become an empty list.
classic_df['HOBBY'] = classic_df['HOBBY'].apply(
    lambda x: [hobby.strip().lower() for hobby in x.split(';')][:3] if pd.notnull(x) else []
)

# Raises KeyError if the column is absent (pandas' default for drop).
classic_df = classic_df.drop(columns=['LIDERANÇA'])

# Age in whole years: year difference, minus one when this year's birthday is
# still ahead. Comparisons against NaN are False, so rows with a missing
# month/day are treated as "birthday already passed", and a missing year
# yields NaN - the same outcome as a per-row tuple comparison, but vectorized.
today = datetime.today()
birthday_pending = (classic_df['MES'] > today.month) | (
    (classic_df['MES'] == today.month) & (classic_df['DIA'] > today.day)
)
classic_df['IDADE'] = today.year - classic_df['ANO'] - birthday_pending.astype(int)

# Build the path element-wise. Interpolating the Series inside an f-string
# would format the whole Series repr into one string and assign that same
# string to every row. 'NOME' has already been lowercased by the loop above
# when it is a string column.
classic_df['PROFILEPIC'] = (
    'profile-pics/'
    + classic_df['NOME'].str.replace(' ', '-', regex=False)
    + '.jpg'
)

# pandas serialises the records (NaN becomes null); the result is parsed back
# and re-dumped so the stdlib encoder controls the final formatting.
records = json.loads(classic_df.to_json(orient='records', force_ascii=False))
with open('../src/data/classic2.json', 'w', encoding='utf-8') as f:
    json.dump(records, f, ensure_ascii=False, indent=2, separators=(',', ': '))