In [1]:
import pandas as pd
import numpy as np
import re
from tqdm import tqdm
import os
from sentence_transformers import SentenceTransformer, util
import torch
from dotenv import load_dotenv
from mongo_wrapper.mongo_wrapper import MongoWrapper

# states to categorize
STATES = [
    "Guanajuato",
    "Michoacán",
    "Sinaloa",
    "Chihuahua",
    "Guerrero",
    "Tamaulipas",
    "Baja California",
    "Zacatecas",
    "Colima",
    "Jalisco",
    "Aguascalientes",
    "Baja California Sur",
    "Campeche",
    "Coahuila",
    "Durango",
    "Hidalgo",
    "Estado de México",
    "Ciudad de México",
    "Morelos",
    "Nayarit",
    "Nuevo León",
    "Oaxaca",
    "Puebla",
    "Querétaro",
    "Quintana Roo",
    "San Luis Potosí",
    "Sonora",
    "Tabasco",
    "Tlaxcala",
    "Veracruz",
    "Yucatán",
    "Chiapas"]


# channels to analyze 
TARGET_CHANNELS = [
    "elpaismexico",
    "ElUniversalOnline",
    "proceso_unofficial",
    "politicomx",
    "lajornada_unofficial",
    "larazondemexico",
    "sinembargomx",
    "elpaisamerica"]

# define dimensions of poverty 
POVERTY_DIMENSIONS = {
    "INCOME": """
    desempleo, salario mínimo, bajos ingresos, deudas familiares, pobreza laboral, 
    pérdida de empleo, ingreso insuficiente, precariedad laboral, empleo informal, 
    falta de oportunidades laborales, reducción de salario, inestabilidad económica, 
    recesión, subempleo, despidos masivos, contratos temporales, informalidad, 
    costos de vida elevados, falta de empleo formal, insuficiencia salarial
    """,

    "ACCESS TO HEALTH SERVICES": """
    falta de acceso a servicios de salud, hospitales saturados, escasez de medicamentos, 
    deficiencias en la atención médica, carencia de personal médico, emergencia sanitaria, 
    costos elevados de tratamientos, cierre de centros de salud, lista de espera prolongada, 
    equipos médicos inoperantes, desabasto de vacunas, falta de atención especializada, 
    dificultades para obtener citas médicas, crisis hospitalaria, cobertura médica limitada
    """,

    "EDUCATIONAL_LAG": """
    deserción escolar, suspensión de clases, carencia de docentes, infraestructura educativa deficiente, 
    dificultades de acceso a la educación, brecha digital, educación interrumpida, rezago académico, 
    falta de recursos escolares, inasistencia por motivos económicos, limitaciones en conectividad, 
    acceso desigual a la educación, deficiencias en formación básica, carencia de materiales educativos
    """,

    "ACCESS TO SOCIAL SECURITY": """
    empleo informal, ausencia de prestaciones sociales, falta de contrato laboral, 
    exclusión del sistema de pensiones, carencia de protección social, trabajo precario, 
    derechos laborales no garantizados, falta de cotización al sistema, desprotección estructural, 
    dificultades para acceder al seguro social, informalidad laboral, empleo sin afiliación
    """,

    "HOUSING": """
    vivienda precaria, hacinamiento, falta de servicios básicos, 
    infraestructura deteriorada, zonas marginadas, viviendas inseguras
    """,
    
    "ACCESS TO FOOD": """
    inseguridad alimentaria, acceso limitado a alimentos, inflación, precios, 
    raciones insuficientes, pobreza alimentaria, aumento de precios, comeder comunitario, 
    canasta básica, crisis alimentaria, alza de precios, ayuda alimentaria, 
    insuficiencia nutricional, alimentación deficiente, encarecimiento de alimentos,
    inflación en alimentos, carencia alimentaria, productos básicos, banco de alimentos, 
    alimentos inaccesibles, gasto alimentario elevado, programas alimentarios,
    """,

    "SOCIAL_COHESION": """
    discriminación étnica, marginación social, exclusión de comunidades vulnerables, 
    conflictos intercomunitarios, tensiones sociales, barreras sociales, 
    desigualdad estructural, aislamiento social
    """}


class PovertyDimensionClassifier:
    def __init__(self):
        self.model = SentenceTransformer('hiiamsid/sentence_similarity_spanish_es')
        self.dimension_embeddings = {}
        
        for dim_name, dim_terms in POVERTY_DIMENSIONS.items():
            terms = [t.strip() for t in dim_terms.split(',') if t.strip()]
            term_embeddings = self.model.encode(terms, convert_to_tensor=True)
            avg_embedding = torch.mean(term_embeddings, dim=0)  
            self.dimension_embeddings[dim_name] = avg_embedding
    
    def clean_text(self, text):
        if not isinstance(text, str): 
            return ""
        text = re.sub(r'<.*?>', ' ', text)
        text = re.sub(r'http\S+', '', text)
        text = re.sub(r'[^\w\sáéíóúüñÁÉÍÓÚÜÑ]', ' ', text)
        return re.sub(r'\s+', ' ', text).strip().lower()
    
    def classify_text(self, text, threshold=0.10):
        if not text:
            return None, 0.0
        
        cleaned_text = self.clean_text(text)
        if len(cleaned_text) < 10:
            return None, 0.0
        
        text_embedding = self.model.encode(cleaned_text, convert_to_tensor=True)
        
        max_score = -1
        best_dim = None
        
        for dim_name, dim_embedding in self.dimension_embeddings.items():
            text_embed_2d = text_embedding.unsqueeze(0)
            dim_embed_2d = dim_embedding.unsqueeze(0)
            
            cos_sim = util.cos_sim(text_embed_2d, dim_embed_2d).item()
            
            if cos_sim > max_score and cos_sim >= threshold:
                max_score = cos_sim
                best_dim = dim_name
        
        return (best_dim, max_score) if best_dim else (None, max_score)

# load only data of interest from MongoDB
def load_state_posts():
    
    MONGO_IP = os.getenv("MONGO_IP")
    MONGO_PORT = os.getenv("MONGO_PORT")
    MONGO_DB = os.getenv("MONGO_DB")
    MONGO_USERNAME = os.getenv("MONGO_USERNAME")
    MONGO_PASSWORD = os.getenv("MONGO_PASSWORD")
    
    mongo_client = MongoWrapper(
        db=MONGO_DB,
        user=MONGO_USERNAME,
        password=MONGO_PASSWORD,
        ip=MONGO_IP,
        port=MONGO_PORT)
    
    all_channels = mongo_client.get_all_collections()
    available_target_channels = [channel for channel in TARGET_CHANNELS if channel in all_channels]
    
    # initialize a dictionary to store posts for each state
    state_posts = {state: [] for state in STATES}
    
    # classify posts by using regex patterns - if a post contains a state name, it will be classified as that state
    state_patterns = {state: re.compile(r'\b' + re.escape(state) + r'\b', re.IGNORECASE) for state in STATES}
    
    for channel in tqdm(available_target_channels, desc="loading channels"):
        posts = mongo_client.get_collection_entries(collection=channel)
        
        print(f"channel: {channel} - {len(posts)} post found")
        
        for post in tqdm(posts, desc=f"analysis {channel}", leave=False):
            post_text = post.get('text', '')
                
            for state, pattern in state_patterns.items():
                if pattern.search(post_text):
                    state_posts[state].append({
                        'text': post_text,
                        'author': post.get('author', ''),
                        'posting_ts': post.get('posting_ts', ''),
                        'channel': channel})
    
    for state in STATES:
        state_posts[state] = pd.DataFrame(state_posts[state])
    
    return state_posts

# classify posts by dimensions of poverty 
def analyze_poverty_dimensions(state_posts):
    classifier = PovertyDimensionClassifier()
    
    results = []

    for state, df in state_posts.items():
        print(f"\nanalyzing {state} ({len(df)} posts)...")
    
        dimension_counts = {dim: 0 for dim in POVERTY_DIMENSIONS.keys()}
        dimension_counts["OTHER"] = 0  # other types of posts 
    
        for idx, row in tqdm(df.iterrows(), total=len(df), desc=f"Classifying {state}"):
            text = row['text']
        
            dimension, score = classifier.classify_text(text)
        
            if dimension:
                dimension_counts[dimension] += 1
            else:
                dimension_counts["OTHER"] += 1
        
        total_posts = len(df)
        dimension_percentages = {dim: (count / total_posts) * 100 for dim, count in dimension_counts.items()}
        
        print(f"\nresults for {state}:")
        print(f"total posts: {total_posts}")
        print("\ndistribution of posts across dimensions of poverty:")
        
        for dim, count in dimension_counts.items():
            dim_name = dim if dim != "OTHER" else "non-poverty posts"
            pct = dimension_percentages[dim]
            print(f"- {dim_name}: {count} post ({pct:.1f}%)")
        
        for dim in list(POVERTY_DIMENSIONS.keys()) + ["OTHER"]:
            results.append({
                'state': state,
                'dimension': dim,
                'count': dimension_counts[dim],
                'percentage': dimension_percentages[dim],
                'total_posts': total_posts})
    
    results_df = pd.DataFrame(results)
    return results_df

def main():
    state_posts = load_state_posts()
    
    results = analyze_poverty_dimensions(state_posts)
    
    results.to_csv("tg_results_improved.csv", index=False)
    
    pivot_counts = results.pivot(index='state', columns='dimension', values='count')
    print("\ncount of posts per dimension:")
    print(pivot_counts)
    
    pivot_percentages = results.pivot(index='state', columns='dimension', values='percentage')
    print("\npercentage of posts per dimension:")
    print(pivot_percentages.round(1))

if __name__ == "__main__":
    main()

2025-05-12 11:14:10,968 INFO Connected to thesis database on 206.81.16.39
loading channels:   0%|          | 0/8 [00:00<?, ?it/s]

channel: elpaismexico - 1750 post found


loading channels:  12%|█▎        | 1/8 [00:00<00:04,  1.63it/s]

channel: ElUniversalOnline - 2435 post found


loading channels:  25%|██▌       | 2/8 [00:02<00:06,  1.13s/it]

channel: proceso_unofficial - 3141 post found


loading channels:  38%|███▊      | 3/8 [00:02<00:03,  1.37it/s]

channel: politicomx - 5103 post found


loading channels:  50%|█████     | 4/8 [00:02<00:02,  1.64it/s]

channel: lajornada_unofficial - 18673 post found


loading channels:  62%|██████▎   | 5/8 [00:03<00:02,  1.44it/s]

channel: larazondemexico - 4248 post found


loading channels:  75%|███████▌  | 6/8 [00:04<00:01,  1.32it/s]

channel: sinembargomx - 9525 post found


loading channels:  88%|████████▊ | 7/8 [00:05<00:00,  1.27it/s]

channel: elpaisamerica - 1411 post found


loading channels: 100%|██████████| 8/8 [00:05<00:00,  1.39it/s]



analyzing Guanajuato (219 posts)...


Classifying Guanajuato: 100%|██████████| 219/219 [00:23<00:00,  9.35it/s]



results for Guanajuato:
total posts: 219

distribution of posts across dimensions of poverty:
- INCOME: 49 post (22.4%)
- ACCESS TO HEALTH SERVICES: 46 post (21.0%)
- EDUCATIONAL_LAG: 14 post (6.4%)
- ACCESS TO SOCIAL SECURITY: 29 post (13.2%)
- HOUSING: 31 post (14.2%)
- ACCESS TO FOOD: 7 post (3.2%)
- SOCIAL_COHESION: 13 post (5.9%)
- non-poverty posts: 30 post (13.7%)

analyzing Michoacán (317 posts)...


Classifying Michoacán: 100%|██████████| 317/317 [00:35<00:00,  8.95it/s]



results for Michoacán:
total posts: 317

distribution of posts across dimensions of poverty:
- INCOME: 69 post (21.8%)
- ACCESS TO HEALTH SERVICES: 66 post (20.8%)
- EDUCATIONAL_LAG: 17 post (5.4%)
- ACCESS TO SOCIAL SECURITY: 38 post (12.0%)
- HOUSING: 51 post (16.1%)
- ACCESS TO FOOD: 13 post (4.1%)
- SOCIAL_COHESION: 34 post (10.7%)
- non-poverty posts: 29 post (9.1%)

analyzing Sinaloa (265 posts)...


Classifying Sinaloa: 100%|██████████| 265/265 [00:36<00:00,  7.26it/s]



results for Sinaloa:
total posts: 265

distribution of posts across dimensions of poverty:
- INCOME: 57 post (21.5%)
- ACCESS TO HEALTH SERVICES: 62 post (23.4%)
- EDUCATIONAL_LAG: 11 post (4.2%)
- ACCESS TO SOCIAL SECURITY: 57 post (21.5%)
- HOUSING: 21 post (7.9%)
- ACCESS TO FOOD: 17 post (6.4%)
- SOCIAL_COHESION: 13 post (4.9%)
- non-poverty posts: 27 post (10.2%)

analyzing Chihuahua (230 posts)...


Classifying Chihuahua: 100%|██████████| 230/230 [00:30<00:00,  7.60it/s]



results for Chihuahua:
total posts: 230

distribution of posts across dimensions of poverty:
- INCOME: 17 post (7.4%)
- ACCESS TO HEALTH SERVICES: 47 post (20.4%)
- EDUCATIONAL_LAG: 30 post (13.0%)
- ACCESS TO SOCIAL SECURITY: 39 post (17.0%)
- HOUSING: 34 post (14.8%)
- ACCESS TO FOOD: 13 post (5.7%)
- SOCIAL_COHESION: 18 post (7.8%)
- non-poverty posts: 32 post (13.9%)

analyzing Guerrero (343 posts)...


Classifying Guerrero: 100%|██████████| 343/343 [11:55<00:00,  2.09s/it]  



results for Guerrero:
total posts: 343

distribution of posts across dimensions of poverty:
- INCOME: 80 post (23.3%)
- ACCESS TO HEALTH SERVICES: 56 post (16.3%)
- EDUCATIONAL_LAG: 20 post (5.8%)
- ACCESS TO SOCIAL SECURITY: 57 post (16.6%)
- HOUSING: 49 post (14.3%)
- ACCESS TO FOOD: 9 post (2.6%)
- SOCIAL_COHESION: 44 post (12.8%)
- non-poverty posts: 28 post (8.2%)

analyzing Tamaulipas (328 posts)...


Classifying Tamaulipas: 100%|██████████| 328/328 [00:38<00:00,  8.48it/s]



results for Tamaulipas:
total posts: 328

distribution of posts across dimensions of poverty:
- INCOME: 68 post (20.7%)
- ACCESS TO HEALTH SERVICES: 57 post (17.4%)
- EDUCATIONAL_LAG: 9 post (2.7%)
- ACCESS TO SOCIAL SECURITY: 55 post (16.8%)
- HOUSING: 28 post (8.5%)
- ACCESS TO FOOD: 14 post (4.3%)
- SOCIAL_COHESION: 31 post (9.5%)
- non-poverty posts: 66 post (20.1%)

analyzing Baja California (72 posts)...


Classifying Baja California: 100%|██████████| 72/72 [00:03<00:00, 20.26it/s]



results for Baja California:
total posts: 72

distribution of posts across dimensions of poverty:
- INCOME: 20 post (27.8%)
- ACCESS TO HEALTH SERVICES: 25 post (34.7%)
- EDUCATIONAL_LAG: 1 post (1.4%)
- ACCESS TO SOCIAL SECURITY: 5 post (6.9%)
- HOUSING: 10 post (13.9%)
- ACCESS TO FOOD: 1 post (1.4%)
- SOCIAL_COHESION: 3 post (4.2%)
- non-poverty posts: 7 post (9.7%)

analyzing Zacatecas (299 posts)...


Classifying Zacatecas: 100%|██████████| 299/299 [00:56<00:00,  5.31it/s]



results for Zacatecas:
total posts: 299

distribution of posts across dimensions of poverty:
- INCOME: 69 post (23.1%)
- ACCESS TO HEALTH SERVICES: 60 post (20.1%)
- EDUCATIONAL_LAG: 25 post (8.4%)
- ACCESS TO SOCIAL SECURITY: 26 post (8.7%)
- HOUSING: 65 post (21.7%)
- ACCESS TO FOOD: 10 post (3.3%)
- SOCIAL_COHESION: 30 post (10.0%)
- non-poverty posts: 14 post (4.7%)

analyzing Colima (143 posts)...


Classifying Colima: 100%|██████████| 143/143 [00:07<00:00, 18.80it/s]



results for Colima:
total posts: 143

distribution of posts across dimensions of poverty:
- INCOME: 31 post (21.7%)
- ACCESS TO HEALTH SERVICES: 51 post (35.7%)
- EDUCATIONAL_LAG: 9 post (6.3%)
- ACCESS TO SOCIAL SECURITY: 10 post (7.0%)
- HOUSING: 4 post (2.8%)
- ACCESS TO FOOD: 10 post (7.0%)
- SOCIAL_COHESION: 9 post (6.3%)
- non-poverty posts: 19 post (13.3%)

analyzing Jalisco (317 posts)...


Classifying Jalisco: 100%|██████████| 317/317 [00:28<00:00, 10.98it/s]



results for Jalisco:
total posts: 317

distribution of posts across dimensions of poverty:
- INCOME: 91 post (28.7%)
- ACCESS TO HEALTH SERVICES: 88 post (27.8%)
- EDUCATIONAL_LAG: 17 post (5.4%)
- ACCESS TO SOCIAL SECURITY: 33 post (10.4%)
- HOUSING: 32 post (10.1%)
- ACCESS TO FOOD: 10 post (3.2%)
- SOCIAL_COHESION: 28 post (8.8%)
- non-poverty posts: 18 post (5.7%)

analyzing Aguascalientes (165 posts)...


Classifying Aguascalientes: 100%|██████████| 165/165 [00:15<00:00, 10.74it/s]



results for Aguascalientes:
total posts: 165

distribution of posts across dimensions of poverty:
- INCOME: 29 post (17.6%)
- ACCESS TO HEALTH SERVICES: 29 post (17.6%)
- EDUCATIONAL_LAG: 5 post (3.0%)
- ACCESS TO SOCIAL SECURITY: 30 post (18.2%)
- HOUSING: 21 post (12.7%)
- ACCESS TO FOOD: 12 post (7.3%)
- SOCIAL_COHESION: 7 post (4.2%)
- non-poverty posts: 32 post (19.4%)

analyzing Baja California Sur (13 posts)...


Classifying Baja California Sur: 100%|██████████| 13/13 [00:00<00:00, 26.69it/s]



results for Baja California Sur:
total posts: 13

distribution of posts across dimensions of poverty:
- INCOME: 4 post (30.8%)
- ACCESS TO HEALTH SERVICES: 5 post (38.5%)
- EDUCATIONAL_LAG: 0 post (0.0%)
- ACCESS TO SOCIAL SECURITY: 1 post (7.7%)
- HOUSING: 1 post (7.7%)
- ACCESS TO FOOD: 1 post (7.7%)
- SOCIAL_COHESION: 0 post (0.0%)
- non-poverty posts: 1 post (7.7%)

analyzing Campeche (81 posts)...


Classifying Campeche: 100%|██████████| 81/81 [00:09<00:00,  8.84it/s]



results for Campeche:
total posts: 81

distribution of posts across dimensions of poverty:
- INCOME: 17 post (21.0%)
- ACCESS TO HEALTH SERVICES: 16 post (19.8%)
- EDUCATIONAL_LAG: 6 post (7.4%)
- ACCESS TO SOCIAL SECURITY: 24 post (29.6%)
- HOUSING: 9 post (11.1%)
- ACCESS TO FOOD: 0 post (0.0%)
- SOCIAL_COHESION: 4 post (4.9%)
- non-poverty posts: 5 post (6.2%)

analyzing Coahuila (396 posts)...


Classifying Coahuila: 100%|██████████| 396/396 [00:23<00:00, 16.93it/s]



results for Coahuila:
total posts: 396

distribution of posts across dimensions of poverty:
- INCOME: 82 post (20.7%)
- ACCESS TO HEALTH SERVICES: 43 post (10.9%)
- EDUCATIONAL_LAG: 7 post (1.8%)
- ACCESS TO SOCIAL SECURITY: 118 post (29.8%)
- HOUSING: 23 post (5.8%)
- ACCESS TO FOOD: 8 post (2.0%)
- SOCIAL_COHESION: 63 post (15.9%)
- non-poverty posts: 52 post (13.1%)

analyzing Durango (184 posts)...


Classifying Durango: 100%|██████████| 184/184 [00:05<00:00, 30.81it/s]



results for Durango:
total posts: 184

distribution of posts across dimensions of poverty:
- INCOME: 41 post (22.3%)
- ACCESS TO HEALTH SERVICES: 42 post (22.8%)
- EDUCATIONAL_LAG: 11 post (6.0%)
- ACCESS TO SOCIAL SECURITY: 36 post (19.6%)
- HOUSING: 14 post (7.6%)
- ACCESS TO FOOD: 1 post (0.5%)
- SOCIAL_COHESION: 15 post (8.2%)
- non-poverty posts: 24 post (13.0%)

analyzing Hidalgo (196 posts)...


Classifying Hidalgo: 100%|██████████| 196/196 [00:08<00:00, 22.24it/s]



results for Hidalgo:
total posts: 196

distribution of posts across dimensions of poverty:
- INCOME: 51 post (26.0%)
- ACCESS TO HEALTH SERVICES: 25 post (12.8%)
- EDUCATIONAL_LAG: 7 post (3.6%)
- ACCESS TO SOCIAL SECURITY: 48 post (24.5%)
- HOUSING: 27 post (13.8%)
- ACCESS TO FOOD: 1 post (0.5%)
- SOCIAL_COHESION: 17 post (8.7%)
- non-poverty posts: 20 post (10.2%)

analyzing Estado de México (94 posts)...


Classifying Estado de México: 100%|██████████| 94/94 [00:06<00:00, 14.53it/s]



results for Estado de México:
total posts: 94

distribution of posts across dimensions of poverty:
- INCOME: 26 post (27.7%)
- ACCESS TO HEALTH SERVICES: 24 post (25.5%)
- EDUCATIONAL_LAG: 1 post (1.1%)
- ACCESS TO SOCIAL SECURITY: 11 post (11.7%)
- HOUSING: 8 post (8.5%)
- ACCESS TO FOOD: 2 post (2.1%)
- SOCIAL_COHESION: 6 post (6.4%)
- non-poverty posts: 16 post (17.0%)

analyzing Ciudad de México (275 posts)...


Classifying Ciudad de México: 100%|██████████| 275/275 [00:14<00:00, 19.37it/s]



results for Ciudad de México:
total posts: 275

distribution of posts across dimensions of poverty:
- INCOME: 55 post (20.0%)
- ACCESS TO HEALTH SERVICES: 105 post (38.2%)
- EDUCATIONAL_LAG: 9 post (3.3%)
- ACCESS TO SOCIAL SECURITY: 21 post (7.6%)
- HOUSING: 31 post (11.3%)
- ACCESS TO FOOD: 1 post (0.4%)
- SOCIAL_COHESION: 18 post (6.5%)
- non-poverty posts: 35 post (12.7%)

analyzing Morelos (228 posts)...


Classifying Morelos: 100%|██████████| 228/228 [00:17<00:00, 13.37it/s]



results for Morelos:
total posts: 228

distribution of posts across dimensions of poverty:
- INCOME: 68 post (29.8%)
- ACCESS TO HEALTH SERVICES: 40 post (17.5%)
- EDUCATIONAL_LAG: 12 post (5.3%)
- ACCESS TO SOCIAL SECURITY: 37 post (16.2%)
- HOUSING: 22 post (9.6%)
- ACCESS TO FOOD: 5 post (2.2%)
- SOCIAL_COHESION: 25 post (11.0%)
- non-poverty posts: 19 post (8.3%)

analyzing Nayarit (79 posts)...


Classifying Nayarit: 100%|██████████| 79/79 [00:02<00:00, 26.79it/s]



results for Nayarit:
total posts: 79

distribution of posts across dimensions of poverty:
- INCOME: 16 post (20.3%)
- ACCESS TO HEALTH SERVICES: 36 post (45.6%)
- EDUCATIONAL_LAG: 4 post (5.1%)
- ACCESS TO SOCIAL SECURITY: 15 post (19.0%)
- HOUSING: 5 post (6.3%)
- ACCESS TO FOOD: 0 post (0.0%)
- SOCIAL_COHESION: 2 post (2.5%)
- non-poverty posts: 1 post (1.3%)

analyzing Nuevo León (179 posts)...


Classifying Nuevo León: 100%|██████████| 179/179 [00:08<00:00, 19.90it/s]



results for Nuevo León:
total posts: 179

distribution of posts across dimensions of poverty:
- INCOME: 42 post (23.5%)
- ACCESS TO HEALTH SERVICES: 37 post (20.7%)
- EDUCATIONAL_LAG: 8 post (4.5%)
- ACCESS TO SOCIAL SECURITY: 43 post (24.0%)
- HOUSING: 22 post (12.3%)
- ACCESS TO FOOD: 8 post (4.5%)
- SOCIAL_COHESION: 4 post (2.2%)
- non-poverty posts: 15 post (8.4%)

analyzing Oaxaca (330 posts)...


Classifying Oaxaca: 100%|██████████| 330/330 [00:18<00:00, 18.01it/s]



results for Oaxaca:
total posts: 330

distribution of posts across dimensions of poverty:
- INCOME: 61 post (18.5%)
- ACCESS TO HEALTH SERVICES: 75 post (22.7%)
- EDUCATIONAL_LAG: 17 post (5.2%)
- ACCESS TO SOCIAL SECURITY: 65 post (19.7%)
- HOUSING: 37 post (11.2%)
- ACCESS TO FOOD: 7 post (2.1%)
- SOCIAL_COHESION: 37 post (11.2%)
- non-poverty posts: 31 post (9.4%)

analyzing Puebla (315 posts)...


Classifying Puebla: 100%|██████████| 315/315 [00:12<00:00, 26.15it/s]



results for Puebla:
total posts: 315

distribution of posts across dimensions of poverty:
- INCOME: 69 post (21.9%)
- ACCESS TO HEALTH SERVICES: 53 post (16.8%)
- EDUCATIONAL_LAG: 29 post (9.2%)
- ACCESS TO SOCIAL SECURITY: 52 post (16.5%)
- HOUSING: 34 post (10.8%)
- ACCESS TO FOOD: 6 post (1.9%)
- SOCIAL_COHESION: 23 post (7.3%)
- non-poverty posts: 49 post (15.6%)

analyzing Querétaro (181 posts)...


Classifying Querétaro: 100%|██████████| 181/181 [00:05<00:00, 33.47it/s]



results for Querétaro:
total posts: 181

distribution of posts across dimensions of poverty:
- INCOME: 35 post (19.3%)
- ACCESS TO HEALTH SERVICES: 54 post (29.8%)
- EDUCATIONAL_LAG: 16 post (8.8%)
- ACCESS TO SOCIAL SECURITY: 25 post (13.8%)
- HOUSING: 11 post (6.1%)
- ACCESS TO FOOD: 4 post (2.2%)
- SOCIAL_COHESION: 16 post (8.8%)
- non-poverty posts: 20 post (11.0%)

analyzing Quintana Roo (136 posts)...


Classifying Quintana Roo: 100%|██████████| 136/136 [00:06<00:00, 21.35it/s]



results for Quintana Roo:
total posts: 136

distribution of posts across dimensions of poverty:
- INCOME: 37 post (27.2%)
- ACCESS TO HEALTH SERVICES: 30 post (22.1%)
- EDUCATIONAL_LAG: 3 post (2.2%)
- ACCESS TO SOCIAL SECURITY: 28 post (20.6%)
- HOUSING: 13 post (9.6%)
- ACCESS TO FOOD: 1 post (0.7%)
- SOCIAL_COHESION: 7 post (5.1%)
- non-poverty posts: 17 post (12.5%)

analyzing San Luis Potosí (32 posts)...


Classifying San Luis Potosí: 100%|██████████| 32/32 [00:01<00:00, 25.81it/s]



results for San Luis Potosí:
total posts: 32

distribution of posts across dimensions of poverty:
- INCOME: 6 post (18.8%)
- ACCESS TO HEALTH SERVICES: 14 post (43.8%)
- EDUCATIONAL_LAG: 2 post (6.2%)
- ACCESS TO SOCIAL SECURITY: 0 post (0.0%)
- HOUSING: 1 post (3.1%)
- ACCESS TO FOOD: 0 post (0.0%)
- SOCIAL_COHESION: 2 post (6.2%)
- non-poverty posts: 7 post (21.9%)

analyzing Sonora (241 posts)...


Classifying Sonora: 100%|██████████| 241/241 [00:35<00:00,  6.83it/s]



results for Sonora:
total posts: 241

distribution of posts across dimensions of poverty:
- INCOME: 68 post (28.2%)
- ACCESS TO HEALTH SERVICES: 44 post (18.3%)
- EDUCATIONAL_LAG: 12 post (5.0%)
- ACCESS TO SOCIAL SECURITY: 66 post (27.4%)
- HOUSING: 23 post (9.5%)
- ACCESS TO FOOD: 9 post (3.7%)
- SOCIAL_COHESION: 7 post (2.9%)
- non-poverty posts: 12 post (5.0%)

analyzing Tabasco (79 posts)...


Classifying Tabasco: 100%|██████████| 79/79 [00:02<00:00, 26.86it/s]



results for Tabasco:
total posts: 79

distribution of posts across dimensions of poverty:
- INCOME: 25 post (31.6%)
- ACCESS TO HEALTH SERVICES: 19 post (24.1%)
- EDUCATIONAL_LAG: 2 post (2.5%)
- ACCESS TO SOCIAL SECURITY: 11 post (13.9%)
- HOUSING: 11 post (13.9%)
- ACCESS TO FOOD: 0 post (0.0%)
- SOCIAL_COHESION: 3 post (3.8%)
- non-poverty posts: 8 post (10.1%)

analyzing Tlaxcala (39 posts)...


Classifying Tlaxcala: 100%|██████████| 39/39 [00:09<00:00,  3.91it/s]



results for Tlaxcala:
total posts: 39

distribution of posts across dimensions of poverty:
- INCOME: 2 post (5.1%)
- ACCESS TO HEALTH SERVICES: 9 post (23.1%)
- EDUCATIONAL_LAG: 2 post (5.1%)
- ACCESS TO SOCIAL SECURITY: 7 post (17.9%)
- HOUSING: 2 post (5.1%)
- ACCESS TO FOOD: 0 post (0.0%)
- SOCIAL_COHESION: 10 post (25.6%)
- non-poverty posts: 7 post (17.9%)

analyzing Veracruz (264 posts)...


Classifying Veracruz: 100%|██████████| 264/264 [00:09<00:00, 27.19it/s]



results for Veracruz:
total posts: 264

distribution of posts across dimensions of poverty:
- INCOME: 72 post (27.3%)
- ACCESS TO HEALTH SERVICES: 52 post (19.7%)
- EDUCATIONAL_LAG: 15 post (5.7%)
- ACCESS TO SOCIAL SECURITY: 51 post (19.3%)
- HOUSING: 27 post (10.2%)
- ACCESS TO FOOD: 2 post (0.8%)
- SOCIAL_COHESION: 19 post (7.2%)
- non-poverty posts: 26 post (9.8%)

analyzing Yucatán (74 posts)...


Classifying Yucatán: 100%|██████████| 74/74 [00:02<00:00, 31.50it/s]



results for Yucatán:
total posts: 74

distribution of posts across dimensions of poverty:
- INCOME: 13 post (17.6%)
- ACCESS TO HEALTH SERVICES: 13 post (17.6%)
- EDUCATIONAL_LAG: 6 post (8.1%)
- ACCESS TO SOCIAL SECURITY: 12 post (16.2%)
- HOUSING: 11 post (14.9%)
- ACCESS TO FOOD: 9 post (12.2%)
- SOCIAL_COHESION: 6 post (8.1%)
- non-poverty posts: 4 post (5.4%)

analyzing Chiapas (219 posts)...


Classifying Chiapas: 100%|██████████| 219/219 [00:11<00:00, 19.37it/s]



results for Chiapas:
total posts: 219

distribution of posts across dimensions of poverty:
- INCOME: 26 post (11.9%)
- ACCESS TO HEALTH SERVICES: 64 post (29.2%)
- EDUCATIONAL_LAG: 25 post (11.4%)
- ACCESS TO SOCIAL SECURITY: 17 post (7.8%)
- HOUSING: 29 post (13.2%)
- ACCESS TO FOOD: 6 post (2.7%)
- SOCIAL_COHESION: 37 post (16.9%)
- non-poverty posts: 15 post (6.8%)

count of posts per dimension:
dimension            ACCESS TO FOOD  ACCESS TO HEALTH SERVICES  \
state                                                            
Aguascalientes                   12                         29   
Baja California                   1                         25   
Baja California Sur               1                          5   
Campeche                          0                         16   
Chiapas                           6                         64   
Chihuahua                        13                         47   
Ciudad de México                  1                        105   
Coa