In [1]:
import pandas as pd

# Read the CSV file into a pandas DataFrame
data = pd.read_csv('autism_reddit_new_keywords.csv')

# Define the mapping of themes to keywords
themes = {
    'symptoms': ['symptom', 'sign', 'behavior'],
    'care/treatment': ['care', 'treatment', 'therapy'],
    'drugs/interventions': ['drug', 'medication', 'intervention'],
    'epidemiology': ['epidemiology', 'prevalence', 'incidence'],
    'stigma': ['stigma', 'discrimination'],
    'diagnosis': ['diagnosis', 'assessment'],
    'assistive technologies (smart)': ['assistive', 'technology', 'smart'],
    'burden of ASD': ['burden', 'challenges'],
    'caregivers of ASD': ['caregiver', 'parent', 'family'],
    'natural cure': ['natural', 'alternative', 'cure'],
    'COVID and ASD': ['COVID', 'pandemic', 'coronavirus']
}

# Create a new column for the theme and initialize it as None
data['theme'] = None

# Iterate over each row in the DataFrame
for index, row in data.iterrows():
    title = row['title']
    
    # Search for keywords in the title to determine the theme
    for theme, keywords in themes.items():
        for keyword in keywords:
            if keyword in title:
                data.at[index, 'theme'] = theme
                break  # Break the inner loop if a match is found
        if data.at[index, 'theme'] is not None:
            break  # Break the outer loop if a match is found

# Clean the data
# Drop any rows with missing theme values
data.dropna(subset=['theme'], inplace=True)

# Group the data by themes
grouped_data = data.groupby('theme')

# Iterate over each theme group and perform further analysis or processing
for theme, theme_group in grouped_data:
    # Perform operations on each theme group
    # For example, you can print the number of posts in each theme group
    print(f"Theme: {theme}")
    print(f"Number of posts: {len(theme_group)}")
    print("--------------------")
    
    # You can also access specific columns in the theme_group DataFrame
    # For example, to print the titles of the posts in each theme group
    print(theme_group['title'])
    print("====================")


Theme: burden of ASD
Number of posts: 1
--------------------
246    Are social challenges fundamental to autism?
Name: title, dtype: object
Theme: care/treatment
Number of posts: 6
--------------------
83                        Are you scared of being alone?
84     Autistic-catered therapy for things unrelated ...
102    How many times does your school district provi...
177    How to not trauma dump on people, when you hav...
185    My brain doesn't want to do anything anymore a...
236    Be careful what and with whom you share inform...
Name: title, dtype: object
Theme: caregivers of ASD
Number of posts: 7
--------------------
49     How do i politely tell my family to NEVER FUCK...
63     I NEED to get money to move out of my abusive ...
110    Something feels terribly off and not right wit...
137    How did you become aware/discover the many res...
148              Apparently I don’t know how to be angry
178                  My family want I reaction like them
181                   