# In [1]:
import pandas as pd
import os

# Load the de-duplicated post data, then in one chained step:
#   * replace ``created_utc`` (UNIX timestamps in seconds) with datetimes, and
#   * add a ``year`` column taken from those datetimes.
# ``assign`` evaluates its keyword arguments in order, so the ``year`` lambda
# already sees the converted ``created_utc`` column.
df = pd.read_csv(
    '../all_csv_images/all_data_all_images_no_duplicates.csv'
).assign(
    created_utc=lambda frame: pd.to_datetime(frame['created_utc'], unit='s'),
    year=lambda frame: frame['created_utc'].dt.year,
)

# Write one plain-text statistics report per calendar year, covering 2015
# through 2022 inclusive (the end of ``range`` is exclusive). Each report goes
# to ``../<year>/statistics_<year>.txt``; years with no rows in ``df`` are
# skipped and produce no folder or file.

# (label, column) pairs reported as "Total <label>" followed by
# "Average <label> per post", in the order they appear in the report.
count_columns = (
    ('awards', 'total_awards_received'),
    ('comments', 'num_comments'),
    ('subreddit subscribers', 'subreddit_subscribers'),
    ('crossposts', 'num_crossposts'),
)

# The years present in the data do not change inside the loop, so look them
# up once instead of recomputing ``unique()`` on every iteration.
years_present = df['year'].unique()

for year in range(2015, 2023):
    if year not in years_present:
        continue

    # Rows belonging to the current year only
    data = df[df['year'] == year]

    # Create the per-year subfolder if it doesn't already exist
    os.makedirs(f'../{year}', exist_ok=True)

    # Explicit encoding keeps the output independent of the platform locale.
    with open(f'../{year}/statistics_{year}.txt', 'w', encoding='utf-8') as file:
        # Score statistics (written using the pandas Series text representation)
        score_stats = data['score'].agg(['mean', 'median', 'std'])
        file.write(f'Score statistics:\n{score_stats}\n\n')

        # Number of distinct authors
        unique_authors = data['author'].nunique()
        file.write(f'Unique authors: {unique_authors}\n\n')

        # Sum and per-post mean for each count-like column
        for label, column in count_columns:
            values = data[column]
            file.write(f'Total {label}: {values.sum()}\n\n')
            file.write(f'Average {label} per post: {values.mean()}\n\n')

