In [24]:
def save_descriptive_stats_with_groups_to_file(df, variables, group_vars, output_file_path):
    """Calculate descriptive statistics and group means for multiple group variables, then save to a CSV file."""
    # Calculate overall descriptive statistics
    stats_mean = df[variables].mean().rename('Mean')
    stats_sd = df[variables].std().rename('SD')
    stats_df = pd.concat([stats_mean, stats_sd], axis=1).transpose()

    # Calculate group means for multiple group variables
    group_means = calculate_group_means(df, group_vars, variables)

    # Combine overall statistics with group means
    combined_df = pd.concat([stats_df, group_means], sort=False)

    # Ensure the output directory exists
    output_dir = os.path.dirname(output_file_path)
    os.makedirs(output_dir, exist_ok=True)

    # Add an empty column at the beginning if your DataFrame needs an explicit placeholder
    # This helps align the first data column properly with its header
    combined_df.insert(0, '_', '')  # Adds an empty column as the first column, name it '_' or any placeholder

    # Save the combined statistics to a CSV file, without the index
    combined_df.to_csv(output_file_path, index=False)
    print(f"Descriptive statistics and group means saved to '{output_file_path}'.")


In [30]:
import pandas as pd
import os

def calculate_group_means(df, group_vars, variables):
    """Calculate means for specific groups within the dataset for multiple group variables."""
    all_group_means = pd.DataFrame()
    for group_var in group_vars:
        group_means = df.groupby(group_var)[variables].mean()
        # Rename columns to indicate these are group means for clarity
        group_means = group_means.rename(columns=lambda x: f"{x}_mean_by_{group_var}")
        # If all_group_means is empty, initialize it with group_means
        if all_group_means.empty:
            all_group_means = group_means
        else:
            # Concatenate along columns (axis=1) for different group_vars
            all_group_means = pd.concat([all_group_means, group_means], axis=1, sort=False)
    return all_group_means

def save_descriptive_stats_with_groups_to_file(df, variables, group_vars, output_file_path):
    """Calculate descriptive statistics and group means for multiple group variables, then save to a CSV file."""
    # Calculate overall descriptive statistics
    stats_mean = df[variables].mean().rename('Mean')
    stats_sd = df[variables].std().rename('SD')
    stats_df = pd.concat([stats_mean, stats_sd], axis=1).transpose()

    # Calculate group means for multiple group variables
    group_means = calculate_group_means(df, group_vars, variables)

    # Combine overall statistics with group means
    combined_df = pd.concat([stats_df, group_means], sort=False)

    # Ensure the output directory exists
    output_dir = os.path.dirname(output_file_path)
    os.makedirs(output_dir, exist_ok=True)

   
    # Save the combined statistics to a CSV file, without the index
    combined_df.to_csv(output_file_path, index=False)
    print(f"Descriptive statistics and group means saved to '{output_file_path}'.")


def main():
    # Load the dataset
    input_file_path = '../Input/summary_stats.dta'
    df = pd.read_stata(input_file_path)

    # Define the variables for which to calculate descriptive statistics
    variables = ['cap_share', 'sizelevel', 'leverage', 'betamsciworld', 'betamsciegypt', 'unrestbeta']
    
    # Define the grouping variables as a list
    group_vars = ['unconnected','connectedsum', 'ndpsum','military_ownershipsum','islamicsum','all']  # Replace with your actual grouping variables

    # Specify the output file path
    output_file_path = '../Output/Table1PanelA_with_Multiple_Groups.csv'

    # Save descriptive statistics and group means to a file
    save_descriptive_stats_with_groups_to_file(df, variables, group_vars, output_file_path)

if __name__ == "__main__":
    main()


Descriptive statistics and group means saved to '../Output/Table1PanelA_with_Multiple_Groups.csv'.


  group_means = df.groupby(group_var)[variables].mean()
  group_means = df.groupby(group_var)[variables].mean()
  group_means = df.groupby(group_var)[variables].mean()
  group_means = df.groupby(group_var)[variables].mean()
  group_means = df.groupby(group_var)[variables].mean()
  group_means = df.groupby(group_var)[variables].mean()


In [29]:
input_file = '../Output/Table1PanelA_with_Multiple_Groups.csv'
cleaned_file = 'cleaned_output.csv'

with open(input_file, 'r') as file, open(cleaned_file, 'w') as outfile:
    for line in file:
        # Replace sequences of commas with a single comma
        cleaned_line = ','.join(filter(None, line.split(',')))
        outfile.write(cleaned_line + '\n')
