In [1]:
import pandas as pd
import re


In [2]:
def extract_table(c_name,
                  input_dir='../results/gam_model_1to1',
                  output_dir='../results/temp/gam_model_1to1'):
    """Extract the parametric-coefficient table of a model summary to CSV.

    Reads ``<input_dir>/<c_name>_model_summary.txt``, collects the lines that
    follow the "Parametric coefficients:" heading, splits every line into at
    most five whitespace-separated fields and writes the parsed rows to
    ``<output_dir>/<c_name>_model_summary.csv`` (without an index column).

    Parameters
    ----------
    c_name : str
        Prefix of the summary file name (and of the CSV that is written).
    input_dir : str, optional
        Directory holding the ``*_model_summary.txt`` files.
    output_dir : str, optional
        Directory the CSV is written to. It must already exist.

    Returns
    -------
    pandas.DataFrame
        Columns "Parameter", "Estimate", "Std. Error", "t value" (numeric,
        unparseable values become NaN) and "Pr(>|t|)" (string; it keeps
        whatever trails the t value, e.g. "<2e-16 ***").

    Raises
    ------
    ValueError
        If no table lines are found after a "Parametric coefficients:" heading.
    """
    file_name = '{}/{}_model_summary.txt'.format(input_dir, c_name)
    columns = ["Parameter", "Estimate", "Std. Error", "t value", "Pr(>|t|)"]

    table_data = []
    with open(file_name, 'r') as file:
        capture = False
        for line in file:
            stripped = line.strip()
            if not capture:
                # Nothing before the section heading is relevant; in particular
                # a "---" seen earlier in the file must not end the scan.
                if "Parametric coefficients:" in line:
                    capture = True
                continue
            # End of table: the "---" legend separator or the heading of the
            # next section. The separator alone is not a reliable terminator
            # (NOTE(review): it appears to be omitted when no coefficient is
            # starred - confirm against the summaries being parsed).
            if (stripped.startswith("---")
                    or "Approximate significance of smooth terms:" in line):
                break
            if not stripped:
                # A blank line after at least one table line closes the table;
                # blank lines directly after the heading are skipped.
                if table_data:
                    break
                continue
            table_data.append(stripped)

    if not table_data:
        raise ValueError(
            "No 'Parametric coefficients:' table found in {}".format(file_name)
        )

    # The first captured line is the column-header line of the printed table
    # (it has no "Parameter" label); it is parsed like any other row so that
    # every row has five fields, and then discarded below.
    df = pd.DataFrame([row.split(maxsplit=4) for row in table_data],
                      columns=columns)
    # Keep the p-value column as text: it may contain "<" and significance stars.
    df['Pr(>|t|)'] = df['Pr(>|t|)'].astype(str)
    for col in ["Estimate", "Std. Error", "t value"]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # Discard the header line and renumber the remaining rows from 0.
    df = df.iloc[1:].reset_index(drop=True)

    df.to_csv('{}/{}_model_summary.csv'.format(output_dir, c_name), index=False)
    return df

In [5]:
# Category prefixes of the model-summary files to convert, one CSV per entry.
c_list = [
    'Grocery_and_Pharmacy',
    'General_Retail',
    'Art_and_Entertainment',
    'Restaurant_and_Bar',
    'Education',
    'Healthcare',
]

# Convert each category's summary in turn; extract_table writes the CSV itself.
for c_i in c_list:
    extract_table(c_name=c_i)