In [2]:
import pandas as pd

# Step 0: Load the data
# Directory holding the tab-separated sub.txt / num.txt extracts.
# NOTE(review): presumably the SEC Financial Statement Data Sets for 2009 Q4 —
# confirm. The absolute path is machine-specific; adjust it when running elsewhere.
path = "/home/codysch/Downloads/2009q4/"

sub = pd.read_csv(path + "sub.txt", sep="\t", low_memory=False)
# on_bad_lines="skip" silently drops rows that fail to parse, so `num` may hold
# fewer rows than the file on disk.
num = pd.read_csv(path + "num.txt", sep="\t", low_memory=False, on_bad_lines="skip")

# Attach the sub columns to every num row; the inner join keeps only rows whose
# 'adsh' key is present in both tables.
merged = pd.merge(num, sub, on="adsh", how="inner")

# Column sets shared by the revenue and receivable extracts below.
record_columns = ["adsh", "cik", "name", "period", "tag", "value"]
pair_keys = ["adsh", "cik", "name", "period"]

# Step 1: Extract all revenue-related records
# Case-insensitive substring match: every tag containing "Revenue" is kept, and
# rows with a missing tag are excluded (na=False).
is_revenue = merged["tag"].str.contains("Revenue", case=False, na=False)
revenues = merged.loc[is_revenue, record_columns].rename(columns={"value": "revenue"})

# Step 2: Extract all accounts receivable-related records (same matching rule)
is_receivable = merged["tag"].str.contains("Receivable", case=False, na=False)
receivables = merged.loc[is_receivable, record_columns].rename(
    columns={"value": "receivables"}
)

# Step 3: Merge revenue and receivable records by company and period
# NOTE(review): this is a many-to-many join. Each extract can hold several rows
# per (adsh, cik, name, period), and the merge emits every revenue/receivable
# combination for a key, so one filing can contribute many ratio rows. Narrow
# the tag filters if exactly one pair per filing is intended.
paired = pd.merge(
    revenues[pair_keys + ["revenue"]],
    receivables[pair_keys + ["receivables"]],
    on=pair_keys,
    how="inner",
)

# Compute Accounts Receivable to Revenue ratio
paired["AR_to_Revenue"] = paired["receivables"] / paired["revenue"]

# Keep ratios in the inclusive range [0, 10]. Besides negatives and large
# values, this also drops NaN and the +/-inf produced by zero revenue, because
# those never satisfy the range test.
paired = paired[paired["AR_to_Revenue"].between(0, 10)]

# Preview first few results
print(paired.head(20))

# Step 4: Average the ratio per company, highest first.
# This definition was missing from the cell, so the print below raised
# NameError when run on a fresh kernel.
company_ratios = (
    paired.groupby(["cik", "name"], as_index=False)["AR_to_Revenue"]
    .mean()
    .sort_values("AR_to_Revenue", ascending=False)
)

print("\nTop 10 companies by average AR-to-Revenue ratio:")
print(company_ratios.head(10))





                    adsh    cik                 name    period       revenue  \
0   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
1   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
2   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
5   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
8   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
9   0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
10  0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
18  0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
23  0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  6.910000e+08   
24  0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  4.066000e+10   
25  0000040545-09-000074  40545  GENERAL ELECTRIC CO  20090930  4.066000e+10   
26  0000040545-09-000074  40545  GENERAL