In [3]:
import os
import requests
import pandas as pd
import concurrent.futures

In [4]:
def einLookup(eins, session=None, timeout=30, max_workers=8):
    """Look up filing summaries for a collection of EINs via the ProPublica API.

    Each EIN is requested concurrently from the Nonprofit Explorer
    ``organizations/{ein}.json`` endpoint. The first entry of the response's
    ``filings_with_data`` list is summarised (presumably the most recent
    filing -- confirm against the API documentation).

    Args:
        eins: Iterable of EIN values; each is interpolated into the URL as-is.
        session: Optional object exposing ``get(url, timeout=...)`` (for
            example a ``requests.Session``). It is shared by all worker
            threads. Defaults to the ``requests`` module itself.
        timeout: Seconds to wait for each HTTP request before giving up.
        max_workers: Size of the thread pool used for the lookups.

    Returns:
        A list of 10-item rows, in the same order as ``eins``:
        ``[ein, name, total_revenue, tax_period_year, pdf_url, city, state,
        ntee_code, net_revenue, form_type]``. Fields missing from the response
        are reported as ``'N/A'``. EINs with no filing data, or whose request
        failed, are omitted (a message is printed for each).
    """
    http = requests if session is None else session

    def is_number(value):
        # bool is a subclass of int; it is never a valid monetary amount.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def fetch_data(ein):
        try:
            url = f'https://projects.propublica.org/nonprofits/api/v2/organizations/{ein}.json'
            # Without a timeout a stalled connection would block this worker forever.
            response = http.get(url, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            filings = data.get('filings_with_data')
            if not filings:
                print(f'No filing data found for EIN {ein}')
                return None

            filing_data = filings[0]
            organization = data.get('organization') or {}

            total_revenue = filing_data.get('totrevenue', 'N/A')
            total_expenses = filing_data.get('totfuncexpns', 'N/A')
            # Only subtract real numbers: a missing or null amount previously
            # raised TypeError here and caused the whole row to be discarded.
            if is_number(total_revenue) and is_number(total_expenses):
                net_revenue = total_revenue - total_expenses
            else:
                net_revenue = 'N/A'

            return [
                ein,
                organization.get('name', 'N/A'),
                total_revenue,
                filing_data.get('tax_prd_yr', 'N/A'),
                filing_data.get('pdf_url', 'N/A'),
                organization.get('city', 'N/A'),
                organization.get('state', 'N/A'),
                organization.get('ntee_code', 'N/A'),
                net_revenue,
                filing_data.get('formtype', 'N/A'),
            ]
        except Exception as e:
            # Deliberate best-effort: one failing EIN must not abort the batch.
            print(f'Error for EIN {ein}: {e}')
            return None

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order.
        fetched = list(executor.map(fetch_data, eins))

    return [row for row in fetched if row is not None]

def einLoader(csv_path=None, batch_folder=None, batch_size=50):
    """Read EINs from a CSV file, look them up in batches, and write one CSV per batch.

    Args:
        csv_path: Path to a CSV file with an ``EIN`` column. Defaults to the
            project's ``Data/EIN.csv`` location.
        batch_folder: Directory receiving ``out_batch_<n>.csv`` files; it is
            created if it does not exist. Defaults to the project's ``Batch``
            directory.
        batch_size: Number of EINs looked up (and written) per batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if csv_path is None:
        csv_path = "/home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Data/EIN.csv"
    if batch_folder is None:
        batch_folder = "/home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch"
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    einVal = pd.read_csv(csv_path)

    # Blank cells make pandas parse the column as float; skip them and undo the
    # float formatting so '123456789.0' / 'nan' are never sent as EINs.
    # NOTE(review): values are used as parsed by pandas, so leading zeros of an
    # integer-typed column are not preserved -- confirm this is acceptable.
    ein_list = []
    for raw_ein in einVal['EIN'].dropna():
        if isinstance(raw_ein, float) and raw_ein.is_integer():
            ein_list.append(str(int(raw_ein)))
        else:
            ein_list.append(str(raw_ein).strip())

    # Create the output folder up front so a missing directory is not only
    # discovered after the first batch of network requests has been made.
    os.makedirs(batch_folder, exist_ok=True)

    for batch_index, start in enumerate(range(0, len(ein_list), batch_size)):
        batch = ein_list[start:start + batch_size]
        results = einLookup(batch)
        batch_filename = os.path.join(batch_folder, f'out_batch_{batch_index}.csv')
        csvWriter(batch_filename, results)

def csvWriter(filename, data):
    """Write lookup rows to ``filename`` as a CSV file with a fixed header.

    Args:
        filename: Destination path of the CSV file.
        data: Sequence of 10-item rows matching the header columns below.
    """
    header = [
        'EIN_value',
        'Name',
        'Gross_Revenue',
        'Tax_Period_Year',
        'PDF_URL',
        'City',
        'State',
        'NTEE_Code',
        'Net_Revenue',
        'Form_Type',
    ]
    # index=False keeps the pandas row index out of the file.
    pd.DataFrame(data, columns=header).to_csv(filename, index=False)
    print(f'Data successfully written to {filename}.')

In [5]:
# Call einLoader() only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    einLoader()

No filing data found for EIN 871824915
Data successfully written to /home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch/out_batch_0.csv.
Data successfully written to /home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch/out_batch_1.csv.
No filing data found for EIN 920535709
No filing data found for EIN 873810878
No filing data found for EIN 832886466
No filing data found for EIN 886650516
No filing data found for EIN 841994814
Data successfully written to /home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch/out_batch_2.csv.
No filing data found for EIN 844321978
No filing data found for EIN 756085383
No filing data found for EIN 372006472
Data successfully written to /home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch/out_batch_3.csv.
No filing data found for EIN 882878709
No filing data found for EIN 866495457
Data successfully written to /home/dark/GitHub/Opportunity-Hack-KSJ/OpportunityHack/Batch/out_batch_4.csv.
No filing data found for EIN 8525056