In [2]:
from sec_edgar_downloader import Downloader
#needs to comply with sec privacy :(( )
dl = Downloader("test", "test@test.com")
import pandas as pd 
import time
import requests

#sec offers an api to get the daily or quartely indexes, metadata for all fillings
#there is no endpoint to specifically search by form type
#the actual data can only be downloaded with an accesion number and cik
#using cik fetched from daily index, fetch all fillings by company and select those with type D 
#download the actual file using url from the data line(the current data url returns the entire txt, we only need the actual xml file for the filing ) 
year = 2024
quarter = 4
date = '20241127'
base_url = 'https://www.sec.gov/Archives/edgar/daily-index'
index_url = f'{base_url}/{year}/QTR{quarter}/company.{date}.idx'

headers = {
    'User-Agent': 'YourNameHere (your_email@example.com)',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'www.sec.gov',
    'Connection': 'keep-alive',
}

response = requests.get(index_url, headers=headers)


if response.status_code == 403:
    print("Access denied. Ensure you have a proper User-Agent header.")
    exit()

response.raise_for_status() 


content = response.text


lines = content.splitlines()


form_d_filings = []



header = ["Company Name", "Form Type", "CIK", "Date Filed", "File Name"]
data_lines = lines[3:] 
#daily index returns idx file with all the fillings for the day, fixed width format, parse it into pandas 
records = []
for line in data_lines:
    if line.strip():  
        company_name = line[:60].strip()
        form_type = line[60:71].strip()
        cik = line[71:82].strip()
        date_filed = line[82:92].strip()
        file_name = line[92:].strip()
        records.append([company_name, form_type, cik, date_filed, file_name])


df = pd.DataFrame(records, columns=header)
print(response.text)

form_d_df = df[df["Form Type"] == "D"] 
row_dict = form_d_df.iloc[1].to_dict()  
print("Row at index 1 as a dictionary:")
for key, value in row_dict.items():
    print(f"{key}: {value}")
#actual data can be downloaded with CIK and an accession number for the filling
form_d_df['Acession_number'] = form_d_df['File Name'].str.split('/').str[-1].str.replace('-', '').str.replace('.txt', '', regex=False)
form_d_df

base_url = "https://www.sec.gov/Archives/edgar/data/"
headers = {
    'User-Agent': 'Test (test@test.com)',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'www.sec.gov',
    'Connection': 'keep-alive',
}
xml_data_list = []

for index, row in form_d_df.head(5).iterrows():
    cik = row['CIK']
    accession_number = row['Acession_number']
    url = f"{base_url}{cik}/{accession_number}/primary_doc.xml"
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            xml_data_list.append(response.text)
        else:
            xml_data_list.append(f"Failed for {url} with status code {response.status_code}")
    except Exception as e:
        xml_data_list.append(f"Error for {url}: {str(e)}")

for i, content in enumerate(xml_data_list[:5]):
    print(f"Entry {i + 1}:\n{content}\n{'-'*40}")

#//todo parse xml file to pandas 

#-----

import ast  # For safely evaluating the string representation of dictionaries/lists
import xml.etree.ElementTree as ET

# Function to safely extract text from an XML element
def safe_find_text(element, path):
    result = element.find(path)
    return result.text if result is not None else None

# Function to process the 'Related Persons' data
def process_related_persons(related_persons_data):
    columns = {}

    # Initialize counters for Executive Officer numbering
    executive_officer_count = 1

    for person in related_persons_data:
        person_name = person.get('Related Person Name', 'N/A')
        person_address = person.get('Related Person Address', 'N/A')
        person_city = person.get('Related Person City', 'N/A')
        person_relationships = person.get('Related Person Relationships', 'N/A')

        # Create the detailed person information string
        person_details = f"Related Person Name: {person_name}\n" \
                         f"Related Person Address: {person_address}\n" \
                         f"Related Person City: {person_city}\n" \
                         f"Related Person Relationships: {person_relationships}"

        # Assign to appropriate columns based on roles
        relationships = [rel.strip() for rel in person_relationships.split(',')]
        if 'Executive Officer' in relationships:
            column_name = f'Executive Officer {executive_officer_count}'
            columns[column_name] = person_details
            executive_officer_count += 1

        if 'Director' in relationships:
            columns['Executive Officer: Director'] = person_details

        if 'Promoter' in relationships:
            columns['Executive Officer: Promoter'] = person_details

    # Ensure numbering continuity if there are more persons than initial columns
    for i in range(executive_officer_count, 8):
        columns[f'Executive Officer {i}'] = 'N/A'

    return columns

# Iterate through xml_data_list to process each entry
rows = []
for xml_data in xml_data_list:
    root = ET.fromstring(xml_data)

    # Initialize a dictionary to hold the row data
    row = {
        'CIK': safe_find_text(root, './/primaryIssuer/cik'),
        'Entity Name': safe_find_text(root, './/primaryIssuer/entityName'),
        'Issuer Address': safe_find_text(root, './/primaryIssuer/issuerAddress/street1'),
        'City': safe_find_text(root, './/primaryIssuer/issuerAddress/city'),
        'State': safe_find_text(root, './/primaryIssuer/issuerAddress/stateOrCountry'),
        'Zip Code': safe_find_text(root, './/primaryIssuer/issuerAddress/zipCode'),
        'Issuer Phone Number': safe_find_text(root, './/primaryIssuer/issuerPhoneNumber'),
        'Jurisdiction of Incorporation': safe_find_text(root, './/primaryIssuer/jurisdictionOfInc'),
        'Year of Incorporation': safe_find_text(root, './/primaryIssuer/yearOfInc/value'),
        'Offering Amount': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalOfferingAmount'),
        'Amount Sold': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalAmountSold'),
        'Amount Remaining': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalRemaining'),
        'Date of First Sale': safe_find_text(root, './/offeringData/typeOfFiling/dateOfFirstSale/value'),
        'Has Non-Accredited Investors': safe_find_text(root, './/offeringData/investors/hasNonAccreditedInvestors'),
        'Number Already Invested': safe_find_text(root, './/offeringData/investors/totalNumberAlreadyInvested')
    }

    # Extract related persons as a list of dictionaries
    related_persons = root.findall('.//offeringData/relatedPersonsList/relatedPersonInfo')
    related_persons_list = []
    for person in related_persons:
        related_persons_list.append({
            'Related Person Name': f"{safe_find_text(person, './/relatedPersonName/firstName')} {safe_find_text(person, './/relatedPersonName/lastName')}",
            'Related Person Address': safe_find_text(person, './/relatedPersonAddress/street1'),
            'Related Person City': safe_find_text(person, './/relatedPersonAddress/city'),
            'Related Person Relationships': ", ".join([rel.text for rel in person.findall('.//relatedPersonRelationshipList/relationship')])
        })

    # Process related persons data into role-based columns
    role_columns = process_related_persons(related_persons_list)
    row.update(role_columns)
    rows.append(row)

# Convert the list of rows to a pandas DataFrame
df = pd.DataFrame(rows)

# Display the resulting DataFrame

df




          
     

Description:           Daily Index of EDGAR Dissemination Feed by Company Name
Last Data Received:    Nov 27, 2024
Comments:              webmaster@sec.gov
Anonymous FTP:         ftp://ftp.sec.gov/edgar/
 
 
 
 
Company Name                                                  Form Type   CIK
      Date Filed  File Name
-------------------------------------------------------------------------------------------------------------------------------------------------
111, Inc.                                                     6-K         1738906     20241127    edgar/data/1738906/0000950103-24-016821.txt         
1250 Henderson Apartments, LLC                                D/A         2035254     20241127    edgar/data/2035254/0002007560-24-000067.txt         
1st FRANKLIN FINANCIAL CORP                                   424B3       38723       20241127    edgar/data/38723/0000038723-24-000159.txt           
1st FRANKLIN FINANCIAL CORP                                   424B3       38723    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  form_d_df['Acession_number'] = form_d_df['File Name'].str.split('/').str[-1].str.replace('-', '').str.replace('.txt', '', regex=False)


Entry 1:
<?xml version="1.0"?>
<edgarSubmission>

    <schemaVersion>X0708</schemaVersion>

    <submissionType>D</submissionType>

    <testOrLive>LIVE</testOrLive>

    <primaryIssuer>
        <cik>0002046396</cik>
        <entityName>4th South Valor Capital LLC</entityName>
        <issuerAddress>
            <street1>400 CLEVELAND STREET</street1>
            <city>CLEARWATER</city>
            <stateOrCountry>FL</stateOrCountry>
            <stateOrCountryDescription>FLORIDA</stateOrCountryDescription>
            <zipCode>33755</zipCode>
        </issuerAddress>
        <issuerPhoneNumber>727-441-2144</issuerPhoneNumber>
        <jurisdictionOfInc>FLORIDA</jurisdictionOfInc>
        <issuerPreviousNameList>
            <value>None</value>
        </issuerPreviousNameList>
        <edgarPreviousNameList>
            <value>None</value>
        </edgarPreviousNameList>
        <entityType>Limited Liability Company</entityType>
        <yearOfInc>
            <withinFiveYears>true</

Unnamed: 0,CIK,Entity Name,Issuer Address,City,State,Zip Code,Issuer Phone Number,Jurisdiction of Incorporation,Year of Incorporation,Offering Amount,...,Date of First Sale,Has Non-Accredited Investors,Number Already Invested,Executive Officer 1,Executive Officer 2,Executive Officer 3,Executive Officer 4,Executive Officer 5,Executive Officer 6,Executive Officer 7
0,2046396,4th South Valor Capital LLC,400 CLEVELAND STREET,CLEARWATER,FL,33755,727-441-2144,FLORIDA,2022,160000000,...,2024-11-13,False,10,,,,,,,
1,2046470,8369 Campground Circle LLC,9878 W BELLEVIEW AVE #5058,"DENVER,",CO,80123,4045329531,WYOMING,2024,2000000,...,,True,0,,,,,,,
2,2046439,"9005 Sorensen XC Opportunity Fund, LLC",2100 ROSS AVENUE,DALLAS,TX,75201,562-546-0200,DELAWARE,2024,12920000,...,2024-11-12,False,38,,,,,,,
3,2044740,"AG-0617 Gaingels Fund I, a series of Zachary G...","119 South Main Street, Suite 220",SEATTLE,WA,98104,2068016359,DELAWARE,2024,459966,...,2024-11-12,False,66,,,,,,,
4,2046306,ALL PRO THE PARKS OF PENSACOLA APARTMENTS LLLP,"13521 NORTHGATE ESTATES DRIVE, SUITE 200",COLORADO SPRINGS,CO,80921,719-358-8580,COLORADO,2024,9500000,...,2024-03-25,False,102,,,,,,,


In [8]:
import ast  # For safely evaluating the string representation of dictionaries/lists
import xml.etree.ElementTree as ET

# Function to safely extract text from an XML element
def safe_find_text(element, path):
    result = element.find(path)
    return result.text if result is not None else None

# Function to process the 'Related Persons' data
def process_related_persons(related_persons_data):
    columns = {}

    # Initialize counters for Executive Officer numbering
    executive_officer_count = 1

    for person in related_persons_data:
        person_name = person.get('Related Person Name', 'N/A')
        person_address = person.get('Related Person Address', 'N/A')
        person_city = person.get('Related Person City', 'N/A')
        person_relationships = person.get('Related Person Relationships', 'N/A')

        # Create the detailed person information string
        person_details = f"Related Person Name: {person_name}\n" \
                         f"Related Person Address: {person_address}\n" \
                         f"Related Person City: {person_city}\n" \
                         f"Related Person Relationships: {person_relationships}"

        # Assign to appropriate columns based on roles
        relationships = [rel.strip() for rel in person_relationships.split(',')]
        if 'Executive Officer' in relationships:
            column_name = f'Executive Officer {executive_officer_count}'
            columns[column_name] = person_details
            executive_officer_count += 1

        if 'Director' in relationships:
            columns['Executive Officer: Director'] = person_details

        if 'Promoter' in relationships:
            columns['Executive Officer: Promoter'] = person_details

    # Ensure numbering continuity if there are more persons than initial columns
    for i in range(executive_officer_count, 8):
        columns[f'Executive Officer {i}'] = 'N/A'

    return columns

# Iterate through xml_data_list to process each entry
rows = []
for xml_data in xml_data_list:
    root = ET.fromstring(xml_data)

    # Initialize a dictionary to hold the row data
    row = {
        'CIK': safe_find_text(root, './/primaryIssuer/cik'),
        'Entity Name': safe_find_text(root, './/primaryIssuer/entityName'),
        'Issuer Address': safe_find_text(root, './/primaryIssuer/issuerAddress/street1'),
        'City': safe_find_text(root, './/primaryIssuer/issuerAddress/city'),
        'State': safe_find_text(root, './/primaryIssuer/issuerAddress/stateOrCountry'),
        'Zip Code': safe_find_text(root, './/primaryIssuer/issuerAddress/zipCode'),
        'Issuer Phone Number': safe_find_text(root, './/primaryIssuer/issuerPhoneNumber'),
        'Jurisdiction of Incorporation': safe_find_text(root, './/primaryIssuer/jurisdictionOfInc'),
        'Year of Incorporation': safe_find_text(root, './/primaryIssuer/yearOfInc/value'),
        'Offering Amount': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalOfferingAmount'),
        'Amount Sold': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalAmountSold'),
        'Amount Remaining': safe_find_text(root, './/offeringData/offeringSalesAmounts/totalRemaining'),
        'Date of First Sale': safe_find_text(root, './/offeringData/typeOfFiling/dateOfFirstSale/value'),
        'Has Non-Accredited Investors': safe_find_text(root, './/offeringData/investors/hasNonAccreditedInvestors'),
        'Number Already Invested': safe_find_text(root, './/offeringData/investors/totalNumberAlreadyInvested')
    }

    # Extract related persons as a list of dictionaries
    related_persons = root.findall('.//offeringData/relatedPersonsList/relatedPersonInfo')
    related_persons_list = []
    for person in related_persons:
        related_persons_list.append({
            'Related Person Name': f"{safe_find_text(person, './/relatedPersonName/firstName')} {safe_find_text(person, './/relatedPersonName/lastName')}",
            'Related Person Address': safe_find_text(person, './/relatedPersonAddress/street1'),
            'Related Person City': safe_find_text(person, './/relatedPersonAddress/city'),
            'Related Person Relationships': ", ".join([rel.text for rel in person.findall('.//relatedPersonRelationshipList/relationship')])
        })

    # Process related persons data into role-based columns
    role_columns = process_related_persons(related_persons_list)
    row.update(role_columns)
    rows.append(row)

# Convert the list of rows to a pandas DataFrame
df = pd.DataFrame(rows)

# Display the resulting DataFrame



In [77]:
df

Unnamed: 0,CIK,Entity Name,Issuer Address,City,State,Zip Code,Issuer Phone Number,Jurisdiction of Incorporation,Year of Incorporation,Offering Amount,...,Date of First Sale,Has Non-Accredited Investors,Number Already Invested,Executive Officer 1,Executive Officer 2,Executive Officer 3,Executive Officer 4,Executive Officer 5,Executive Officer 6,Executive Officer 7
0,2046396,4th South Valor Capital LLC,400 CLEVELAND STREET,CLEARWATER,FL,33755,727-441-2144,FLORIDA,2022,160000000,...,2024-11-13,False,10,,,,,,,
1,2046470,8369 Campground Circle LLC,9878 W BELLEVIEW AVE #5058,"DENVER,",CO,80123,4045329531,WYOMING,2024,2000000,...,,True,0,,,,,,,
2,2046439,"9005 Sorensen XC Opportunity Fund, LLC",2100 ROSS AVENUE,DALLAS,TX,75201,562-546-0200,DELAWARE,2024,12920000,...,2024-11-12,False,38,,,,,,,
3,2044740,"AG-0617 Gaingels Fund I, a series of Zachary G...","119 South Main Street, Suite 220",SEATTLE,WA,98104,2068016359,DELAWARE,2024,459966,...,2024-11-12,False,66,,,,,,,
4,2046306,ALL PRO THE PARKS OF PENSACOLA APARTMENTS LLLP,"13521 NORTHGATE ESTATES DRIVE, SUITE 200",COLORADO SPRINGS,CO,80921,719-358-8580,COLORADO,2024,9500000,...,2024-03-25,False,102,,,,,,,
