In [1]:
import pandas as pd

# Load the cleaned funding dataset; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="supercleaned_dataset.csv")

# Function to infer IPO status including venture capital and private equity
def determine_ipo_status(row):
    """Classify one company row as 'post_ipo', 'likely_pre_ipo' or 'pre_ipo'.

    Args:
        row: Any object supporting ``row[column_name]`` lookup (e.g. a pandas
            Series produced by ``DataFrame.apply(..., axis=1)`` or a dict).
            Each value read is compared with ``> 0``.

    Returns:
        str: ``'post_ipo'`` if ``post_ipo_equity`` or ``post_ipo_debt`` is
        positive; otherwise ``'likely_pre_ipo'`` if any of ``round_b`` through
        ``round_h``, ``venture`` or ``private_equity`` is positive; otherwise
        ``'pre_ipo'``.

    Raises:
        KeyError: If a column that has to be inspected is missing from ``row``.
    """
    # Rule 1: any positive post-IPO funding means the company is already public.
    post_ipo_columns = ('post_ipo_equity', 'post_ipo_debt')
    if any(row[column] > 0 for column in post_ipo_columns):
        return 'post_ipo'

    # Rule 2: Round B or later, venture capital, or private equity funding marks
    # the company as likely_pre_ipo. Columns are checked in this order and the
    # scan stops at the first positive value.
    later_stage_columns = (
        'round_b',
        'round_c',
        'round_d',
        'round_e',
        'round_f',
        'round_g',
        'round_h',
        'venture',
        'private_equity',
    )
    if any(row[column] > 0 for column in later_stage_columns):
        return 'likely_pre_ipo'

    # Everything else is pre_ipo. A separate "Round A only" rule used to sit
    # here, but it returned the same label as this default, so it was dropped.
    # Values that do not compare greater than zero (such as NaN) count as
    # "no funding" for both rules above.
    return 'pre_ipo'

# Classify every company: run the row-level rule across the frame and store
# the resulting label in a new 'ipo_status' column.
df['ipo_status'] = df.apply(determine_ipo_status, axis='columns')

# Show each company's name next to its inferred status
print(df.loc[:, ['name', 'ipo_status']])


                                name      ipo_status
0                            waywire         pre_ipo
1                 &TV Communications  likely_pre_ipo
2                  'Rock' Your Paper         pre_ipo
3                  (In)Touch Network         pre_ipo
4                 -R- Ranch and Mine         pre_ipo
...                              ...             ...
42742                          Zyrra  likely_pre_ipo
42743                     Zytoprotec  likely_pre_ipo
42744                          Zzish         pre_ipo
42745  ZZNode Science and Technology  likely_pre_ipo
42746          Zzzzapp Wireless ltd.         pre_ipo

[42747 rows x 2 columns]


In [3]:
# Persist the labelled frame as CSV, leaving the row index out of the file
df.to_csv(path_or_buf='supercleaned_dataset_ipo.csv', index=False)

# Confirm where the output was written
print("File saved as supercleaned_dataset_ipo.csv")


File saved as supercleaned_dataset_ipo.csv
