In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 

In [2]:
data = pd.read_csv("C:/Users/ADMIN/Documents/My projects/Startups_Analysis-/Startup Failure (Finance and Insurance).csv")

In [3]:
data.shape

(47, 20)

In [4]:
data.head()

Unnamed: 0,Name,Sector,Years of Operation,What They Did,How Much They Raised,Why They Failed,Takeaway,Giants,No Budget,Competition,Poor Market Fit,Acquisition Stagnation,Platform Dependency,Monetization Failure,Niche Limits,Execution Flaws,Trend Shifts,Toxicity/Trust Issues,Regulatory Pressure,Overhype
0,Avant,Finance and Insurance,2012-2023,Online personal loans,$655M,Lost to LendingClub and high defaults,Lending needs risk balance,1,1,1,0,0,0,0,0,0,0,0,0,0.0
1,Bitpass,Finance and Insurance,2002-2008,Micropayments platform,$2M,Lost to PayPal and low adoption,Micropayments need mass use,1,0,1,1,0,0,0,0,0,0,0,0,0.0
2,Cake Financial,Finance and Insurance,2006-2011,Portfolio tracking tool,$3M,Lost to Mint and sold to TradeKing,Finance tools need scale,1,0,1,0,1,0,0,0,0,0,0,0,0.0
3,Circle,Finance and Insurance,2013-2023,Crypto payments and stablecoin,$500M,Lost to Coinbase and market shifts,Crypto needs stability,1,0,1,0,0,0,0,0,0,1,0,0,0.0
4,Clarity Money,Finance and Insurance,2016-2022,Personal finance app,$11M,Lost to Mint/Acorns and sold to Goldman,Finance apps need edge,1,0,1,0,1,0,0,0,0,0,0,0,0.0


In [5]:
data.columns

Index(['Name', 'Sector', 'Years of Operation', 'What They Did',
       'How Much They Raised', 'Why They Failed', 'Takeaway', 'Giants',
       'No Budget', 'Competition', 'Poor Market Fit', 'Acquisition Stagnation',
       'Platform Dependency', 'Monetization Failure', 'Niche Limits',
       'Execution Flaws', 'Trend Shifts', 'Toxicity/Trust Issues',
       'Regulatory Pressure', 'Overhype'],
      dtype='object')

In [21]:
data.isnull().sum()

#Overhype has one missing value we fill it with 0 (not overhyped)
data['Overhype'].fillna(0, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Overhype'].fillna(0, inplace=True)


In [5]:
data.isnull().sum()

Name                      0
Sector                    0
Years of Operation        0
What They Did             0
How Much They Raised      0
Why They Failed           0
Takeaway                  0
Giants                    0
No Budget                 0
Competition               0
Poor Market Fit           0
Acquisition Stagnation    0
Platform Dependency       0
Monetization Failure      0
Niche Limits              0
Execution Flaws           0
Trend Shifts              0
Toxicity/Trust Issues     0
Regulatory Pressure       0
Overhype                  0
dtype: int64

In [49]:
# Get basic info about the dataset
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Name                    47 non-null     object 
 1   Sector                  47 non-null     object 
 2   Years of Operation      47 non-null     object 
 3   What They Did           47 non-null     object 
 4   How Much They Raised    47 non-null     object 
 5   Why They Failed         47 non-null     object 
 6   Takeaway                47 non-null     object 
 7   Giants                  47 non-null     int64  
 8   No Budget               47 non-null     int64  
 9   Competition             47 non-null     int64  
 10  Poor Market Fit         47 non-null     int64  
 11  Acquisition Stagnation  47 non-null     int64  
 12  Platform Dependency     47 non-null     int64  
 13  Monetization Failure    47 non-null     int64  
 14  Niche Limits            47 non-null     int6

In [50]:
# Check for duplicate rows
print(f"Duplicates: {data.duplicated().sum()}")

Duplicates: 0


In [13]:
data['What They Did'].unique()

array(['Online personal loans', 'Micropayments platform',
       'Portfolio tracking tool', 'Crypto payments and stablecoin',
       'Personal finance app', 'Mobile wallet', 'NFT marketplace',
       'Crowdfunding for startups', 'Prepaid card payments',
       'Personal crowdfunding', 'Social sentiment for trading',
       'Loans for subprime borrowers', 'P2P lending platform',
       'Coding bootcamp loans', 'Commission-free brokerage',
       'Real estate lending platform', 'Financial data aggregator',
       'Mobile payments platform', 'Online pawn lending',
       'Biometric payments', 'Digital estate planning', 'Payment gateway',
       'Debt management app', 'Prepaid debit card', 'Small biz banking',
       'Personal finance for gig workers', 'Robo-advisor',
       'Neobank with budgeting', 'Financial planning tools',
       'Social finance platform', 'P2P payments (early)',
       'Supply chain finance', 'Social crowdfunding',
       'Crowdfunded trusts', 'AI-driven lending',
  

## CAUSES OF FAILURE 

In [None]:
Causes = ['Giants','No Budget', 'Competition', 'Poor Market Fit', 'Acquisition Stagnation',
'Platform Dependency', 'Monetization Failure', 'Niche Limits','Execution Flaws', 'Trend Shifts',
'Toxicity/Trust Issues','Regulatory Pressure', 'Overhype']

def count_startups_by_failure(df, causes):
    """
    Counts the number of startups that failed due to each cause.

    Parameters:
    df (pd.DataFrame): The dataset containing failure causes.
    causes (list): List of failure cause column names.

    Returns:
    pd.DataFrame: A table with failure causes and their respective counts.
    """
    failure_counts = {cause: df[df[cause] == 1].shape[0] for cause in causes}
    
    # Convert dictionary to DataFrame
    failure_df = pd.DataFrame(failure_counts.items(), columns=["Cause of Failure", "Number of Startups"])
    
    # Sort in descending order 
    failure_df = failure_df.sort_values(by="Number of Startups", ascending=False).reset_index(drop=True)

    # Adjust index to start from 1 instead of 0
    failure_df.index = failure_df.index + 1
    
    return failure_df

# Example usage:
failure_table = count_startups_by_failure(data, Causes)

# style 
failure_table_styled = failure_table.style.set_table_styles(
    [{'selector': 'th', 'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold')]}]
).set_properties(**{'text-align': 'center'})

# Display 
failure_table_styled




Unnamed: 0,Cause of Failure,Number of Startups
1,Competition,47
2,Giants,45
3,Acquisition Stagnation,12
4,Poor Market Fit,11
5,Niche Limits,9
6,No Budget,3
7,Execution Flaws,3
8,Toxicity/Trust Issues,3
9,Regulatory Pressure,3
10,Trend Shifts,2


In [30]:
data[data['No Budget']==1]

Unnamed: 0,Name,Sector,Years of Operation,What They Did,How Much They Raised,Why They Failed,Takeaway,Giants,No Budget,Competition,Poor Market Fit,Acquisition Stagnation,Platform Dependency,Monetization Failure,Niche Limits,Execution Flaws,Trend Shifts,Toxicity/Trust Issues,Regulatory Pressure,Overhype
0,Avant,Finance and Insurance,2012-2023,Online personal loans,$655M,Lost to LendingClub and high defaults,Lending needs risk balance,1,1,1,0,0,0,0,0,0,0,0,0,0.0
35,Upstart,Finance and Insurance,2012-2023,AI-driven lending,$144M,Lost to banks and high defaults,Lending needs stability,1,1,1,0,0,0,0,0,0,0,0,0,0.0
43,Wise,Finance and Insurance,2011-2023,International transfers,$689M,Lost to PayPal and high costs,Transfers need edge,1,1,1,0,0,0,0,0,0,0,0,0,0.0


In [22]:
data['Start_Year'] = pd.to_numeric(data["Years of Operation"].str.split('-').str[0])
data['End_Year'] = pd.to_numeric(data["Years of Operation"].str.split('-').str[1])


data['Years_in_operation'] = data['End_Year'] - data['Start_Year']

data.head()


Unnamed: 0,Name,Sector,Years of Operation,What They Did,How Much They Raised,Why They Failed,Takeaway,Giants,No Budget,Competition,...,Monetization Failure,Niche Limits,Execution Flaws,Trend Shifts,Toxicity/Trust Issues,Regulatory Pressure,Overhype,Start_Year,End_Year,Years_in_operation
0,Avant,Finance and Insurance,2012-2023,Online personal loans,$655M,Lost to LendingClub and high defaults,Lending needs risk balance,1,1,1,...,0,0,0,0,0,0,0.0,2012,2023,11
1,Bitpass,Finance and Insurance,2002-2008,Micropayments platform,$2M,Lost to PayPal and low adoption,Micropayments need mass use,1,0,1,...,0,0,0,0,0,0,0.0,2002,2008,6
2,Cake Financial,Finance and Insurance,2006-2011,Portfolio tracking tool,$3M,Lost to Mint and sold to TradeKing,Finance tools need scale,1,0,1,...,0,0,0,0,0,0,0.0,2006,2011,5
3,Circle,Finance and Insurance,2013-2023,Crypto payments and stablecoin,$500M,Lost to Coinbase and market shifts,Crypto needs stability,1,0,1,...,0,0,0,1,0,0,0.0,2013,2023,10
4,Clarity Money,Finance and Insurance,2016-2022,Personal finance app,$11M,Lost to Mint/Acorns and sold to Goldman,Finance apps need edge,1,0,1,...,0,0,0,0,0,0,0.0,2016,2022,6


In [None]:
data['']