In [4]:
import numpy as np
import pandas as pd

##  Problem Statement

## Questions
* How many instances are there in the dataset?
* What are the correlations between different financial indicators and the bankruptcy status?
* Are there statistically significant differences in financial indicators (e.g., ROA, debt ratios) between bankrupt and non-bankrupt companies?
* Which financial indicators are the most significant predictors of a company's bankruptcy status?
* What are the primary financial differences between companies that go bankrupt and those that remain solvent?
* What specific thresholds for key financial ratios (e.g., debt ratio, interest coverage ratio) are associated with a high risk of bankruptcy?

In [5]:
# Read the dataset from the CSV file that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='data.csv')

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Bankrupt?,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,Operating Gross Margin,Realized Sales Gross Margin,Operating Profit Rate,Pre-tax net Interest Rate,After-tax net Interest Rate,Non-industry income and expenditure/revenue,...,Net Income to Total Assets,Total assets to GNP price,No-credit Interval,Gross Profit to Sales,Net Income to Stockholder's Equity,Liability to Equity,Degree of Financial Leverage (DFL),Interest Coverage Ratio (Interest expense to EBIT),Net Income Flag,Equity to Liability
0,1,0.370594,0.424389,0.405750,0.601457,0.601457,0.998969,0.796887,0.808809,0.302646,...,0.716845,0.009219,0.622879,0.601453,0.827890,0.290202,0.026601,0.564050,1,0.016469
1,1,0.464291,0.538214,0.516730,0.610235,0.610235,0.998946,0.797380,0.809301,0.303556,...,0.795297,0.008323,0.623652,0.610237,0.839969,0.283846,0.264577,0.570175,1,0.020794
2,1,0.426071,0.499019,0.472295,0.601450,0.601364,0.998857,0.796403,0.808388,0.302035,...,0.774670,0.040003,0.623841,0.601449,0.836774,0.290189,0.026555,0.563706,1,0.016474
3,1,0.399844,0.451265,0.457733,0.583541,0.583541,0.998700,0.796967,0.808966,0.303350,...,0.739555,0.003252,0.622929,0.583538,0.834697,0.281721,0.026697,0.564663,1,0.023982
4,1,0.465022,0.538432,0.522298,0.598783,0.598783,0.998973,0.797366,0.809304,0.303475,...,0.795016,0.003878,0.623521,0.598782,0.839973,0.278514,0.024752,0.575617,1,0.035490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6814,0,0.493687,0.539468,0.543230,0.604455,0.604462,0.998992,0.797409,0.809331,0.303510,...,0.799927,0.000466,0.623620,0.604455,0.840359,0.279606,0.027064,0.566193,1,0.029890
6815,0,0.475162,0.538269,0.524172,0.598308,0.598308,0.998992,0.797414,0.809327,0.303520,...,0.799748,0.001959,0.623931,0.598306,0.840306,0.278132,0.027009,0.566018,1,0.038284
6816,0,0.472725,0.533744,0.520638,0.610444,0.610213,0.998984,0.797401,0.809317,0.303512,...,0.797778,0.002840,0.624156,0.610441,0.840138,0.275789,0.026791,0.565158,1,0.097649
6817,0,0.506264,0.559911,0.554045,0.607850,0.607850,0.999074,0.797500,0.809399,0.303498,...,0.811808,0.002837,0.623957,0.607846,0.841084,0.277547,0.026822,0.565302,1,0.044009


In [12]:
# Q: how many instances does the dataset hold?
# df.shape is (rows, columns); the row count is the number of instances.
print("dataframe shape: ", df.shape)
print("There are ", len(df), " instances in the dataframe")


dataframe shape:  (6819, 96)
There are  6819  instances in the dataframe


In [7]:
# List the column labels of df. Apart from 'Bankrupt?', the labels shown in
# the output begin with a space (e.g. ' Operating Gross Margin'), so any
# column lookup has to include that leading space.
df.columns

Index(['Bankrupt?', ' ROA(C) before interest and depreciation before interest',
       ' ROA(A) before interest and % after tax',
       ' ROA(B) before interest and depreciation after tax',
       ' Operating Gross Margin', ' Realized Sales Gross Margin',
       ' Operating Profit Rate', ' Pre-tax net Interest Rate',
       ' After-tax net Interest Rate',
       ' Non-industry income and expenditure/revenue',
       ' Continuous interest rate (after tax)', ' Operating Expense Rate',
       ' Research and development expense rate', ' Cash flow rate',
       ' Interest-bearing debt interest rate', ' Tax rate (A)',
       ' Net Value Per Share (B)', ' Net Value Per Share (A)',
       ' Net Value Per Share (C)', ' Persistent EPS in the Last Four Seasons',
       ' Cash Flow Per Share', ' Revenue Per Share (Yuan ¥)',
       ' Operating Profit Per Share (Yuan ¥)',
       ' Per Share Net profit before tax (Yuan ¥)',
       ' Realized Sales Gross Profit Growth Rate',
       ' Operating Profit

In [8]:
# Inspect a single record: the row whose index label is 1, returned as a Series.
df.loc[1]

Bankrupt?                                                   1.000000
 ROA(C) before interest and depreciation before interest    0.464291
 ROA(A) before interest and % after tax                     0.538214
 ROA(B) before interest and depreciation after tax          0.516730
 Operating Gross Margin                                     0.610235
                                                              ...   
 Liability to Equity                                        0.283846
 Degree of Financial Leverage (DFL)                         0.264577
 Interest Coverage Ratio (Interest expense to EBIT)         0.570175
 Net Income Flag                                            1.000000
 Equity to Liability                                        0.020794
Name: 1, Length: 96, dtype: float64

In [9]:
# Boolean mask: True for rows whose ' Total debt/Total net worth' value is
# greater than 0.5 (the column label includes its leading space).
totalDebtPerNetWorth = df[' Total debt/Total net worth'].gt(0.5)

# Display only the rows selected by the mask.
df.loc[totalDebtPerNetWorth]

Unnamed: 0,Bankrupt?,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,Operating Gross Margin,Realized Sales Gross Margin,Operating Profit Rate,Pre-tax net Interest Rate,After-tax net Interest Rate,Non-industry income and expenditure/revenue,...,Net Income to Total Assets,Total assets to GNP price,No-credit Interval,Gross Profit to Sales,Net Income to Stockholder's Equity,Liability to Equity,Degree of Financial Leverage (DFL),Interest Coverage Ratio (Interest expense to EBIT),Net Income Flag,Equity to Liability
427,1,0.311315,0.279655,0.360887,0.598293,0.598293,0.998511,0.796186,0.808234,0.302381,...,0.542326,0.000456,0.623593,0.598295,0.0,0.745352,0.026749,0.564949,1,0.010437
1171,1,0.433676,0.490569,0.483109,0.590352,0.590352,0.998684,0.797092,0.809036,0.303601,...,0.767928,0.000283,0.624328,0.590349,0.83873,0.274804,0.026764,0.565026,1,0.798122
1865,1,0.207722,0.236862,0.237272,0.59936,0.59936,0.998834,0.796646,0.808621,0.302508,...,0.605902,0.001827,0.623288,0.59936,0.344652,0.643692,0.026781,0.565108,1,0.010494
2200,0,0.436357,0.538378,0.484876,0.598056,0.598056,0.998795,0.797503,0.809407,0.304086,...,0.800613,0.022488,0.624908,0.59805,0.84026,0.274851,0.026793,0.565167,1,0.580688
2247,0,0.460635,0.51799,0.512715,0.602992,0.602992,0.998968,0.797209,0.809146,0.303213,...,0.788336,0.004101,0.625238,0.602988,0.839683,0.274822,0.02679,0.565151,1,0.698967
2296,0,0.459513,0.526439,0.511483,0.604196,0.604196,0.998511,0.797036,0.808983,0.303864,...,0.793407,0.003711,0.626084,0.604193,0.839922,0.274785,0.02679,0.565154,1,0.942729
2345,0,0.462292,0.522242,0.516569,0.628324,0.628324,0.98279,0.773702,0.791394,0.295983,...,0.790799,0.003396,0.626519,0.628321,0.8398,0.274788,0.026772,0.565064,1,0.920638
2440,1,0.393702,0.422372,0.450131,0.587354,0.587354,0.998837,0.797234,0.809181,0.303528,...,0.717614,0.003198,0.625328,0.587351,0.442176,1.0,0.02662,0.564182,1,0.010365
2490,0,0.412178,0.47427,0.457733,0.0,0.0,0.988045,0.784312,0.796607,0.303526,...,0.766285,0.002466,0.626786,0.0,0.838652,0.274813,0.026789,0.565148,1,0.743588
3180,0,0.456686,0.514937,0.503292,0.614192,0.614192,0.998035,0.796751,0.808558,0.304362,...,0.786847,0.000356,0.625703,0.614187,0.839615,0.274792,0.026791,0.565158,1,0.881018


In [10]:
# Rows labelled 1 in 'Bankrupt?' form the bankrupt group; report its shape.
bankruptComp = df.loc[df['Bankrupt?'].eq(1)]
print("Bankrupt Companies shape: ", bankruptComp.shape)

# Rows labelled 0 in 'Bankrupt?' form the non-bankrupt group; report its shape.
notBankruptComp = df.loc[df['Bankrupt?'].eq(0)]
print("Not Bankrupt Companies shape: ", notBankruptComp.shape)

Bankrupt Companies shape:  (220, 96)
Not Bankrupt Companies shape:  (6599, 96)
