In [1]:
import pandas as pd

In [2]:
tele = pd.read_csv('tele.csv')
trans = pd.read_csv('trans.csv')
medical = pd.read_csv('medical.csv')
car = pd.read_csv('car.csv')
total_df_list = [tele,trans,medical,car]

In [3]:
for df in total_df_list:
    del df ['Unnamed: 0']
    for column_name in df.columns:
        df[column_name] = df[column_name].fillna(df[column_name].mean())

In [4]:
df = pd.concat(total_df_list,axis=0)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [6]:
feature_list = list(df.columns)
feature_list.remove('净资产收益率加权(%)')
feature_list.remove('code')
linear_model = LinearRegression()

In [7]:
import statsmodels.api as sm

In [8]:
model = sm.OLS(df['净资产收益率加权(%)'],df[feature_list])

In [9]:
results = model.fit()

In [10]:
print(results.params)

总资产利润率(%)           -1.186423
主营业务利润率(%)           0.247014
总资产净利润率(%)           1.162574
成本费用利润率(%)           0.005641
营业利润率(%)            -0.016775
                       ...   
经营现金净流量对销售收入比率(%)    0.194849
资产的经营现金流量回报率(%)      2.008283
经营现金净流量与净利润的比率(%)   -0.001739
经营现金净流量对负债比率(%)     -0.561707
现金流量比率(%)            0.001824
Length: 68, dtype: float64


In [11]:
results.summary()

0,1,2,3
Dep. Variable:,净资产收益率加权(%),R-squared (uncentered):,0.987
Model:,OLS,Adj. R-squared (uncentered):,0.986
Method:,Least Squares,F-statistic:,730.2
Date:,"Fri, 20 Dec 2019",Prob (F-statistic):,0.0
Time:,17:20:08,Log-Likelihood:,-1126.8
No. Observations:,711,AIC:,2390.0
Df Residuals:,643,BIC:,2700.0
Df Model:,68,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
总资产利润率(%),-1.1864,0.121,-9.788,0.000,-1.424,-0.948
主营业务利润率(%),0.2470,0.078,3.156,0.002,0.093,0.401
总资产净利润率(%),1.1626,0.090,12.889,0.000,0.985,1.340
成本费用利润率(%),0.0056,0.003,1.695,0.091,-0.001,0.012
营业利润率(%),-0.0168,0.007,-2.391,0.017,-0.031,-0.003
主营业务成本率(%),0.2445,0.079,3.104,0.002,0.090,0.399
销售净利率(%),0.0085,0.004,2.374,0.018,0.001,0.015
净资产收益率(%),0.8215,0.012,68.383,0.000,0.798,0.845
股本报酬率(%),-0.0127,0.004,-3.600,0.000,-0.020,-0.006

0,1,2,3
Omnibus:,311.469,Durbin-Watson:,1.932
Prob(Omnibus):,0.0,Jarque-Bera (JB):,50441.769
Skew:,-0.834,Prob(JB):,0.0
Kurtosis:,44.23,Cond. No.,239000000.0


In [12]:
new_feature_list = []
for name,p_value in zip(feature_list,results.pvalues):
    if(p_value<=0.05):
        new_feature_list.append(name)
    

In [13]:
new_feature_list


['总资产利润率(%)',
 '主营业务利润率(%)',
 '总资产净利润率(%)',
 '营业利润率(%)',
 '主营业务成本率(%)',
 '销售净利率(%)',
 '净资产收益率(%)',
 '股本报酬率(%)',
 '净资产报酬率(%)',
 '资产报酬率(%)',
 '资产负债率(%)',
 '股东权益比率(%)',
 '负债与所有者权益比率(%)',
 '产权比率(%)',
 '净利润增长率(%)',
 '基本每股收益(元)',
 '总资产周转率(次)']

In [14]:
model = sm.OLS(df['净资产收益率加权(%)'],df[new_feature_list])
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,净资产收益率加权(%),R-squared (uncentered):,0.986
Model:,OLS,Adj. R-squared (uncentered):,0.986
Method:,Least Squares,F-statistic:,2885.0
Date:,"Fri, 20 Dec 2019",Prob (F-statistic):,0.0
Time:,17:20:08,Log-Likelihood:,-1158.0
No. Observations:,711,AIC:,2350.0
Df Residuals:,694,BIC:,2428.0
Df Model:,17,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
总资产利润率(%),-1.1476,0.105,-10.913,0.000,-1.354,-0.941
主营业务利润率(%),0.1131,0.059,1.915,0.056,-0.003,0.229
总资产净利润率(%),1.2255,0.070,17.394,0.000,1.087,1.364
营业利润率(%),-0.0134,0.003,-4.852,0.000,-0.019,-0.008
主营业务成本率(%),0.1128,0.059,1.923,0.055,-0.002,0.228
销售净利率(%),0.0102,0.001,8.997,0.000,0.008,0.012
净资产收益率(%),0.8257,0.010,81.252,0.000,0.806,0.846
股本报酬率(%),-0.0072,0.002,-3.087,0.002,-0.012,-0.003
净资产报酬率(%),-0.0139,0.001,-21.881,0.000,-0.015,-0.013

0,1,2,3
Omnibus:,364.447,Durbin-Watson:,1.939
Prob(Omnibus):,0.0,Jarque-Bera (JB):,66154.174
Skew:,-1.166,Prob(JB):,0.0
Kurtosis:,50.198,Cond. No.,94700.0


In [15]:
new_feature_list.remove('主营业务利润率(%)')
new_feature_list.remove('资产报酬率(%)')
new_feature_list.remove('主营业务成本率(%)')
new_feature_list.remove('资产负债率(%)')

In [16]:
model = sm.OLS(df['净资产收益率加权(%)'],df[new_feature_list])
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,净资产收益率加权(%),R-squared (uncentered):,0.986
Model:,OLS,Adj. R-squared (uncentered):,0.986
Method:,Least Squares,F-statistic:,3742.0
Date:,"Fri, 20 Dec 2019",Prob (F-statistic):,0.0
Time:,17:20:08,Log-Likelihood:,-1162.8
No. Observations:,711,AIC:,2352.0
Df Residuals:,698,BIC:,2411.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
总资产利润率(%),-1.0436,0.068,-15.313,0.000,-1.177,-0.910
总资产净利润率(%),1.2239,0.070,17.508,0.000,1.087,1.361
营业利润率(%),-0.0093,0.001,-6.946,0.000,-0.012,-0.007
销售净利率(%),0.0096,0.001,9.635,0.000,0.008,0.012
净资产收益率(%),0.8283,0.010,87.174,0.000,0.810,0.847
股本报酬率(%),-0.0071,0.002,-3.220,0.001,-0.011,-0.003
净资产报酬率(%),-0.0139,0.001,-22.070,0.000,-0.015,-0.013
股东权益比率(%),-0.0032,0.001,-2.613,0.009,-0.006,-0.001
负债与所有者权益比率(%),0.0084,0.003,3.179,0.002,0.003,0.014

0,1,2,3
Omnibus:,313.845,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,55291.837
Skew:,-0.826,Prob(JB):,0.0
Kurtosis:,46.17,Cond. No.,90900.0


In [17]:
df

Unnamed: 0,总资产利润率(%),主营业务利润率(%),总资产净利润率(%),成本费用利润率(%),营业利润率(%),主营业务成本率(%),销售净利率(%),净资产收益率(%),股本报酬率(%),净资产报酬率(%),...,存货周转天数(天),总资产周转天数(天),流动资产周转率(次),流动资产周转天数(天),经营现金净流量对销售收入比率(%),资产的经营现金流量回报率(%),经营现金净流量与净利润的比率(%),经营现金净流量对负债比率(%),现金流量比率(%),code
0,11.09,35.34,11.36,30.16,22.38,63.72,18.89,18.41,76.86,18.46,...,42.47,448.95,0.77,349.47,0.17,0.10,0.88,0.24,24.51,300632
1,1.23,8.85,1.38,2.05,2.02,91.03,1.82,6.00,35.33,5.84,...,111.96,355.64,0.91,295.31,-0.00,-0.00,-0.15,-0.00,-0.24,2845
2,6.49,25.88,7.56,17.40,14.31,73.52,12.57,7.86,47.95,7.87,...,42.86,448.80,0.81,332.35,0.04,0.02,0.32,0.12,11.94,603115
3,4.61,29.66,4.79,10.82,9.47,69.32,8.13,6.16,24.51,6.15,...,115.43,458.09,0.85,316.23,0.01,0.01,0.17,0.03,3.12,300743
4,1.59,15.69,1.65,5.43,4.86,83.72,4.41,3.84,50.48,3.84,...,40.40,720.58,1.51,179.21,0.11,0.04,2.46,0.07,11.92,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,5.11,5.56,4.98,14.17,13.79,92.18,15.04,7.97,62.71,7.83,...,43.05,815.22,0.75,358.99,-0.21,-0.07,-1.42,-0.21,-29.63,601238
132,0.21,17.01,0.20,0.49,0.26,81.59,0.46,0.30,0.90,0.30,...,106.43,615.74,0.93,291.86,0.11,0.05,23.51,0.16,17.40,2708
133,3.31,21.49,3.75,15.54,14.91,77.17,15.58,7.91,70.62,7.49,...,91.38,1120.80,0.84,321.47,0.27,0.06,1.74,0.10,11.93,603035
134,3.30,29.62,3.32,19.67,16.50,69.60,14.37,6.23,53.81,6.23,...,160.63,1169.34,0.28,950.37,0.19,0.04,1.33,0.09,9.35,300652


In [18]:
results.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared (uncentered):,0.986
Dependent Variable:,净资产收益率加权(%),AIC:,2351.5774
Date:,2019-12-20 17:35,BIC:,2410.9441
No. Observations:,711,Log-Likelihood:,-1162.8
Df Model:,13,F-statistic:,3742.0
Df Residuals:,698,Prob (F-statistic):,0.0
R-squared (uncentered):,0.986,Scale:,1.5706

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
总资产利润率(%),-1.0436,0.0682,-15.3127,0.0000,-1.1774,-0.9098
总资产净利润率(%),1.2239,0.0699,17.5078,0.0000,1.0866,1.3611
营业利润率(%),-0.0093,0.0013,-6.9462,0.0000,-0.0119,-0.0066
销售净利率(%),0.0096,0.0010,9.6348,0.0000,0.0076,0.0116
净资产收益率(%),0.8283,0.0095,87.1744,0.0000,0.8097,0.8470
股本报酬率(%),-0.0071,0.0022,-3.2200,0.0013,-0.0115,-0.0028
净资产报酬率(%),-0.0139,0.0006,-22.0698,0.0000,-0.0151,-0.0126
股东权益比率(%),-0.0032,0.0012,-2.6129,0.0092,-0.0055,-0.0008
负债与所有者权益比率(%),0.0084,0.0027,3.1792,0.0015,0.0032,0.0136

0,1,2,3
Omnibus:,313.845,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,55291.837
Skew:,-0.826,Prob(JB):,0.0
Kurtosis:,46.17,Condition No.:,90889.0


In [51]:
data = pd.read_csv('corr.csv')

In [52]:
data.index

RangeIndex(start=0, stop=51, step=1)

In [58]:
verystrong_corr=[]
strong_corr=[]
medil_corr=[]
weak_corr=[]
no_corr=[]
for columns_name in data.columns:
    if columns_name == 'Unnamed: 0':
        continue
    for index_name,value in zip(data['Unnamed: 0'], data[columns_name]):
        print(value,index_name,columns_name)
        if abs(value)>=0.8:
            verystrong_corr.append([index_name,columns_name])
        elif 0.8>abs(value)>=0.6:
            strong_corr.append([index_name,columns_name])
        elif 0.6>abs(value)>=0.4:
            medil_corr.append([index_name,columns_name])
        elif 0.4>abs(value)>=0.2:
            weak_corr.append([index_name,columns_name])
        elif 0.2>abs(value):
            no_corr.append([index_name,columns_name])
        
        
            
            
        
        


0.6125020260653592 总资产利润率(%) 基本每股收益(元)
0.2415648494259431 主营业务利润率(%) 基本每股收益(元)
0.6949568051890967 总资产净利润率(%) 基本每股收益(元)
0.4156157933467444 成本费用利润率(%) 基本每股收益(元)
0.3850038910206547 营业利润率(%) 基本每股收益(元)
-0.2396916246074545 主营业务成本率(%) 基本每股收益(元)
0.3750295582861857 销售净利率(%) 基本每股收益(元)
0.5454583438459084 净资产收益率(%) 基本每股收益(元)
0.9496208229024964 股本报酬率(%) 基本每股收益(元)
0.07097908706037481 净资产报酬率(%) 基本每股收益(元)
0.6140921665441293 资产报酬率(%) 基本每股收益(元)
0.2152437818040068 销售毛利率(%) 基本每股收益(元)
-0.11607951560458407 三项费用比重(%) 基本每股收益(元)
-0.05331121003199405 非主营比重(%) 基本每股收益(元)
-0.09392962810001357 主营利润比重(%) 基本每股收益(元)
0.09085994618781827 流动比率(%) 基本每股收益(元)
0.09004806740790937 速动比率(%) 基本每股收益(元)
0.07190814063262517 现金比率(%) 基本每股收益(元)
0.0071472852488486185 利息支付倍数(%) 基本每股收益(元)
-0.1830623997727726 资产负债率(%) 基本每股收益(元)
0.011148056076919424 长期债务与营运资金比率(%) 基本每股收益(元)
0.18306249196573776 股东权益比率(%) 基本每股收益(元)
-0.05601571148892341 长期负债比率(%) 基本每股收益(元)
0.07166315220302669 股东权益与固定资产比率(%) 基本每股收益(元)
-0.07427450314528182 负债与所有者权益比率(%) 基本每股收益(

0.6211496592055444 总资产净利润率(%) 净资产收益率加权(%)
0.13684409147964202 成本费用利润率(%) 净资产收益率加权(%)
0.26772776059252024 营业利润率(%) 净资产收益率加权(%)
-0.3423547743815669 主营业务成本率(%) 净资产收益率加权(%)
0.10888007466555198 销售净利率(%) 净资产收益率加权(%)
0.9001934580037345 净资产收益率(%) 净资产收益率加权(%)
0.556464866810569 股本报酬率(%) 净资产收益率加权(%)
0.03986518946080488 净资产报酬率(%) 净资产收益率加权(%)
0.4642906453267535 资产报酬率(%) 净资产收益率加权(%)
0.2677200347363564 销售毛利率(%) 净资产收益率加权(%)
-0.2128097283965991 三项费用比重(%) 净资产收益率加权(%)
-0.032763606826636565 非主营比重(%) 净资产收益率加权(%)
-0.059889483652205526 主营利润比重(%) 净资产收益率加权(%)
0.1138556787467674 流动比率(%) 净资产收益率加权(%)
0.1113535240341781 速动比率(%) 净资产收益率加权(%)
0.08158700064834931 现金比率(%) 净资产收益率加权(%)
-0.03960611859664062 利息支付倍数(%) 净资产收益率加权(%)
-0.09911537009800077 资产负债率(%) 净资产收益率加权(%)
0.009279295420793915 长期债务与营运资金比率(%) 净资产收益率加权(%)
0.09911520025133552 股东权益比率(%) 净资产收益率加权(%)
-0.022427930258489773 长期负债比率(%) 净资产收益率加权(%)
0.024709668377921032 股东权益与固定资产比率(%) 净资产收益率加权(%)
-0.03061907819684505 负债与所有者权益比率(%) 净资产收益率加权(%)
-0.10567446662236207 长期资产与长

In [49]:
data

Unnamed: 0,基本每股收益(元),每股净资产(元),每股经营活动产生的现金流量净额(元),主营业务收入(万元),主营业务利润(万元),营业利润(万元),投资收益(万元),营业外收支净额(万元),利润总额(万元),净利润(万元),净利润(扣除非经常性损益后)(万元),经营活动产生的现金流量净额(万元),现金及现金等价物净增加额(万元),总资产(万元),流动资产(万元),总负债(万元),流动负债(万元),股东权益不含少数股东权益(万元),净资产收益率加权(%)
0,0.612502,0.067388,0.092081,0.004073,0.056342,0.174632,0.083399,0.824792,0.357373,0.462037,0.297851,0.021715,0.018173,-0.014526,-0.000778,-0.04445,-0.040401,0.045281,0.462424
1,0.241565,0.052517,0.040577,-0.149023,-0.029936,-0.006883,-0.059068,0.001031,-0.006207,0.004332,0.031506,-0.068167,0.078377,-0.147432,-0.133933,-0.158979,-0.16494,-0.121568,0.354721
2,0.694957,0.094775,0.13513,-0.003602,0.066492,0.191358,0.069082,0.654086,0.332949,0.424354,0.304474,0.023771,0.036349,-0.028888,-0.008687,-0.06081,-0.055873,0.035715,0.62115
3,0.415616,0.010832,0.021617,-0.000734,0.017093,0.124718,0.101515,0.975434,0.345986,0.448177,0.235565,0.016842,0.014946,0.010019,-0.000604,-0.014903,-0.018699,0.050214,0.136844
4,0.385004,0.024259,0.026682,0.009172,0.034435,0.136261,0.098036,0.754455,0.304898,0.380004,0.215296,0.030684,0.017995,0.028782,0.011569,0.001957,-0.005507,0.066772,0.267728
5,-0.239692,-0.052408,-0.039742,0.155261,0.034171,0.011313,0.061201,0.001044,0.010846,0.000872,-0.026604,0.07245,-0.079733,0.153416,0.139194,0.165322,0.171001,0.126832,-0.342355
6,0.37503,0.010384,0.015926,0.010582,0.020925,0.105964,0.094072,0.971482,0.327485,0.431708,0.218094,0.013636,0.002081,0.013392,0.009007,-0.006743,-0.006896,0.049041,0.10888
7,0.545458,0.120618,0.148602,0.047873,0.124662,0.214463,0.047386,0.031757,0.208429,0.235566,0.254919,0.074075,0.015883,0.025712,0.037571,0.011381,0.007044,0.051803,0.900193
8,0.949621,0.115311,0.107312,0.126749,0.226236,0.389958,0.246415,0.311848,0.438658,0.414289,0.346297,0.141542,0.113587,0.154833,0.131336,0.104767,0.09982,0.173917,0.556465
9,0.070979,0.012569,0.009043,0.007237,0.012546,0.015368,0.005393,-0.000372,0.014314,0.01416,0.014089,0.007371,-0.008044,0.008553,0.007465,0.006201,0.0052,0.012992,0.039865


In [62]:
weak_corr

[['主营业务利润率(%)', '基本每股收益(元)'],
 ['营业利润率(%)', '基本每股收益(元)'],
 ['主营业务成本率(%)', '基本每股收益(元)'],
 ['销售净利率(%)', '基本每股收益(元)'],
 ['销售毛利率(%)', '基本每股收益(元)'],
 ['主营业务收入增长率(%)', '基本每股收益(元)'],
 ['应收账款周转天数(天)', '基本每股收益(元)'],
 ['总资产周转率(次)', '基本每股收益(元)'],
 ['总资产周转天数(天)', '基本每股收益(元)'],
 ['流动资产周转天数(天)', '基本每股收益(元)'],
 ['资产的经营现金流量回报率(%)', '基本每股收益(元)'],
 ['经营现金净流量对负债比率(%)', '基本每股收益(元)'],
 ['现金流量比率(%)', '基本每股收益(元)'],
 ['流动资产周转率(次)', '主营业务收入(万元)'],
 ['股本报酬率(%)', '主营业务利润(万元)'],
 ['流动资产周转率(次)', '主营业务利润(万元)'],
 ['净资产收益率(%)', '营业利润(万元)'],
 ['股本报酬率(%)', '营业利润(万元)'],
 ['净资产增长率(%)', '营业利润(万元)'],
 ['总资产增长率(%)', '营业利润(万元)'],
 ['股本报酬率(%)', '投资收益(万元)'],
 ['总资产增长率(%)', '投资收益(万元)'],
 ['股本报酬率(%)', '营业外收支净额(万元)'],
 ['总资产周转天数(天)', '营业外收支净额(万元)'],
 ['总资产利润率(%)', '利润总额(万元)'],
 ['总资产净利润率(%)', '利润总额(万元)'],
 ['成本费用利润率(%)', '利润总额(万元)'],
 ['营业利润率(%)', '利润总额(万元)'],
 ['销售净利率(%)', '利润总额(万元)'],
 ['净资产收益率(%)', '利润总额(万元)'],
 ['资产报酬率(%)', '利润总额(万元)'],
 ['净资产增长率(%)', '利润总额(万元)'],
 ['总资产增长率(%)', '利润总额(万元)'],
 ['应收账款周转天数(天)', '利润总额(万元)'],
 ['流

In [63]:
no_corr

[['净资产报酬率(%)', '基本每股收益(元)'],
 ['三项费用比重(%)', '基本每股收益(元)'],
 ['非主营比重(%)', '基本每股收益(元)'],
 ['主营利润比重(%)', '基本每股收益(元)'],
 ['流动比率(%)', '基本每股收益(元)'],
 ['速动比率(%)', '基本每股收益(元)'],
 ['现金比率(%)', '基本每股收益(元)'],
 ['利息支付倍数(%)', '基本每股收益(元)'],
 ['资产负债率(%)', '基本每股收益(元)'],
 ['长期债务与营运资金比率(%)', '基本每股收益(元)'],
 ['股东权益比率(%)', '基本每股收益(元)'],
 ['长期负债比率(%)', '基本每股收益(元)'],
 ['股东权益与固定资产比率(%)', '基本每股收益(元)'],
 ['负债与所有者权益比率(%)', '基本每股收益(元)'],
 ['长期资产与长期资金比率(%)', '基本每股收益(元)'],
 ['资本化比率(%)', '基本每股收益(元)'],
 ['固定资产净值率(%)', '基本每股收益(元)'],
 ['资本固定化比率(%)', '基本每股收益(元)'],
 ['产权比率(%)', '基本每股收益(元)'],
 ['清算价值比率(%)', '基本每股收益(元)'],
 ['固定资产比重(%)', '基本每股收益(元)'],
 ['净利润增长率(%)', '基本每股收益(元)'],
 ['净资产增长率(%)', '基本每股收益(元)'],
 ['总资产增长率(%)', '基本每股收益(元)'],
 ['应收账款周转率(次)', '基本每股收益(元)'],
 ['存货周转率(次)', '基本每股收益(元)'],
 ['固定资产周转率(次)', '基本每股收益(元)'],
 ['存货周转天数(天)', '基本每股收益(元)'],
 ['流动资产周转率(次)', '基本每股收益(元)'],
 ['经营现金净流量对销售收入比率(%)', '基本每股收益(元)'],
 ['经营现金净流量与净利润的比率(%)', '基本每股收益(元)'],
 ['code', '基本每股收益(元)'],
 ['总资产利润率(%)', '每股净资产(元)'],
 ['主营业务利润率(%)', '每股净资