In [1]:
import pandas as pd
import tushare as ts
import numpy as np

In [5]:
# Load the pledge records, parsing start_date as a datetime, and discard
# the release_date column in the same step.
pledge = (
    pd.read_csv('../data/pledge/pledge_neg.csv', parse_dates=['start_date'])
    .drop(columns=['release_date'])
)

In [6]:
# Check column dtypes and non-null counts of the freshly loaded frame.
pledge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27293 entries, 0 to 27292
Data columns (total 5 columns):
ts_code                27293 non-null object
start_date             27293 non-null datetime64[ns]
pledge_price           27293 non-null float64
forcast_close_line     27293 non-null float64
is_reach_close_line    27293 non-null bool
dtypes: bool(1), datetime64[ns](1), float64(2), object(1)
memory usage: 879.7+ KB


In [7]:
# Preview the first rows of the pledge records.
pledge.head()

Unnamed: 0,ts_code,start_date,pledge_price,forcast_close_line,is_reach_close_line
0,000002.SZ,2018-12-25,22.4751,15.73257,False
1,000002.SZ,2018-12-18,24.3593,17.05151,False
2,000002.SZ,2019-06-11,26.7721,18.74047,False
3,000002.SZ,2018-09-26,22.9654,16.07578,False
4,000002.SZ,2018-09-28,23.0231,16.11617,False


In [8]:
# Distinct stock codes appearing in the pledge records.
# NOTE: despite the name, this is a set (unordered, no duplicates), not a list.
code_list = set(pledge['ts_code'])

In [9]:
# Number of distinct stock codes.
len(code_list)

2222

In [10]:
# Map each stock code to its fundamentals table, read from the per-code CSV
# under ../data/fund_clean/ with end_date and ann_date parsed as datetimes.
fund_dict = dict(
    (
        code,
        pd.read_csv(
            '../data/fund_clean/{}.csv'.format(code),
            parse_dates=['end_date', 'ann_date'],
        ),
    )
    for code in code_list
)

In [11]:
# Exploratory check: print the row positions of one company's fundamentals
# table from the last row down to the first (the order merge_fund scans in).
for i in range(len(fund_dict['000002.SZ']) - 1, -1, -1):
    print(i)

13
12
11
10
9
8
7
6
5
4
3
2
1
0


In [13]:
def merge_fund(x, fund_lookup=None):
    """Pick the fundamentals row that matches one pledge record.

    The reference date is ``x.close_date`` when ``x.is_reach_close_line`` is
    truthy, otherwise ``x.start_date``.  Among the rows of the company's
    fundamentals table whose ``ann_date`` is on or after that date, the one
    in the last position is returned (the first hit when scanning the table
    from its last row back to its first).

    Parameters
    ----------
    x : pandas.Series
        One pledge row, as passed by ``DataFrame.apply(..., axis=1)``.  Must
        expose ``ts_code``, ``is_reach_close_line`` and the date attribute
        selected by that flag.
    fund_lookup : mapping, optional
        Maps ``ts_code`` to a fundamentals DataFrame that has an ``ann_date``
        column.  Defaults to the notebook-level ``fund_dict``.

    Returns
    -------
    pandas.Series or None
        The selected fundamentals row, or ``None`` when no row has an
        ``ann_date`` on or after the reference date (including an empty
        table).

    Raises
    ------
    KeyError
        If ``x.ts_code`` is not a key of the lookup.
    """
    if fund_lookup is None:
        # Backward-compatible default: the dict built earlier in the notebook.
        fund_lookup = fund_dict
    fund_df = fund_lookup[x.ts_code]

    # NOTE(review): the pledge frame loaded in this notebook has no
    # ``close_date`` column, so a row with is_reach_close_line == True would
    # raise AttributeError here -- confirm whether close_date should be loaded.
    pledge_date = x.close_date if x.is_reach_close_line else x.start_date

    # One vectorized comparison instead of building a Series per row in a
    # Python loop; rows with a missing ann_date compare as False.
    on_or_after = (fund_df['ann_date'] >= pledge_date).values
    hits = np.flatnonzero(on_or_after)
    if hits.size == 0:
        return None

    # Positional access: the scan is by position, so the lookup must be too
    # (label-based .loc only agrees with it for a default 0..n-1 index).
    # Presumably the table is ordered newest-first, making this the earliest
    # announcement on/after the reference date -- verify against the CSVs.
    return fund_df.iloc[hits[-1]]

In [14]:
# Row-wise lookup: call merge_fund once per pledge record (axis=1 passes
# each row as a Series).
fund_df = pledge.apply(merge_fund, axis=1)

In [15]:
# Preview the fundamentals rows matched to the first pledge records.
fund_df.head()

Unnamed: 0,ts_code,ann_date,end_date,current_ratio,quick_ratio,cash_ratio,inv_turn,ar_turn,ca_turn,netprofit_margin,...,or_yoy,q_gr_yoy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,revenue
0,000002.SZ,2019-03-26,2018-12-31,1.1543,0.4856,0.3977,0.276,197.2095,0.2574,16.5521,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
1,000002.SZ,2019-03-26,2018-12-31,1.1543,0.4856,0.3977,0.276,197.2095,0.2574,16.5521,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
2,000002.SZ,2019-08-21,2019-06-30,1.1191,0.4387,0.3268,0.1153,80.7413,0.1074,13.8427,...,31.4656,21.0203,88.0024,21.0203,88.0024,31.8907,273.2963,36.9291,401.5618,139320100000.0
3,000002.SZ,2018-10-26,2018-09-30,1.1612,0.4832,0.3543,0.1759,111.1831,0.1579,12.3836,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0
4,000002.SZ,2018-10-26,2018-09-30,1.1612,0.4832,0.3543,0.1759,111.1831,0.1579,12.3836,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0


In [16]:
# List the columns of the matched fundamentals frame.
fund_df.columns

Index(['ts_code', 'ann_date', 'end_date', 'current_ratio', 'quick_ratio',
       'cash_ratio', 'inv_turn', 'ar_turn', 'ca_turn', 'netprofit_margin',
       'grossprofit_margin', 'roe', 'debt_to_assets', 'op_yoy', 'ebt_yoy',
       'assets_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy',
       'q_sales_qoq', 'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq',
       'revenue'],
      dtype='object')

In [17]:
# ts_code is already present in the pledge frame; remove it from the
# fundamentals side before the two are put side by side.
fund_df = fund_df.drop(columns=['ts_code'])

In [18]:
# Place the fundamentals columns next to the pledge columns; concat with
# axis=1 aligns the two frames on their row index.
pledge = pd.concat([pledge, fund_df], axis=1)

In [19]:
# Remove the report date columns that came in with the fundamentals.
pledge = pledge.drop(columns=['ann_date', 'end_date'])

In [20]:
# Preview the first 20 rows of the combined pledge + fundamentals frame.
pledge.head(20)

Unnamed: 0,ts_code,start_date,pledge_price,forcast_close_line,is_reach_close_line,current_ratio,quick_ratio,cash_ratio,inv_turn,ar_turn,...,or_yoy,q_gr_yoy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,revenue
0,000002.SZ,2018-12-25,22.4751,15.73257,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
1,000002.SZ,2018-12-18,24.3593,17.05151,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
2,000002.SZ,2019-06-11,26.7721,18.74047,False,1.1191,0.4387,0.3268,0.1153,80.7413,...,31.4656,21.0203,88.0024,21.0203,88.0024,31.8907,273.2963,36.9291,401.5618,139320100000.0
3,000002.SZ,2018-09-26,22.9654,16.07578,False,1.1612,0.4832,0.3543,0.1759,111.1831,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0
4,000002.SZ,2018-09-28,23.0231,16.11617,False,1.1612,0.4832,0.3543,0.1759,111.1831,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0
5,000002.SZ,2019-01-22,24.2535,16.97745,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
6,000002.SZ,2019-01-18,24.0228,16.81596,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
7,000002.SZ,2017-03-09,18.2068,12.74476,False,1.2436,0.4378,0.3354,0.4063,104.8766,...,22.9754,6.4419,192.0584,6.4419,192.0584,11.8802,294.9971,3.9559,306.6143,69810480000.0
8,000002.SZ,2017-10-24,23.8775,16.71425,False,1.2102,0.4697,0.314,0.1571,69.3443,...,0.039,11.9039,-7.675,11.9039,-7.675,19.4354,-41.9464,13.5359,-46.7393,30825620000.0
9,000002.SZ,2017-03-21,18.6273,13.03911,False,1.2436,0.4378,0.3354,0.4063,104.8766,...,22.9754,6.4419,192.0584,6.4419,192.0584,11.8802,294.9971,3.9559,306.6143,69810480000.0


In [21]:
# Re-check non-null counts after the merge; pledge rows for which
# merge_fund returned no report show up as missing fundamentals.
pledge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27293 entries, 0 to 27292
Data columns (total 29 columns):
ts_code                27293 non-null object
start_date             27293 non-null datetime64[ns]
pledge_price           27293 non-null float64
forcast_close_line     27293 non-null float64
is_reach_close_line    27293 non-null bool
current_ratio          27292 non-null float64
quick_ratio            27292 non-null float64
cash_ratio             27292 non-null float64
inv_turn               27292 non-null float64
ar_turn                27292 non-null float64
ca_turn                27292 non-null float64
netprofit_margin       27292 non-null float64
grossprofit_margin     27292 non-null float64
roe                    27292 non-null float64
debt_to_assets         27292 non-null float64
op_yoy                 27292 non-null float64
ebt_yoy                27292 non-null float64
assets_yoy             27292 non-null float64
tr_yoy                 27292 non-null float64
or_yoy        

In [22]:
# Remove exact duplicate rows first, then any row that still has a missing
# value. The row index is not reset afterwards.
pledge = pledge.drop_duplicates().dropna()

In [23]:
# Confirm the remaining row count and that no nulls are left.
pledge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19753 entries, 0 to 27292
Data columns (total 29 columns):
ts_code                19753 non-null object
start_date             19753 non-null datetime64[ns]
pledge_price           19753 non-null float64
forcast_close_line     19753 non-null float64
is_reach_close_line    19753 non-null bool
current_ratio          19753 non-null float64
quick_ratio            19753 non-null float64
cash_ratio             19753 non-null float64
inv_turn               19753 non-null float64
ar_turn                19753 non-null float64
ca_turn                19753 non-null float64
netprofit_margin       19753 non-null float64
grossprofit_margin     19753 non-null float64
roe                    19753 non-null float64
debt_to_assets         19753 non-null float64
op_yoy                 19753 non-null float64
ebt_yoy                19753 non-null float64
assets_yoy             19753 non-null float64
tr_yoy                 19753 non-null float64
or_yoy        

In [24]:
# Final look at the cleaned frame before export.
pledge.head()

Unnamed: 0,ts_code,start_date,pledge_price,forcast_close_line,is_reach_close_line,current_ratio,quick_ratio,cash_ratio,inv_turn,ar_turn,...,or_yoy,q_gr_yoy,q_gr_qoq,q_sales_yoy,q_sales_qoq,q_op_yoy,q_op_qoq,q_profit_yoy,q_profit_qoq,revenue
0,000002.SZ,2018-12-25,22.4751,15.73257,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
1,000002.SZ,2018-12-18,24.3593,17.05151,False,1.1543,0.4856,0.3977,0.276,197.2095,...,22.5537,-3.2906,73.6779,-3.2906,73.6779,18.9694,198.0835,22.6986,232.0102,48374630000.0
2,000002.SZ,2019-06-11,26.7721,18.74047,False,1.1191,0.4387,0.3268,0.1153,80.7413,...,31.4656,21.0203,88.0024,21.0203,88.0024,31.8907,273.2963,36.9291,401.5618,139320100000.0
3,000002.SZ,2018-09-26,22.9654,16.07578,False,1.1612,0.4832,0.3543,0.1759,111.1831,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0
4,000002.SZ,2018-09-28,23.0231,16.11617,False,1.1612,0.4832,0.3543,0.1759,111.1831,...,50.3171,48.1234,-6.7883,48.1234,-6.7883,71.2685,-27.8129,73.7196,-29.5315,48374630000.0


In [25]:
# Write the cleaned frame to disk; index=False keeps the row index out of
# the CSV.
pledge.to_csv('../data/pledge/pledge_company_neg.csv', index=False)