In [2]:
from collections import namedtuple

import numpy as np
import pandas as pd

import csf

In [3]:
def get_raw_factor(factors, index_code, start_date, end_date, freq='M'):
    """
    Request factor values through ``csf.get_stock_factor`` and pivot them wide.

    :param factors: str or list, a factor code such as "M009006" or a list of
        factor codes such as ["M009006", "M009007"]
    :param index_code: str, index code, e.g. "000300"
    :param start_date: str, start date, e.g. "2008-04-30"
    :param end_date: str, end date, e.g. "2015-12-31"
    :param freq: str, rebalancing period: week "W", month "M" or quarter "Q";
        the last trading day of each period
    :return: pd.DataFrame of factor values, indexed by (date, code) with one
        column per distinct value of the ``cd`` field
    """
    query = dict(factors=factors, index=index_code,
                 start_date=start_date, end_date=end_date, freq=freq)
    # presumably a long-format frame with date / code / cd / value columns,
    # as required by the pivot below -- verify against the csf API
    long_format = csf.get_stock_factor(**query)
    return pd.pivot_table(long_format, values='value',
                          index=['date', 'code'], columns=['cd'])

In [17]:
def cut_group(data_, num_group, col_name=None, ascending=False):
    """
    Split the rows of a DataFrame into ``num_group`` rank-based buckets.

    Rows are ranked on ``col_name`` (ties broken by order of appearance, NaN
    values ranked last) and labelled ``'Q1'`` ... ``'Q<num_group>'`` in rank
    order. When the number of rows is not divisible by ``num_group`` the
    first ``len(data_) % num_group`` buckets receive one extra row.

    :param data_: pd.DataFrame
        the input data; it is copied, never modified in place
    :param num_group: int
        cut the data to num_group groups, must be >= 1
    :param col_name: str
        the column used to rank; required despite the ``None`` default
    :param ascending: bool
        True: smallest values land in Q1, False: largest values land in Q1

    :return: pd.DataFrame
        a copy of ``data_`` with an extra column ``col_name + 'g'`` holding
        the bucket label of every row
    :raises ValueError: if ``col_name`` is None or ``num_group`` < 1
    :raises KeyError: if ``col_name`` is not a column of ``data_``
    """
    if col_name is None:
        raise ValueError('col_name is required')
    if num_group < 1:
        raise ValueError('num_group must be a positive integer')

    data = data_.copy()

    # Bucket sizes: spread the remainder over the leading buckets, then turn
    # the sizes into inclusive upper rank bounds, e.g. 7 rows / 3 -> [3, 5, 7].
    base_size, remainder = divmod(len(data), num_group)
    sizes = np.full(num_group, base_size, dtype=int)
    sizes[:remainder] += 1
    upper_bounds = sizes.cumsum()

    # Deliberately not wrapped in try/except: a missing column must surface as
    # the original KeyError instead of being masked by a later failure.
    ranks = data.loc[:, col_name].rank(method='first', na_option='bottom',
                                       ascending=ascending)

    # Ranks run from 1 to len(data); the first bound >= rank identifies the
    # bucket, i.e. bucket k holds ranks in (upper_bounds[k-1], upper_bounds[k]].
    bucket_idx = np.searchsorted(upper_bounds, ranks.values, side='left')
    data[col_name + 'g'] = ['Q' + str(pos + 1) for pos in bucket_idx]
    return data



In [4]:
# Load both factors for the requested index and date window.
# NOTE(review): the outputs shown further down start at 2007-04-30, which does
# not match this 2016 window -- they presumably come from an earlier run with
# different dates; confirm by re-running the notebook top to bottom.
df = get_raw_factor(
    factors=['M009006', 'M009007'],
    index_code='000300',
    start_date='2016-01-01',
    end_date='2016-07-01',
)


In [21]:
# Keep only the rows where every factor column has a value.
df = df.dropna(axis=0, how='any')

In [22]:
# Label every row with its M009006 quintile (Q1..Q5), computed per date.
dfg = df.groupby(level=0).apply(
    lambda per_date: cut_group(per_date, num_group=5, col_name='M009006')
)

In [47]:
# Mean M009006 value of every quintile, one row per date.
group_mean = (
    dfg.groupby(level=0)
       .apply(lambda per_date: per_date.groupby('M009006g')['M009006'].mean())
)

In [51]:
# Last quantile column, selected by position. `.ix` is deprecated and was
# removed in pandas 1.0; `.iloc` is the explicit positional equivalent.
group_mean.iloc[:, -1]

date
2007-04-30     7.953750
2007-05-31   -16.032679
2007-06-29   -33.302500
2007-07-31     1.914483
2007-08-31     0.177759
2007-09-28   -30.468214
2007-10-31   -44.386071
2007-11-30   -47.491228
2007-12-28    14.968333
2008-01-31   -51.245965
2008-02-29   -36.566786
2008-03-31   -57.341607
2008-04-30   -28.444561
2008-05-30   -45.761636
2008-06-30   -68.050926
2008-07-31   -15.677544
2008-08-29   -57.935517
2008-09-26   -30.182807
2008-10-31   -68.128103
2008-11-28    -7.530000
2008-12-31   -34.684828
2009-01-23   -10.555172
2009-02-27    -7.859483
2009-03-31     4.840345
2009-04-30   -24.319310
2009-05-27   -18.180862
2009-06-30   -23.802281
2009-07-31    -8.982881
2009-08-31   -68.395690
2009-09-30    -8.805254
2009-10-30    -8.627414
2009-11-30    -7.293793
2009-12-31   -43.243333
2010-01-29   -56.946552
2010-02-26   -30.826667
2010-03-31   -38.058621
2010-04-30   -72.094915
2010-05-31   -50.185517
2010-06-30   -57.791754
2010-07-30    12.437544
2010-08-31   -35.169655
2010-09-30 

In [18]:
# Spot-check the bucketing on a single date. `.ix` is deprecated and was
# removed in pandas 1.0; `.loc` is the explicit label-based equivalent.
cut_group(df.loc['2007-04-30'], 5, 'M009006')

cd,M009006,M009007,M009006g
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
000001,,,Q5
000002,21.23,62.9149,Q4
000009,51.82,80.1228,Q2
000012,20.16,64.3646,Q4
000021,47.97,61.5163,Q3
000022,18.21,62.9863,Q5
000024,45.86,62.2100,Q3
000027,31.48,58.2184,Q4
000029,59.48,82.8193,Q2
000031,68.18,85.3876,Q1


In [15]:
# Preview the first rows of the (date, code)-indexed factor table.
df.head()

Unnamed: 0_level_0,cd,M009006,M009007
date,code,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-04-30,1,,
2007-04-30,2,21.23,62.9149
2007-04-30,9,51.82,80.1228
2007-04-30,12,20.16,64.3646
2007-04-30,21,47.97,61.5163


In [40]:
# Number of distinct values in the second index level (the stock codes).
df.index.get_level_values(1).unique().size

593

In [54]:
# Sanity check: selecting one quantile column yields a pandas Series.
# NOTE(review): the table rendered after the type output is not produced by
# this expression -- presumably left over from another cell; confirm.
type(group_mean['Q1'])

pandas.core.series.Series

M009006g,Q1,Q2,Q3,Q4,Q5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-30,71.06474,54.22446,44.91107,32.36429,7.95375
2007-05-31,3215.056,1149.641,498.0943,22.36096,-135.5988
2007-06-29,75033.04,2454.797,-4317.007,-418.0181,4346.879
2007-07-31,4144434.0,98794.88,-135364.9,-9185.249,12670.82
2007-08-31,242628600.0,4224392.0,-4433589.0,-195755.4,14923.34
2007-09-28,10798330000.0,96017480.0,-26729870.0,2102572.0,-439794.8
2007-10-31,330252800000.0,72552150.0,356520100.0,-56877560.0,19080920.0
2007-11-30,5841660000000.0,-301829700.0,-5837423000.0,1670903000.0,-887095600.0
2007-12-28,402287500000000.0,-16733110000.0,-277189500000.0,64252000000.0,-14165440000.0
2008-01-31,3342316000000000.0,156916200000.0,6611748000000.0,-2284677000000.0,711756100000.0


In [57]:
# Correlation matrix of the numeric factor columns inside every
# (date, quantile) bucket. The string label column 'M009006g' is excluded
# explicitly because DataFrame.corr() no longer drops non-numeric columns
# silently in pandas >= 2.0. The inner lambda argument is named `bucket` so it
# does not shadow the notebook-level `df`.
# NOTE(review): this rebinds `group_mean`, which previously held the
# per-quantile means.
group_mean = dfg.groupby(level=0).apply(
    lambda frame: frame.groupby('M009006g').apply(
        lambda bucket: bucket.select_dtypes(include=['number']).corr()
    )
)

In [58]:
# Display the per-date, per-quantile correlation matrices.
group_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,cd,M009006,M009007
date,M009006g,cd,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-04-30,Q1,M009006,1.000000,-0.141978
2007-04-30,Q1,M009007,-0.141978,1.000000
2007-04-30,Q2,M009006,1.000000,0.050454
2007-04-30,Q2,M009007,0.050454,1.000000
2007-04-30,Q3,M009006,1.000000,0.111943
2007-04-30,Q3,M009007,0.111943,1.000000
2007-04-30,Q4,M009006,1.000000,-0.074850
2007-04-30,Q4,M009007,-0.074850,1.000000
2007-04-30,Q5,M009006,1.000000,0.467204
2007-04-30,Q5,M009007,0.467204,1.000000


In [61]:
# Built once at cell level so that every result shares the same type (the
# original re-created the namedtuple class on each call).
SecuCap = namedtuple('SecuCap', ['secu', 'cap'])


def get_secus_and_caps(df, value_col='M009006'):
    """
    Pair the security codes of a frame with the values of one column.

    Rows containing any NaN are dropped first, then the index is moved into
    regular columns so that ``code`` can be read as a column.

    :param df: pd.DataFrame whose index contains a level named ``code``
        (assumed from the ``code`` lookup below -- confirm against callers)
    :param value_col: str, the column returned as ``cap``; defaults to
        "M009006", the column the original implementation hard-coded
    :return: SecuCap(secu, cap), two pd.Series aligned row by row
    """
    flat_df = df.dropna().reset_index()
    return SecuCap(flat_df['code'], flat_df[value_col])


# Backward-compatible alias for the original double-underscore name, which
# would be name-mangled if it were ever referenced inside a class body.
__get_secus_and_caps = get_secus_and_caps

In [67]:
# One cell per (date, quantile) holding the SecuCap pair of that bucket;
# unstack() moves the quantile labels into the columns.
ans = (
    dfg.groupby(level=0)
       .apply(lambda per_date: per_date.groupby('M009006g').apply(__get_secus_and_caps))
       .unstack()
)

In [71]:
# Display the date x quantile table of (codes, values) pairs.
ans

M009006g,Q1,Q2,Q3,Q4,Q5
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2007-04-30,"([000031, 000069, 000089, 000099, 000422, 0005...","([000009, 000029, 000059, 000066, 000401, 0005...","([000021, 000024, 000036, 000068, 000503, 0005...","([000027, 000061, 000400, 000528, 000550, 0007...","([000002, 000012, 000022, 000039, 000063, 0000..."
2007-05-31,"([000001, 000009, 000024, 000031, 000039, 0000...","([000029, 000063, 000066, 000401, 000422, 0005...","([000027, 000503, 000539, 000562, 000601, 0006...","([000002, 000022, 000059, 000088, 000089, 0000...","([000012, 000021, 000036, 000060, 000550, 0005..."
2007-06-29,"([000001, 000027, 000157, 000538, 000550, 0005...","([000002, 000022, 000063, 000410, 000422, 0005...","([000021, 000031, 000039, 000059, 000069, 0000...","([000012, 000024, 000060, 000503, 000559, 0006...","([000066, 000068, 000089, 000100, 000425, 0004..."
2007-07-31,"([000001, 000002, 000031, 000046, 000060, 0000...","([000012, 000024, 000066, 000503, 000559, 0005...","([000009, 000027, 000029, 000402, 000422, 0005...","([000021, 000036, 000059, 000063, 000157, 0004...","([000068, 000088, 000089, 000410, 000538, 0006..."
2007-08-31,"([000059, 000488, 000528, 000559, 000636, 0007...","([000002, 000089, 000401, 000562, 000617, 0006...","([000021, 000066, 000068, 000157, 000400, 0004...","([000009, 000012, 000029, 000031, 000036, 0000...","([000001, 000024, 000027, 000039, 000046, 0004..."
2007-09-28,"([000039, 000046, 000060, 000068, 000401, 0004...","([000001, 000009, 000012, 000422, 000425, 0005...","([000021, 000024, 000027, 000623, 000680, 0007...","([000029, 000059, 000063, 000069, 000089, 0001...","([000002, 000031, 000036, 000066, 000088, 0004..."
2007-10-31,"([000001, 000002, 000024, 000061, 000063, 0000...","([000009, 000021, 000031, 000059, 000060, 0000...","([000036, 000046, 000488, 000617, 000680, 0006...","([000012, 000029, 000401, 000410, 000422, 0005...","([000027, 000039, 000066, 000503, 000539, 0005..."
2007-11-30,"([000036, 000488, 000503, 000581, 000625, 0006...","([000059, 000061, 000063, 000066, 000528, 0005...","([000012, 000029, 000039, 000069, 000089, 0004...","([000157, 000401, 000425, 000527, 000539, 0005...","([000001, 000002, 000009, 000021, 000024, 0000..."
2007-12-28,"([000021, 000027, 000059, 000401, 000422, 0005...","([000039, 000410, 000488, 000541, 000581, 0006...","([000029, 000060, 000066, 000088, 000157, 0005...","([000001, 000031, 000061, 000528, 000539, 0005...","([000002, 000012, 000036, 000069, 000402, 0004..."
2008-01-31,"([000031, 000061, 000063, 000069, 000338, 0004...","([000001, 000024, 000029, 000046, 000059, 0001...","([000002, 000009, 000012, 000088, 000089, 0004...","([000036, 000539, 000543, 000559, 000562, 0006...","([000027, 000039, 000572, 000625, 000758, 0007..."
