In [6]:
# -*- coding: utf-8 -*-
import time,math,traceback
from datetime import datetime
import pandas as pd
import numpy as np

# 综合分分数映射表
RATING_RULES_CUISHOU = {
    'nums_tel' : [-1, 0, 4, 7, 8],
    'call_in_times' : [-1, 0, 4, 7, 8],
    'call_in_less_15' : [-1, 0, 4, 7, 8],
    'most_times_by_tel' : [-1, 0, 2, 4, 5],
    'up_2_times_by_tel' : [-1, 0, 2, 5, 6],
    '7day_times' : [-1, 0, 2, 5, 6],
    '30day_times' : [-1, 0, 3, 5, 6],
    '60day_times' : [-1, 0, 3, 5, 6],
    '90day_times' : [-1, 0, 5, 7, 8],
}

JUDGE_LIST = [(-1,0), (0,3), (3,7), (7,14), (14,21), (21,28), (28,30), (30,60), (60,90)]

class Cuishou_score(object):
    """
    催收分处理类，处理Base、Result
    """
    def __init__(self):
        """
        初始化类
        """
        self.err_msg = list()
    
    def _ret_handle(self, status, message, content):
        """
        初始化输出格式
        """
        return  {'status': status, 'message': message, 'content': content}

    def _check_begin_time(self, bg_time):
        """
        检查通话时间
        """
        try:
            time.strptime("".join(bg_time.split()), "%Y%m%d%H%M%S")
        except:
            self.err_msg.append(traceback.format_exc()+'--error_begin_time:{}'.format(bg_time))
    
    def _check_bill_times(self, bi_time):
        """
        检查通话时长&格式化
        """
        try:
            return int("".join(bi_time.split()))
        except:
            self.err_msg.append(traceback.format_exc()+'--error_begin_time:{}'.format(bi_time))
            return -1

    def _deal_phone_time(self, judge_day, phone_time):
        """
        处理phone_time区间
        """
        if not phone_time:
            return -1
        days = (judge_day-phone_time).days
        for i, j in JUDGE_LIST: 
            if i < days <= j:
                return j
        return -1
    
    
    def _bulid_data(self, data, judge_time):
        """
        基础数据加工
        """
        if not data:
            return -1, 'No Data', {}, {}
        
        labels = ["toward_number", "toward_number_type", "total_phone_times", "called_times", 
                    "phone_times_in_15s", "phone_times_between_15_30s", "phone_times_over_60s", "phone_time"]
        try:
            df = pd.DataFrame.from_records(data, columns=labels)
        except:
            return -1, traceback.format_exc(), {}, {}

        # 参数校验&数据格式化
        df['phone_time'] = df.phone_time.map(pd.to_datetime)        

        # 综合分数据加工
        # print(df)
        base_list = []
        judge_day = datetime.strptime(judge_time, "%Y-%m-%d")
        pattern_cs_days = lambda x, y, z: 1 if y < (judge_day-x).days <= z else 0
        pattern_cs_nums = lambda x: 1 if x != 0 else 0
        for (types, tel), group in df.groupby(['toward_number_type','toward_number']):
            # print('\n',types, tel,'\n\n', group)
            sorted_df = group.sort_values(by='phone_time', ascending=True).reset_index()
            handle_days = lambda x, y: group['called_times'][group.phone_time.apply(pattern_cs_days, args=(x, y))==1].sum()
            called_times_in_7_days = handle_days(-1, 7)
            called_times_in_14_days = handle_days(-1, 14)
            called_times_in_21_days = handle_days(-1, 21)
            called_times_in_30_days = handle_days(-1, 30)
            called_times_between_30_60_days = handle_days(30, 60)
            called_times_between_60_90_days = handle_days(60, 90)
            records = dict()
            records["tel"] = tel
            records["number_type"] = types
            records["latest_phone_time"] = sorted_df['phone_time'].iloc[-1]
            records["first_phone_time"] = sorted_df['phone_time'].iloc[0]
            records["phone_numbers"] = 1
            records["total_phone_times"] = group.total_phone_times.sum()
            records["called_times"] = group.called_times.sum()
            records["phone_times_in_15s"] = group.phone_times_in_15s.sum()
            records["phone_times_between_15_30s"] = group.phone_times_between_15_30s.sum()
            records["phone_times_over_60s"] = group.phone_times_over_60s.sum()
            records["called_times_in_7_days"] = called_times_in_7_days
            records["called_times_in_14_days"] = called_times_in_14_days
            records["called_times_in_21_days"] = called_times_in_21_days
            records["called_times_in_30_days"] = called_times_in_30_days
            records["called_times_between_30_60_days"] = called_times_between_30_60_days
            records["called_times_between_60_90_days"] = called_times_between_60_90_days
            records["phone_numbers_in_7_days"] = pattern_cs_nums(called_times_in_7_days)
            records["phone_numbers_in_14_days"] = pattern_cs_nums(called_times_in_14_days)
            records["phone_numbers_in_21_days"] = pattern_cs_nums(called_times_in_21_days)
            records["phone_numbers_in_30_days"] = pattern_cs_nums(called_times_in_30_days)
            records["phone_numbers_between_30_60_days"] = pattern_cs_nums(called_times_between_30_60_days)
            base_list.append(records)

        base_df = pd.DataFrame(base_list)
        # print(base_df)
        
        cuishou_dict = dict()
        result_dict = dict()
        if not base_df.empty:
            cs_df = base_df[base_df.number_type == 1]
            yscs_df = base_df[base_df.number_type == 2]
            # print(cs_df)
            # print(yscs_df)
            # 综合分字段
            cuishou_dict = {
                'nums_tel' : cs_df.shape[0],
                'call_in_times' : cs_df['called_times'].sum(),
                'call_in_less_15' : cs_df['phone_times_in_15s'].sum(),
                'most_times_by_tel' : cs_df['called_times'].max(),
                'up_2_times_by_tel' : cs_df[cs_df['called_times']>2].shape[0],
                '7day_times' : cs_df['called_times_in_7_days'].sum(),
                '30day_times' : cs_df['called_times_in_30_days'].sum(),
                '60day_times' : cs_df['called_times_between_30_60_days'].sum(),
                '90day_times' : cs_df['called_times_between_60_90_days'].sum(),
            }
            # result表字段
            import pdb; pdb.set_trace()
            cs_latest_phone_time = cs_df.sort_values(by='latest_phone_time', ascending=True).reset_index()['latest_phone_time'].iloc[-1] if not cs_df.empty else None
            cs_first_phone_time = cs_df.sort_values(by='first_phone_time', ascending=True).reset_index()['first_phone_time'].iloc[0] if not cs_df.empty else None
            yscs_latest_phone_time = yscs_df.sort_values(by='latest_phone_time', ascending=True).reset_index()['latest_phone_time'].iloc[-1] if not yscs_df.empty else None
            yscs_first_phone_time = yscs_df.sort_values(by='first_phone_time', ascending=True).reset_index()['first_phone_time'].iloc[0] if not yscs_df.empty else None
            cs_latest_phone_days = self._deal_phone_time(judge_day, cs_latest_phone_time) 
            cs_first_phone_days = self._deal_phone_time(judge_day, cs_first_phone_time)
            yscs_latest_phone_days = self._deal_phone_time(judge_day, yscs_latest_phone_time)
            yscs_first_phone_days = self._deal_phone_time(judge_day, yscs_first_phone_time)
            to_int = lambda x : int(x)
            result_dict = {
                # cuishou data
                "cs_latest_phone_time" : cs_latest_phone_time.strftime("%Y%m%d") if cs_latest_phone_time else None,
                "cs_first_phone_time" : cs_first_phone_time.strftime("%Y%m%d") if cs_first_phone_time else None,
                "cs_latest_phone_days" : cs_latest_phone_days,
                "cs_first_phone_days" : cs_first_phone_days,
                "cs_phone_numbers" : to_int(cs_df.shape[0]),
                "cs_total_phone_times" : to_int(cs_df.total_phone_times.sum()),
                "cs_called_times" : to_int(cs_df.called_times.sum()),
                "cs_phone_times_in_15s" : to_int(cs_df.phone_times_in_15s.sum()),
                "cs_phone_times_between_15_30s" : to_int(cs_df.phone_times_between_15_30s.sum()),
                "cs_phone_times_over_60s" : to_int(cs_df.phone_times_over_60s.sum()),
                "cs_phone_numbers_in_7_days" : to_int(cs_df.phone_numbers_in_7_days.sum()),
                "cs_phone_numbers_in_14_days" : to_int(cs_df.phone_numbers_in_14_days.sum()),
                "cs_phone_numbers_in_21_days" : to_int(cs_df.phone_numbers_in_21_days.sum()),
                "cs_phone_numbers_in_30_days" : to_int(cs_df.phone_numbers_in_30_days.sum()),
                "cs_phone_numbers_between_30_60_days" : to_int(cs_df.phone_numbers_between_30_60_days.sum()),
                "cs_called_times_in_7_days" : to_int(cs_df.called_times_in_7_days.sum()),
                "cs_called_times_in_14_days" : to_int(cs_df.called_times_in_14_days.sum()),
                "cs_called_times_in_21_days" : to_int(cs_df.called_times_in_21_days.sum()),
                "cs_called_times_in_30_days" : to_int(cs_df.called_times_in_30_days.sum()),
                "cs_called_times_between_30_60_days" : to_int(cs_df.called_times_between_30_60_days.sum()),
                # yisicuishou data
                "yscs_latest_phone_time" : yscs_latest_phone_time.strftime("%Y%m%d") if yscs_latest_phone_time else None,
                "yscs_first_phone_time" : yscs_first_phone_time.strftime("%Y%m%d") if yscs_first_phone_time else None,
                "yscs_latest_phone_days" : yscs_latest_phone_days,
                "yscs_first_phone_days" : yscs_first_phone_days,
                "yscs_phone_numbers" : to_int(yscs_df.shape[0]),
                "yscs_total_phone_times" : to_int(yscs_df.total_phone_times.sum()),
                "yscs_called_times" : to_int(yscs_df.called_times.sum()),
                "yscs_phone_times_in_15s" : to_int(yscs_df.phone_times_in_15s.sum()),
                "yscs_phone_times_between_15_30s" : to_int(yscs_df.phone_times_between_15_30s.sum()),
                "yscs_phone_times_over_60s" : to_int(yscs_df.phone_times_over_60s.sum()),
                "yscs_phone_numbers_in_7_days" : to_int(yscs_df.phone_numbers_in_7_days.sum()),
                "yscs_phone_numbers_in_14_days" : to_int(yscs_df.phone_numbers_in_14_days.sum()),
                "yscs_phone_numbers_in_21_days" : to_int(yscs_df.phone_numbers_in_21_days.sum()),
                "yscs_phone_numbers_in_30_days" : to_int(yscs_df.phone_numbers_in_30_days.sum()),
                "yscs_phone_numbers_between_30_60_days" : to_int(yscs_df.phone_numbers_between_30_60_days.sum()),
                "yscs_called_times_in_7_days" : to_int(yscs_df.called_times_in_7_days.sum()),
                "yscs_called_times_in_14_days" : to_int(yscs_df.called_times_in_14_days.sum()),
                "yscs_called_times_in_21_days" : to_int(yscs_df.called_times_in_21_days.sum()),
                "yscs_called_times_in_30_days" : to_int(yscs_df.called_times_in_30_days.sum()),
                "yscs_called_times_between_30_60_days" : to_int(yscs_df.called_times_between_30_60_days.sum()),
            }
        return 0, 'Success', cuishou_dict, result_dict

    def _get_each_score(self, rating_rules, data):
        """
        综合分算法实现
        """
        score_total = 0
        df = pd.DataFrame([data])
        for k, v in rating_rules.items():
            # print(k,v)
            filter_data = [df[k].max() if x==max(v) else x for x in v] if df[k].max() > max(v) else v
            out_bins = pd.cut(df[k], bins=filter_data)
            counts = pd.value_counts(out_bins)
            counts = counts.sort_index()
            out = counts.reset_index(drop=True).to_frame()
            score = out.apply(lambda x: np.asarray(x) * np.asarray(pd.Series(np.arange(4))))[k].sum()
            score_total += score
            # print(score)
        return score_total

    def get_base(self, data, called_list):
        """
        获取Base结果
        """
        if not data:
            return self._ret_handle(-1, 'No Data', None)
        
        labels = ['call_number', 'bill_times', 'ticket_type', 'toward_number_type']
        try:
            df = pd.DataFrame.from_records(data, columns=labels)
        except:
            return self._ret_handle(-1, traceback.format_exc(), None)

        # 参数校验
        self.err_msg = list()
        df['bill_times'] = df.bill_times.map(self._check_bill_times)
        # print(self.err_msg, len(self.err_msg)*1.0/len(data))
        if len(self.err_msg)*1.0/len(data) >= 1:
            return self._ret_handle(-1, self.err_msg, None)

        # 基础数据加工
        base_list = []
        for toward_number, group in df.groupby('call_number'):
            toward_number_type = group.iloc[0]['toward_number_type']
            total_phone_times = int(group.shape[0])
            #被叫数据
            call_in_data = group.loc[group['ticket_type'].isin(called_list)]
            called_times = int(call_in_data.shape[0])
            phone_times_in_15s = int(call_in_data.bill_times.map(lambda x: 1 if 0 <= x <= 15 else 0).sum())
            phone_times_between_15_30s = int(call_in_data.bill_times.map(lambda x: 1 if 15 < x <= 30 else 0).sum())
            phone_times_over_60s = int( call_in_data.bill_times.map(lambda x: 1 if x > 60 else 0).sum())

            base_list.append((toward_number, toward_number_type, total_phone_times, called_times, phone_times_in_15s, phone_times_between_15_30s, phone_times_over_60s))
        return self._ret_handle(0, 'Success', base_list)

    def get_result(self, data, judge_time):
        """
        获取result结果
        """
        score = 0
        status, message, cuishou_data, result_dict = self._bulid_data(data, judge_time)
        if status == -1:
            return self._ret_handle(status, message, None)
        if cuishou_data:
            # 获取每个字段的分值
            score = self._get_each_score(RATING_RULES_CUISHOU, cuishou_data)
            score = int(math.ceil(score*0.88))
        result_dict['main_score'] = score
        return  self._ret_handle(0, 'Success', result_dict)

cuishou_inst = Cuishou_score()

def csf_base(**kwargs):
    """输出base表(7字段)
    Arguments:
        detailed_list {list} -- 输入详单(5字段), 元素为元组;
    Returns:
        dict -- 输出参数，{"status":0, "message":'Success', "content":[(),(),()...]}
    """
    # base表加工
    return cuishou_inst.get_base(kwargs['detailed_list'], kwargs['called_list'])

def csf_result(**kwargs):
    """输出result表(37字段)
    Arguments:
        detailed_list {list} -- 输入base表(8字段), 元素为元组;
    Returns:
        dict -- 输出参数，{"status":0, "message":'Success', "content":{...}}
    """
    # 综合分计算&result表加工
    return cuishou_inst.get_result(kwargs['detailed_list'], kwargs['judge_time'])


if __name__ == '__main__':
    detailed_list = [
            ('10081', ' 1 2 ', '00', 1),
            ('10081', '199999', '01', 1),
            ('10081', '53', '02', 1),
            ('10081', '15', '03', 1),
            ('10081', '9', '03', 1),
            ('10081', '30', '03', 1),
            ('10081', '60', '03', 1),
            ('10082', '53', '00', 2),
            ('10082', '2', '01', 2),
            ('10082', '54', '02', 2),
            ('10082', '61', '03', 2),
            ('10083', '24', '00', 1),
            ('10084', '34', '01', 1),
            ('10085', '12', '02', 2),
            ('10086', '62', '03', 2),
            ('10086', '62', '00', 2),
            ('10085', '12', '01', 2),
            ('10084', '34', '02', 1),
            ('10083', '24', '03', 1),
        ]
    st_time = time.time()
    ret = csf_base(detailed_list=detailed_list, called_list=['01', '03'])
    print(ret)
    print('time used:{}'.format(time.time()-st_time))
    if ret['status'] == 0:
        judge_time = datetime.now().strftime('%Y-%m-%d')
        st_time = time.time()
        # import random
        # detailed_list = map(lambda x:tuple(list(x) + ['20180{}{}'.format(random.randint(1,5), random.randint(10,17))]), ret['content'])
        # ["toward_number", "toward_number_type", "total_phone_times", "called_times", 
                    #"phone_times_in_15s", "phone_times_between_15_30s", "phone_times_over_60s", "phone_time"]
        detailed_list = [
            ('10081', 1, 10, 8, 4, 3, 4, '2018-05-16'),
            ('10082', 1, 10, 8, 4, 3, 4, '2018-05-16'),
            ('10083', 1, 10, 8, 4, 3, 4, '2018-05-16'),
            ('10084', 1, 10, 8, 4, 3, 4, '2018-05-16'),
            (u'02155368463', 1, 1, 1, 0, 0, 1, u'2018-05-05')
        ]
        result = csf_result(detailed_list=detailed_list,judge_time=judge_time)
        print(result)
        print('time used:{}'.format(time.time()-st_time))

{'status': 0, 'content': [('10081', 1, 7, 5, 2, 1, 1), ('10082', 2, 4, 2, 1, 0, 1), ('10083', 1, 2, 1, 0, 1, 0), ('10084', 1, 2, 1, 0, 0, 0), ('10085', 2, 2, 1, 1, 0, 0), ('10086', 2, 2, 1, 0, 0, 1)], 'message': 'Success'}
time used:0.0139999389648
> <ipython-input-6-2c4021cba494>(151)_bulid_data()
-> cs_latest_phone_time = cs_df.sort_values(by='latest_phone_time', ascending=True).reset_index()['latest_phone_time'].iloc[-1] if not cs_df.empty else None
(Pdb) q


BdbQuit: 

In [4]:
# -*- coding: utf-8 -*-
import time,math,traceback
from datetime import datetime
import pandas as pd
import numpy as np

rating_rules = {
    '60day_times': ([float('-inf') , 0, 3, 5, 6, 9, float('inf')], [1, 3, 5, 7, 9, 11]),
    'nums_tel': ([float('-inf'), 0, 4, 7, 8, float('inf')], [-1, 0, 4, 7, 8]),
    '7day_times': ([float('-inf'), 0, 2, 5, 6, float('inf')], [-1, 0, 2, 5, 6]),
    '30day_times': ([float('-inf'), 0, 3, 5, 6, float('inf')], [-1, 0, 3, 5, 6]),
    'call_in_less_15': ([float('-inf'), 0, 4, 7, 8, float('inf')], [-1, 0, 4, 7, 8]),
    'up_2_times_by_tel': ([float('-inf'), 0, 2, 5, 6, float('inf')], [-1, 0, 2, 5, 6]),
    '90day_times': ([float('-inf'), 0, 5, 7, 8, float('inf')], [-1, 0, 5, 7, 8]),
    'call_in_times': ([float('-inf'), 0, 4, 7, 8, float('inf')], [-1, 0, 4, 7, 8]),
    'most_times_by_tel': ([float('-inf'), 0, 2, 4, 5, float('inf')], [-1, 0, 2, 4, 5]),
}

data = {
    '60day_times': 12, 
    'nums_tel': 5, 
    '7day_times': 0, 
    '30day_times': 0, 
    'call_in_less_15': 16, 
    'up_2_times_by_tel': 4, 
    '90day_times': 0, 
    'call_in_times': 33, 
    'most_times_by_tel': 8
}

def _get_each_score(rating_rules, data):
        """
        综合分算法实现
        """
        score_total = 0
        df = pd.DataFrame([data])
        for k, v in rating_rules.items():
            # print(k,v)
            print k,v,df[k]
            out_bins = pd.cut(df[k], bins=v[0])
            counts = pd.value_counts(out_bins).sort_index()
            score = counts.multiply(np.array(v[1])).sum()
#             import pdb; pdb.set_trace()
            print score
            score_total += score
            # print(score)
        return score_total
print _get_each_score(rating_rules=rating_rules, data=data)

60day_times ([-inf, 0, 3, 5, 6, 9, inf], [1, 3, 5, 7, 9, 11]) 0    12
Name: 60day_times, dtype: int64
11
nums_tel ([-inf, 0, 4, 7, 8, inf], [-1, 0, 4, 7, 8]) 0    5
Name: nums_tel, dtype: int64
4
call_in_times ([-inf, 0, 4, 7, 8, inf], [-1, 0, 4, 7, 8]) 0    33
Name: call_in_times, dtype: int64
8
7day_times ([-inf, 0, 2, 5, 6, inf], [-1, 0, 2, 5, 6]) 0    0
Name: 7day_times, dtype: int64
-1
30day_times ([-inf, 0, 3, 5, 6, inf], [-1, 0, 3, 5, 6]) 0    0
Name: 30day_times, dtype: int64
-1
up_2_times_by_tel ([-inf, 0, 2, 5, 6, inf], [-1, 0, 2, 5, 6]) 0    4
Name: up_2_times_by_tel, dtype: int64
2
90day_times ([-inf, 0, 5, 7, 8, inf], [-1, 0, 5, 7, 8]) 0    0
Name: 90day_times, dtype: int64
-1
call_in_less_15 ([-inf, 0, 4, 7, 8, inf], [-1, 0, 4, 7, 8]) 0    16
Name: call_in_less_15, dtype: int64
8
most_times_by_tel ([-inf, 0, 2, 4, 5, inf], [-1, 0, 2, 4, 5]) 0    8
Name: most_times_by_tel, dtype: int64
5
35
