In [41]:
# -*- coding: utf-8 -*-
import time
import traceback
from datetime import datetime as DT
from functools import partial
import pandas as pd
import numpy as np

# Scorecard definition: feature name -> (bin_edges, bin_scores).
# Stats._get_each_score feeds bin_edges to pandas.cut, so each tuple carries
# exactly one score per interval between two consecutive edges.
# -8887 is the value stored when a subscription field is missing, which is
# why it appears as the upper edge of the first interval.
rating_rules = {
    'innet_months': (
        [float('-inf'), -8887, 15, 25, 40, 52, 72, float('inf')],
        [-9, -69, -25, -17, -1, 31, 42],
    ),
    'dinner_rent_fee': (
        [float('-inf'), -8887, 30, 55, 118, float('inf')],
        [-25, -8, 15, 30, 88],
    ),
    'hd_maxofflinedaysin3months': (
        [float('-inf'), -8887, 1, 2, 7, float('inf')],
        [-17, 25, 17, -25, -50],
    ),
    'hd_contactearlymorningrate': (
        [float('-inf'), -8887, 0.01, 0.02, 0.03, 0.05, float('inf')],
        [-18, 27, 11, -21, -24, -63],
    ),
    'dhb_callTelTotalNumCollHist': (
        [float('-inf'), 1, float('inf')],
        [23, -53],
    ),
}

class Stats(object):
    """Derive features from call-detail rows and score them with a scorecard.

    The instance keeps scratch state (``err_msg`` and ``lenth_90``) that is
    reset on every ``get_result`` call, so one instance can be reused for
    successive, non-concurrent calls.
    """

    # Value stored for a subscription field that is None or cannot be read.
    _MISSING = -8887
    # Look-back window in days, and the same window in seconds (90*24*3600).
    _WINDOW_DAYS = 90
    _WINDOW_SECONDS = 7776000
    # Constant the per-feature scores are added to.
    _BASE_SCORE = 603
    # Positional layout of one input row, and the fields kept out of the frame.
    _LABELS = ['call_number', 'call_time', 'tel_type', 'phone_time',
               'dinner_rent_fee', 'innet_months']
    _EXCLUDED = ['dinner_rent_fee', 'innet_months']

    def __init__(self):
        """Create the per-call scratch state."""
        self.err_msg = []
        self.lenth_90 = 0

    def _ret_handle(self, status, message, content):
        """Wrap a result in the ``{'status', 'message', 'content'}`` envelope."""
        return {'status': status, 'message': message, 'content': content}

    def _check_begin_time(self, bg_time):
        """Parse a ``%Y%m%d%H%M%S`` call timestamp, ignoring whitespace.

        Returns the parsed ``datetime``, or ``-1`` when the value is empty or
        unparsable. Unparsable values also append a traceback to ``err_msg``.
        """
        if not bg_time:
            return -1
        try:
            return DT.strptime("".join(bg_time.split()), "%Y%m%d%H%M%S")
        except Exception:
            self.err_msg.append(traceback.format_exc() +
                                '--error_begin_time:{}'.format(bg_time))
            return -1

    def _check_call_duration(self, bi_time):
        """Parse a call duration into an ``int``, ignoring whitespace.

        Returns ``-1`` when the value is empty or unparsable. Unparsable
        values also append a traceback to ``err_msg``.
        """
        if not bi_time:
            return -1
        try:
            return int("".join(bi_time.split()))
        except Exception:
            # The label used to read 'error_begin_time' (copy-paste slip).
            self.err_msg.append(traceback.format_exc() +
                                '--error_call_duration:{}'.format(bi_time))
            return -1

    def _count_blank(self, date_x, date_y, judge_time):
        """Record the gap between two consecutive call times.

        ``lenth_90`` is raised to the whole-day gap ``date_y - date_x`` when
        that gap is larger and ``date_x`` lies less than 90 days before
        ``judge_time``. Returns ``date_y`` so the method can be chained.
        """
        blank_day = (date_y - date_x).days
        judge_sec = (judge_time - date_x).total_seconds()
        if judge_sec < self._WINDOW_SECONDS and blank_day > self.lenth_90:
            self.lenth_90 = blank_day
        return date_y

    def _get_each_score(self, rating_rules, data):
        """Sum the scorecard points of every feature in ``rating_rules``.

        Arguments:
            rating_rules {dict} -- feature name -> (bin_edges, bin_scores);
            data {dict} -- feature name -> numeric value.
        Returns:
            The summed score (0 for a value that falls in no bin).
        Raises:
            KeyError -- a feature named in ``rating_rules`` is missing.
        """
        score_total = 0
        df = pd.DataFrame([data])
        for k, v in rating_rules.items():
            # NOTE(review): debug trace on stdout, kept as-is; built as one
            # string so the output is identical on Python 2 and 3.
            print('{} {} {}'.format(k, v, df[k]))
            out_bins = pd.cut(df[k], bins=v[0])
            # One count per interval in bin order; the single row puts a 1 in
            # its interval, so the product selects that interval's score.
            counts = out_bins.value_counts().sort_index()
            score = counts.multiply(np.array(v[1])).sum()
            print(score)
            score_total += score
        return score_total

    def _build_features(self, data, judge_time):
        """Turn raw rows into the scorecard feature dict.

        Arguments:
            data {list} -- non-empty list of 6-field rows (see ``_LABELS``);
            judge_time {str} -- reference date, "%Y%m%d".
        Returns:
            ``(features, None)`` on success or ``(None, message)`` when the
            rows cannot be used.
        Raises:
            ValueError -- ``judge_time`` or a ``phone_time`` is malformed.
        """
        try:
            df = pd.DataFrame.from_records(data, columns=self._LABELS,
                                           exclude=self._EXCLUDED)
        except Exception:
            return None, traceback.format_exc()

        # Months in network and plan fee are taken from the first row only.
        try:
            first = data[0]
            innet_months = first[5] if first[5] is not None else self._MISSING
            dinner_rent_fee = first[4] if first[4] is not None else self._MISSING
        except Exception:
            innet_months = self._MISSING
            dinner_rent_fee = self._MISSING

        judge_time = DT.strptime(judge_time, "%Y%m%d")
        self.err_msg = []
        df['call_time'] = df.call_time.map(self._check_begin_time)
        df['phone_time'] = pd.to_datetime(df.phone_time, format="%Y%m%d")
        df = df[~df.call_time.isin([-1, ])].copy()
        if len(self.err_msg) * 1.0 / len(data) >= 1:
            return None, self.err_msg
        # A dropped row leaves the column as object dtype; convert so the
        # ``.dt`` accessor below works.
        df['call_time'] = pd.to_datetime(df.call_time)

        # Keep rows whose phone_time is 0..90 days before judge_time.
        age_days = (judge_time - df.phone_time).dt.days
        df_90 = df[(age_days > -1) & (age_days <= self._WINDOW_DAYS)]
        if df_90.empty:
            # Previously this crashed (empty reduce / division by zero).
            return None, 'No valid call records in the last 90 days'

        # Longest whole-day gap between consecutive distinct call times.
        self.lenth_90 = 0
        call_times = sorted(set(df_90.call_time.tolist()))
        for earlier, later in zip(call_times, call_times[1:]):
            self._count_blank(earlier, later, judge_time)

        # Share of calls started between 00:00 and 06:59, to two decimals.
        hours = df_90.call_time.dt.hour
        early_calls = df_90[(hours >= 0) & (hours < 7)].shape[0]
        hd_contactearlymorningrate = "{:.2f}".format(
            early_calls * 1.0 / df_90.shape[0])

        # Distinct numbers whose tel_type is '1' (labelled "collection"
        # calls by the original comments).
        dhb_callTelTotalNumCollHist = df_90[df_90.tel_type == '1'].call_number.nunique()

        features = {
            "innet_months": int(innet_months),
            "dinner_rent_fee": int(dinner_rent_fee),
            "hd_maxofflinedaysin3months": int(self.lenth_90),
            "hd_contactearlymorningrate": float(hd_contactearlymorningrate),
            "dhb_callTelTotalNumCollHist": int(dhb_callTelTotalNumCollHist),
        }
        return features, None

    def get_result(self, data, called_list, judge_time):
        """Score a list of call-detail rows.

        Arguments:
            data {list} -- 6-field rows (see ``_LABELS``);
            called_list {list} -- accepted for compatibility, not used;
            judge_time {str} -- reference date, "%Y%m%d".
        Returns:
            dict -- ``{'status': 0, 'message': 'Success',
            'content': {'dumiao_score': int}}`` on success, otherwise
            ``{'status': -1, 'message': <reason>, 'content': None}``.
        """
        if not data:
            return self._ret_handle(-1, 'No Data', None)

        features, error = self._build_features(data, judge_time)
        if error is not None:
            return self._ret_handle(-1, error, None)

        score = self._BASE_SCORE + self._get_each_score(rating_rules, features)
        # Plain int rather than a numpy integer, so the payload serialises.
        return self._ret_handle(0, 'Success', {'dumiao_score': int(score)})

# Module-level Stats instance that cal_result() delegates to.
stats = Stats()

def cal_result(**kwargs):
    """Score a call-detail list through the module-level ``stats`` instance.

    Arguments (all keyword, all required):
        detailed_list {list} -- call-detail rows, each a 6-field tuple;
        called_list {list} -- list of strings, passed through unchanged;
        judge_time {str} -- reference date, "%Y%m%d";
    Returns:
        dict -- the envelope built by ``Stats.get_result``,
        e.g. {"status":0, "message":'Success', "content":{...}}
    Raises:
        KeyError -- one of the three keyword arguments is missing.
    """
    compute = stats.get_result
    rows, called, reference_day = [
        kwargs[key] for key in ('detailed_list', 'called_list', 'judge_time')
    ]
    return compute(rows, called, reference_day)


if __name__ == '__main__':
    # Demo run. Row layout:
    # ['call_number', 'call_time', 'tel_type', 'phone_time', 'dinner_rent_fee', 'innet_months']
    detailed_list = [
        ("10081", "20180112070353", "01", "20180212", 0, 1),
        ("10081", "20180201180353", "01", "20180212", "12", "24"),
        ("10081", "20180213080353", "01", "20180212", "12", "24"),
        ("10082", "20180212080353", ")1", "20180212", "34", "1"),
        ("10083", "20180212080353", ")1", "20180212", "12", "50"),
    ]
    called_numbers = ['02', '04']

    # Time the call and print both the result and the elapsed seconds.
    started_at = time.time()
    outcome = cal_result(
        detailed_list=detailed_list,
        called_list=called_numbers,
        judge_time="20180215",
    )
    print(outcome)
    print('time used:{}'.format(time.time() - started_at))

hd_maxofflinedaysin3months ([-inf, -8887, 1, 2, 7, inf], [-17, 25, 17, -25, -50]) 0    20
Name: hd_maxofflinedaysin3months, dtype: int64
-50
dinner_rent_fee ([-inf, -8887, 30, 55, 118, inf], [-25, -8, 15, 30, 88]) 0    0
Name: dinner_rent_fee, dtype: int64
-8
dhb_callTelTotalNumCollHist ([-inf, 1, inf], [23, -53]) 0    0
Name: dhb_callTelTotalNumCollHist, dtype: int64
23
innet_months ([-inf, -8887, 15, 25, 40, 52, 72, inf], [-9, -69, -25, -17, -1, 31, 42]) 0    1
Name: innet_months, dtype: int64
-69
hd_contactearlymorningrate ([-inf, -8887, 0.01, 0.02, 0.03, 0.05, inf], [-18, 27, 11, -21, -24, -63]) 0    0.0
Name: hd_contactearlymorningrate, dtype: float64
27
{'status': 0, 'content': {'dumiao_score': 526}, 'message': 'Success'}
time used:0.0379998683929


In [52]:
# -*- coding: utf-8 -*-
import time

import traceback
from datetime import datetime as DT
from functools import partial
import pandas as pd
import numpy as np

# Scorecard definition: feature name -> (bin_edges, bin_scores).
# Stats._get_each_score feeds bin_edges to pandas.cut, so each tuple carries
# exactly one score per interval between two consecutive edges.
# -8887 is the value stored when a subscription field is missing, which is
# why it appears as the upper edge of the first interval.
rating_rules = {
    'innet_months': (
        [float('-inf'), -8887, 15, 25, 40, 52, 72, float('inf')],
        [-9, -69, -25, -17, -1, 31, 42],
    ),
    'dinner_rent_fee': (
        [float('-inf'), -8887, 30, 55, 118, float('inf')],
        [-25, -8, 15, 30, 88],
    ),
    'hd_maxofflinedaysin3months': (
        [float('-inf'), -8887, 1, 2, 7, float('inf')],
        [-17, 25, 17, -25, -50],
    ),
    'hd_contactearlymorningrate': (
        [float('-inf'), -8887, 0.01, 0.02, 0.03, 0.05, float('inf')],
        [-18, 27, 11, -21, -24, -63],
    ),
    'dhb_callTelTotalNumCollHist': (
        [float('-inf'), 1, float('inf')],
        [23, -53],
    ),
}

class Stats(object):
    """Derive features from call-detail rows and score them with a scorecard.

    The instance keeps scratch state (``err_msg`` and ``lenth_90``) that is
    reset on every ``get_result`` call, so one instance can be reused for
    successive, non-concurrent calls.
    """

    # Value stored for a subscription field that is None or cannot be read.
    _MISSING = -8887
    # Look-back window in days, and the same window in seconds (90*24*3600).
    _WINDOW_DAYS = 90
    _WINDOW_SECONDS = 7776000
    # Constant the per-feature scores are added to.
    _BASE_SCORE = 603
    # Positional layout of one input row, and the fields kept out of the frame.
    _LABELS = ['call_number', 'call_time', 'tel_type', 'phone_time',
               'dinner_rent_fee', 'innet_months']
    _EXCLUDED = ['dinner_rent_fee', 'innet_months']

    def __init__(self):
        """Create the per-call scratch state."""
        self.err_msg = []
        self.lenth_90 = 0

    def _ret_handle(self, status, message, content):
        """Wrap a result in the ``{'status', 'message', 'content'}`` envelope."""
        return {'status': status, 'message': message, 'content': content}

    def _check_begin_time(self, bg_time):
        """Parse a ``%Y%m%d%H%M%S`` call timestamp, ignoring whitespace.

        Returns the parsed ``datetime``, or ``-1`` when the value is empty or
        unparsable. Unparsable values also append a traceback to ``err_msg``.
        """
        if not bg_time:
            return -1
        try:
            return DT.strptime("".join(bg_time.split()), "%Y%m%d%H%M%S")
        except Exception:
            self.err_msg.append(traceback.format_exc() +
                                '--error_begin_time:{}'.format(bg_time))
            return -1

    def _check_call_duration(self, bi_time):
        """Parse a call duration into an ``int``, ignoring whitespace.

        Returns ``-1`` when the value is empty or unparsable. Unparsable
        values also append a traceback to ``err_msg``.
        """
        if not bi_time:
            return -1
        try:
            return int("".join(bi_time.split()))
        except Exception:
            # The label used to read 'error_begin_time' (copy-paste slip).
            self.err_msg.append(traceback.format_exc() +
                                '--error_call_duration:{}'.format(bi_time))
            return -1

    def _count_blank(self, date_x, date_y, judge_time):
        """Record the gap between two consecutive call times.

        ``lenth_90`` is raised to the whole-day gap ``date_y - date_x`` when
        that gap is larger and ``date_x`` lies less than 90 days before
        ``judge_time``. Returns ``date_y`` so the method can be chained.
        """
        blank_day = (date_y - date_x).days
        judge_sec = (judge_time - date_x).total_seconds()
        if judge_sec < self._WINDOW_SECONDS and blank_day > self.lenth_90:
            self.lenth_90 = blank_day
        return date_y

    def _get_each_score(self, rating_rules, data):
        """Sum the scorecard points of every feature in ``rating_rules``.

        Arguments:
            rating_rules {dict} -- feature name -> (bin_edges, bin_scores);
            data {dict} -- feature name -> numeric value.
        Returns:
            The summed score (0 for a value that falls in no bin).
        Raises:
            KeyError -- a feature named in ``rating_rules`` is missing.
        """
        score_total = 0
        df = pd.DataFrame([data])
        for k, v in rating_rules.items():
            # NOTE(review): debug trace on stdout, kept as-is; built as one
            # string so the output is identical on Python 2 and 3.
            print('{} {}'.format(k, v))
            out_bins = pd.cut(df[k], bins=v[0])
            # One count per interval in bin order; the single row puts a 1 in
            # its interval, so the product selects that interval's score.
            counts = out_bins.value_counts().sort_index()
            score = counts.multiply(np.array(v[1])).sum()
            print(score)
            score_total += score
        return score_total

    def _build_features(self, data, judge_time):
        """Turn raw rows into the scorecard feature dict.

        Arguments:
            data {list} -- non-empty list of 6-field rows (see ``_LABELS``);
            judge_time {str} -- reference date, "%Y%m%d".
        Returns:
            ``(features, None)`` on success or ``(None, message)`` when the
            rows cannot be used.
        Raises:
            ValueError -- ``judge_time`` or a ``phone_time`` is malformed.
        """
        try:
            df = pd.DataFrame.from_records(data, columns=self._LABELS,
                                           exclude=self._EXCLUDED)
        except Exception:
            return None, traceback.format_exc()

        # Months in network and plan fee are taken from the first row only.
        try:
            first = data[0]
            innet_months = first[5] if first[5] is not None else self._MISSING
            dinner_rent_fee = first[4] if first[4] is not None else self._MISSING
        except Exception:
            innet_months = self._MISSING
            dinner_rent_fee = self._MISSING

        judge_time = DT.strptime(judge_time, "%Y%m%d")
        self.err_msg = []
        df['call_time'] = df.call_time.map(self._check_begin_time)
        df['phone_time'] = pd.to_datetime(df.phone_time, format="%Y%m%d")
        df = df[~df.call_time.isin([-1, ])].copy()
        if len(self.err_msg) * 1.0 / len(data) >= 1:
            return None, self.err_msg
        # A dropped row leaves the column as object dtype; convert so the
        # ``.dt`` accessor below works.
        df['call_time'] = pd.to_datetime(df.call_time)

        # Keep rows whose phone_time is 0..90 days before judge_time.
        age_days = (judge_time - df.phone_time).dt.days
        df_90 = df[(age_days > -1) & (age_days <= self._WINDOW_DAYS)]
        if df_90.empty:
            # Previously this crashed (empty reduce / division by zero).
            return None, 'No valid call records in the last 90 days'

        # Longest whole-day gap between consecutive distinct call times.
        self.lenth_90 = 0
        call_times = sorted(set(df_90.call_time.tolist()))
        for earlier, later in zip(call_times, call_times[1:]):
            self._count_blank(earlier, later, judge_time)

        # Share of calls started between 00:00 and 06:59, to two decimals.
        hours = df_90.call_time.dt.hour
        early_calls = df_90[(hours >= 0) & (hours < 7)].shape[0]
        hd_contactearlymorningrate = "{:.2f}".format(
            early_calls * 1.0 / df_90.shape[0])

        # Distinct numbers whose tel_type is '1' (labelled "collection"
        # calls by the original comments).
        dhb_callTelTotalNumCollHist = df_90[df_90.tel_type == '1'].call_number.nunique()

        features = {
            "innet_months": int(innet_months),
            "dinner_rent_fee": int(dinner_rent_fee),
            "hd_maxofflinedaysin3months": int(self.lenth_90),
            "hd_contactearlymorningrate": float(hd_contactearlymorningrate),
            "dhb_callTelTotalNumCollHist": int(dhb_callTelTotalNumCollHist),
        }
        return features, None

    def get_result(self, data, judge_time):
        """Score a list of call-detail rows.

        Arguments:
            data {list} -- 6-field rows (see ``_LABELS``);
            judge_time {str} -- reference date, "%Y%m%d".
        Returns:
            dict -- ``{'status': 0, 'message': 'Success',
            'content': {'dumiao_score': int}}`` on success, otherwise
            ``{'status': -1, 'message': <reason>, 'content': None}``.
        """
        if not data:
            return self._ret_handle(-1, 'No Data', None)

        result_dict, error = self._build_features(data, judge_time)
        if error is not None:
            return self._ret_handle(-1, error, None)

        # NOTE(review): debug trace on stdout, kept as-is.
        print(result_dict)
        score = self._BASE_SCORE + self._get_each_score(rating_rules, result_dict)
        return self._ret_handle(0, 'Success', {'dumiao_score': int(score)})

# Module-level Stats instance that cal_result() delegates to.
stats = Stats()

def cal_result(**kwargs):
    """Score a call-detail list through the module-level ``stats`` instance.

    Arguments (keyword; both required, any other keyword is ignored):
        detailed_list {list} -- call-detail rows, each a 6-field tuple;
        judge_time {str} -- reference date, "%Y%m%d";
    Returns:
        dict -- the envelope built by ``Stats.get_result``,
        e.g. {"status":0, "message":'Success', "content":{...}}
    Raises:
        KeyError -- ``detailed_list`` or ``judge_time`` is missing.
    """
    compute = stats.get_result
    rows, reference_day = [
        kwargs[key] for key in ('detailed_list', 'judge_time')
    ]
    return compute(rows, reference_day)


if __name__ == '__main__':
    # Demo run. Row layout:
    # ['call_number', 'call_time', 'tel_type', 'phone_time', 'dinner_rent_fee', 'innet_months']
    detailed_list = [
        ("10081", "20180112060353", "01", "20180212", None, None),
        ("10081", "20180201180353", "01", "20180212", "12", "24"),
        ("10081", "20180213080353", "01", "20180212", "12", "24"),
        ("10082", "20180212080353", "1", "20180212", "34", "1"),
        ("10083", "20180212080353", "1", "20180212", "12", "50"),
    ]
    called_numbers = ['02', '04']

    # Time the call and print both the result and the elapsed seconds.
    started_at = time.time()
    outcome = cal_result(
        detailed_list=detailed_list,
        called_list=called_numbers,
        judge_time="20180215",
    )
    print(outcome)
    print('time used:{}'.format(time.time() - started_at))

{'hd_maxofflinedaysin3months': 20, 'dinner_rent_fee': -8887, 'dhb_callTelTotalNumCollHist': 2, 'innet_months': -8887, 'hd_contactearlymorningrate': 0.2}
hd_maxofflinedaysin3months ([-inf, -8887, 1, 2, 7, inf], [-17, 25, 17, -25, -50])
-50
dinner_rent_fee ([-inf, -8887, 30, 55, 118, inf], [-25, -8, 15, 30, 88])
-25
dhb_callTelTotalNumCollHist ([-inf, 1, inf], [23, -53])
-53
innet_months ([-inf, -8887, 15, 25, 40, 52, 72, inf], [-9, -69, -25, -17, -1, 31, 42])
-9
hd_contactearlymorningrate ([-inf, -8887, 0.01, 0.02, 0.03, 0.05, inf], [-18, 27, 11, -21, -24, -63])
-63
{'status': 0, 'content': {'dumiao_score': 403}, 'message': 'Success'}
time used:0.0360000133514
