#### 输入：  
text:【现病史】段落文字 string  
recorddate: 记录时间 %Y-%m-%d string  
entrydate: 入院时间 %Y-%m-%d string  
#### 输出：
text string  

In [1]:
text = """患者今晨8：00左右，在家中，早饭后，突然出现头晕，无视物旋转，伴有恶心呕吐一次，呕吐胃内容物。患者站起来回房间休息时自觉胸闷，突发晕倒，意识不清，跌倒在地。无头痛，无耳鸣、听力下降，无呕血、黑便，无视物模糊，无四肢抽搐，无进食反呛，无言语含糊、面部及肢体麻木乏力感，无二便失禁，无面色苍白或紫绀，无大汗淋漓。约两分钟后转醒。醒后仍有头晕不适。我院急诊抢救室就诊，查心电图提示正常心电图，查头颅CT提示双侧基底节及半卵圆区散在腔隙性脑梗死、缺血灶，老年脑改变。
    患者自发病以来，神清，精神可，胃纳、夜眠可，二便如常，否认近期体重减轻。患者既往有冠心病病史10年，平时规律服用阿司匹林药物治疗，病情控制良好，十年前行PCI术，置入冠脉支架。；2型糖尿病病史10年，平时规律服用二甲双胍缓释片，吡格列酮分散片药物治疗，病情控制良好；高血压病病史10年，平时规律服用替米沙坦、倍他乐克药物治疗，病情控制良好，最高190/100mmHg。"""

In [9]:
import re
from datetime import datetime, timedelta

def standardize_time(text,recorddate,entrydate):#main function
    
    recorddate = recorddate
    entrydate = entrydate
    
    text = re.sub('－','-',text)
    
    #替换中文日期
    chinese_pattern = re.compile(r"(\d{4}[年-])?(1[0-2]|0?[1-9])[月-](3[01]|[12][0-9]|0?[1-9][日]?)\s*，?([上中下午晚夜间早凌晨起]+)?(2[0-4]|1[0-9]|0?[1-9][点:：时])?([0-5]?[0-9]分?)?")
    text = re.sub(chinese_pattern, lambda m: chinesedate(m,recorddate),text)

    #替换X天前等
    text = re.sub(r"(入院)?前?([0-9零一二两三四五六七八九十]+)([天周月年])前?|(入院当[天日])",lambda m: entry_day_sub(m,recorddate,entrydate),text)

    #替换昨天今天
    text = re.sub(r"([今昨][天日]?)([上中下午晚夜间早凌晨起]+)?([0-9]+点)?",lambda m: today_yesterday(m,recorddate),text)

    #只有时间的补上日期
    text = add_date2time(text,recorddate)
    
    return text


def chinesedate(match,recorddate):#中文日期转换
    if match.group() is None:
        return None

    else:
        print('匹配1:', match.group())
        res = {
            "year":match.group(1),
            "month": match.group(2),
            "day": match.group(3),
            "hour": match.group(5) if match.group(5) is not None else '00',
            "minute": match.group(6) if match.group(6) is not None else '00'
        }
        params = {}

        for name in res:
            tmp = None
            if res[name] is not None:
                tmp = re.search(r"[0-9]+",res[name]).group()
            if tmp is not None:
                params[name] = int(tmp)
        target_date = datetime.strptime(recorddate+' 00:00','%Y-%m-%d %H:%M').replace(**params) ##默认为记录时间
        if match.group(5) is None:#只有日期
            print('改为:', str(target_date).split()[0])
            return str(target_date).split()[0]
        else: 
            is_pm = match.group(4)
            if is_pm is not None and len(is_pm) != 0:
                if re.match(r"[下午晚夜间]+",is_pm) is not None:
                    hour = target_date.time().hour
                    if hour < 12:
                        target_date = target_date.replace(hour=hour + 12)
            print('改为:', str(target_date.strftime('%Y-%m-%d %H:%M')))
            return str(target_date.strftime('%Y-%m-%d %H:%M'))

def cn2dig(src): #中文数字
    UTIL_CN_NUM = {
        '零': 0, '一': 1, '二': 2, '两': 2, '三': 3, '四': 4,
        '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,'十':10
    }
    if src == "":
        return None
    m = re.match("\d+", src)
    if m:
        return int(m.group())
    else:
        if src in UTIL_CN_NUM.keys():
            return int(UTIL_CN_NUM[src])
        else:
            return None

def entry_day_sub(match,recorddate,entrydate):
    if match.group() is None:
        return None
    else:
        print('匹配2:', match.group())

        if match.group(1) is not None:
            date = datetime.strptime(entrydate,'%Y-%m-%d')
        else: 
            date = datetime.strptime(recorddate,'%Y-%m-%d')

        if match.group(4) is not None: #入院当天
            target = date
        if match.group(2) is not None:
            num = cn2dig(match.group(2))
            if match.group(3) == '天': #X天前
                target = date-timedelta(days=num)
            if match.group(3) == '周': #X周前
                target = date-timedelta(days=num*7)
            if match.group(3) == '月': #X月前
                month = date.month
                target = date-timedelta(days=num*30)
            if match.group(3) == '年': #X年前
                year = date.year
                target = date-timedelta(days=num*365)  
        print('改为:', str(target).split()[0])
        return str(target).split()[0]+' '

def today_yesterday(match,recorddate): #今昨天
    keyDate = {'昨天':-1,'昨日':-1,'昨':-1,'今天': 0,'今日': 0, '今':0}
    is_pm = match.group(2)
    if match.group() is None:
        return None
    else:
        print('匹配3:',match.group())
        target = datetime.strptime(recorddate,'%Y-%m-%d') + timedelta(days=keyDate[match.group(1)])
        if match.group(3) is None:#如果只有日期 return date only
            if match.group(2) is not None: #保留下午上午类的词
                print('改为:',str(target).split()[0]+ match.group(2))
                return str(target).split()[0]+ match.group(2)
            else: 
                print('改为:',str(target).split()[0]+' ')
                return str(target).split()[0]+' '
        else:#若有日期+时间
            hour =int(re.search(r"[0-9]+",match.group(3)).group())
            if hour == 24: hour = 0
            target = target.replace(hour=hour)
            if is_pm is not None:
                if re.match(r"[下午晚夜间]+",is_pm) is not None:
                    if hour < 12:
                        target = target.replace(hour=hour + 12)
            print('改为:',str(target.strftime('%Y-%m-%d %H:%M')))
            return str(target.strftime('%Y-%m-%d %H:%M')) 

def added_date(sample,record_date): 
    m = re.findall(r"(\d{4}-\d{2}-\d{2})?\s?(\d{1,2}[:：]\d{1,2})?",sample) 
    tmp = None
    lst = {}
    for match in m:
        if match[0] != "": tmp = match[0] #储存最近一次出现的日期
        if match[0] == "" and match[1] != "":
            if tmp is not None: 
                time, added = match[1], tmp+' '+match[1].replace('：',':') 
            else:
                time, added = match[1], str(record_date)+' '+match[1].replace('：',':')
            lst[time] = added
    return lst

def add_date2time(sample,recorddate): #补日期
    lst = added_date(sample, recorddate)
    tmp = sample
    for date in lst:
        print('匹配4:', date)
        print('改为:', lst[date])
        tmp = re.sub(date,lst[date],tmp)
    return tmp


显示匹配更改结果:

In [11]:
new = standardize_time(text,'2018-04-16','2018-04-16')

匹配2: 10年
改为: 2008-04-18
匹配2: 十年前
改为: 2008-04-18
匹配2: 10年
改为: 2008-04-18
匹配2: 10年
改为: 2008-04-18
匹配3: 今晨
改为: 2018-04-16晨
匹配4: 8：00
改为: 2018-04-16 8:00


In [13]:
#更改完的段落
print(new)

患者2018-04-16晨2018-04-16 8:00左右，在家中，早饭后，突然出现头晕，无视物旋转，伴有恶心呕吐一次，呕吐胃内容物。患者站起来回房间休息时自觉胸闷，突发晕倒，意识不清，跌倒在地。无头痛，无耳鸣、听力下降，无呕血、黑便，无视物模糊，无四肢抽搐，无进食反呛，无言语含糊、面部及肢体麻木乏力感，无二便失禁，无面色苍白或紫绀，无大汗淋漓。约两分钟后转醒。醒后仍有头晕不适。我院急诊抢救室就诊，查心电图提示正常心电图，查头颅CT提示双侧基底节及半卵圆区散在腔隙性脑梗死、缺血灶，老年脑改变。
    患者自发病以来，神清，精神可，胃纳、夜眠可，二便如常，否认近期体重减轻。患者既往有冠心病病史2008-04-18 ，平时规律服用阿司匹林药物治疗，病情控制良好，2008-04-18 行PCI术，置入冠脉支架。；2型糖尿病病史2008-04-18 ，平时规律服用二甲双胍缓释片，吡格列酮分散片药物治疗，病情控制良好；高血压病病史2008-04-18 ，平时规律服用替米沙坦、倍他乐克药物治疗，病情控制良好，最高190/100mmHg。
