In [None]:
# coding:utf-8
'''燃油及工信部整合数据，匹配车型库，完成车型ID与整车型号的映射

程序匹配前数据整理：
1.燃油&工信部数据.csv：
    [自定义ID,车系，排量，驱动形式，变速，功率，燃料，整备质量，发动机型号，长宽高]
2.车型库数据.csv:
    [车型库ID,车系，排量，驱动形式，变速，功率，燃料，整备质量，发动机型号，长，宽，高]
'''
import codecs
import csv
import os

class FileCSV(object):
    """Helpers for reading, writing and re-encoding the CSV files used here."""

    def __init__(self):
        pass

    @staticmethod
    def _prefixed_path(prefix, file_name):
        """Return ``file_name`` with ``prefix`` added to its base name only.

        Prefixing the whole string would corrupt paths that contain a
        directory part ('data/x.csv' -> 'gbk_data/x.csv').
        """
        directory, base_name = os.path.split(file_name)
        return os.path.join(directory, prefix + base_name)

    def read_file(self, file_name, encoding=None):
        """Read a CSV file into a list of rows.

        Every line is stripped of surrounding whitespace and split on ','.
        Quoted fields are NOT interpreted, so a quoted comma still splits.

        Args:
            file_name: Path of the file to read.
            encoding: Text encoding handed to ``open``; ``None`` keeps the
                platform default.

        Returns:
            A list holding one list of column strings per line, or ``None``
            (after printing a message) when the file does not exist.
        """
        try:
            with open(file_name, 'r', encoding=encoding) as f:
                contents = f.readlines()
        except FileNotFoundError:
            print('文件%s不存在' % file_name)
            return None
        return [line.strip().split(',') for line in contents]

    def write_file(self, file_name, lists):
        """Write ``lists`` (an iterable of rows) to ``file_name`` as UTF-8 CSV."""
        # newline='' lets the csv module control line endings itself.
        with open(file_name, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(lists)

    def utf8_2_gbk(self, file_name):
        """Re-encode a UTF-8 file as GB18030 into a sibling 'gbk_<name>' file.

        Prints a message and returns ``None`` when the source file does not
        exist; returns ``None`` on success as well.
        """
        newfile = self._prefixed_path('gbk_', file_name)
        try:
            with codecs.open(file_name, 'r', 'utf-8') as f:
                out_gbk_str = f.read().encode('GB18030')
        except FileNotFoundError:
            print('文件%s不存在' % file_name)
            return None
        with open(newfile, 'wb') as ff:
            ff.write(out_gbk_str)

    def onebyone_2_onebymore(self, file_name):
        """Collapse a two-column one-to-one CSV into a one-to-many CSV.

        Rows sharing the same first column are grouped; the output file
        'onebymore_<name>' (UTF-8) gets one row per key whose second column
        is the comma-joined list of that key's values, in input order.

        Returns ``None``. Nothing is written when the input file is missing
        (``read_file`` has already reported it).
        """
        read_results = self.read_file(file_name)
        if read_results is None:
            return None

        transformation_results = {}
        for row in read_results:
            # Blank or single-column lines carry no key/value pair.
            if len(row) < 2:
                continue
            transformation_results.setdefault(row[0], []).append(row[1])

        out_name = self._prefixed_path('onebymore_', file_name)
        with open(out_name, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            for key, values in transformation_results.items():
                # Join the values directly so their content is left untouched.
                writer.writerow([key, ','.join(values)])
        
        
class Match(object):
    """Compare one fuel/MIIT record with one vehicle-library record.

    Both records are positional rows. Columns 1-8 are, in order: series,
    displacement, drive type, transmission, power, fuel type, curb weight and
    engine code. The fuel row keeps length/width/height together in column 9;
    the vehicle row keeps them separately in columns 9, 10 and 11.

    Every ``*_match`` method returns ``[fuel_id, vehicle_id, label]`` on a
    match and ``False`` otherwise.
    """

    def __init__(self, fuel_list, vehicle_list):
        # Fuel & MIIT fields
        self.fuel_list = fuel_list
        self.fuel_id = fuel_list[0]
        self.fuel_series = fuel_list[1]
        self.fuel_displacement = fuel_list[2]
        self.fuel_drive = fuel_list[3]
        self.fuel_trans = fuel_list[4]
        self.fuel_power = fuel_list[5]
        self.fuel_type = fuel_list[6]
        self.fuel_weight = fuel_list[7]
        self.fuel_engine_code = fuel_list[8]

        self.fuel_long_width_height = fuel_list[9]

        # Vehicle-library fields
        self.vehicle_list = vehicle_list
        self.vehicle_id = vehicle_list[0]
        self.vehicle_series = vehicle_list[1]
        self.vehicle_displacement = vehicle_list[2]
        self.vehicle_drive = vehicle_list[3]
        self.vehicle_trans = vehicle_list[4]
        self.vehicle_power = vehicle_list[5]
        self.vehicle_type = vehicle_list[6]
        self.vehicle_weight = vehicle_list[7]
        self.vehicle_engine_code = vehicle_list[8]

        self.vehicle_long = vehicle_list[9]
        self.vehicle_width = vehicle_list[10]
        self.vehicle_height = vehicle_list[11]

    def _fields_equal(self, start, stop):
        """Return True when columns [start, stop) agree field by field.

        Comparing the fields individually (rather than one concatenated
        string) prevents false matches where text shifts between adjacent
        columns, e.g. ('1.5', 'T') versus ('1.5T', '').
        """
        return (list(self.fuel_list[start:stop])
                == list(self.vehicle_list[start:stop]))

    def _dimensions_match(self):
        """Return True when vehicle length, width and height all occur in the
        fuel record's combined dimension text."""
        # NOTE(review): this is a substring test, so '180' also matches
        # '1800' and an empty value matches anything. Tighten once the exact
        # format of the combined dimension column is confirmed.
        return all(
            dimension in self.fuel_long_width_height
            for dimension in (self.vehicle_long,
                              self.vehicle_width,
                              self.vehicle_height)
        )

    # Match types for data merged with MIIT records:
    def exact_match(self):
        """Exact match: columns 1-8 equal and all three dimensions present."""
        if self._fields_equal(1, 9) and self._dimensions_match():
            return [self.fuel_id, self.vehicle_id, '精确匹配']
        return False

    def standard_match(self):
        """Standard match: ignores series, curb weight and engine code.

        Columns 2-6 must be equal and all three dimensions present.
        """
        if self._fields_equal(2, 7) and self._dimensions_match():
            return [self.fuel_id, self.vehicle_id, '标准匹配']
        return False

    def fuzzy_match_1(self):
        """Fuzzy match: not implemented yet; always returns ``None``."""
        pass

    # Match types for data without MIIT records:
    def no_gov_exact_match(self):
        """Exact match without MIIT data: columns 1-8 equal, dimensions ignored."""
        if self._fields_equal(1, 9):
            return [self.fuel_id, self.vehicle_id, '无工信部-精确匹配']
        return False

    def no_gov_standard_match(self):
        """Standard match without MIIT data: ignores curb weight and engine code.

        Series plus columns 2-6 (i.e. columns 1-6) must be equal.
        """
        if self._fields_equal(1, 7):
            return [self.fuel_id, self.vehicle_id, '无工信部-标准匹配']
        return False
            

In [None]:
# Process the merged MIIT & fuel-consumption data.

# Step 1: load both input files through the file helper.
file = FileCSV()
fuel = file.read_file('fuel.csv')
vehicle = file.read_file('vehicle.csv')

# Accumulates one [fuel_id, vehicle_id, label] row per successful match.
results = []

# read_file returns None (after printing a message) when a file is missing;
# iterating over None would raise TypeError, so only run with both inputs.
if fuel is not None and vehicle is not None:
    # Step 2: try every fuel record against every vehicle-library record.
    count_print = 0
    for count_print, fuel_single in enumerate(fuel, 1):
        print('第%s条记录正在匹配' % count_print)
        for vehicle_single in vehicle:
            match = Match(fuel_single, vehicle_single)
            exact_result = match.exact_match()
            standard_result = match.standard_match()

            # Step 3: keep every kind of hit. A pair that passes the exact
            # match also passes the standard one, so it is stored under both
            # labels.
            if exact_result:
                results.append(exact_result)
            if standard_result:
                results.append(standard_result)

    # Step 4: write all matches as UTF-8 CSV, then produce the re-encoded copy.
    file.write_file('result.csv', results)
    file.utf8_2_gbk('result.csv')

    print('GameOver!')


In [None]:
# Process the fuel-consumption data that has no MIIT records.

# Step 1: load both input files through the file helper.
file = FileCSV()
no_gov_fuel = file.read_file('no_gov_fuel.csv')
no_gov_vehicle = file.read_file('no_gov_vehicle.csv')

# Accumulates one [fuel_id, vehicle_id, label] row per successful match.
no_gov_results = []

# read_file returns None (after printing a message) when a file is missing;
# iterating over None would raise TypeError, so only run with both inputs.
if no_gov_fuel is not None and no_gov_vehicle is not None:
    # Step 2: try every fuel record against every vehicle-library record.
    count_print = 0
    for count_print, fuel_single in enumerate(no_gov_fuel, 1):
        print('第%s条记录正在匹配' % count_print)
        for vehicle_single in no_gov_vehicle:
            match = Match(fuel_single, vehicle_single)
            no_gov_exact_result = match.no_gov_exact_match()
            no_gov_standard_result = match.no_gov_standard_match()

            # Step 3: keep every kind of hit. A pair that passes the exact
            # match also passes the standard one, so it is stored under both
            # labels.
            if no_gov_exact_result:
                no_gov_results.append(no_gov_exact_result)
            if no_gov_standard_result:
                no_gov_results.append(no_gov_standard_result)

    # Step 4: write all matches as UTF-8 CSV, then produce the re-encoded copy.
    file.write_file('no_gov_result.csv', no_gov_results)
    file.utf8_2_gbk('no_gov_result.csv')

    print('GameOver!')




In [None]:
# After the match results have been post-processed, convert the one-to-one
# (key, value) rows into one-to-many rows.

file = FileCSV()
# The converted rows are written to 'onebymore_no_gov_one_one.csv'; the method
# has no return statement, so transformation1 is always None.
transformation1 = file.onebyone_2_onebymore('no_gov_one_one.csv')



In [None]:
# Merge historical and updated results. The two may overlap, e.g. the same
# complete-vehicle model can appear in both.
# Input rows have the form [vehicle ID, all historical results, all updated
# results]; each row is reduced to [vehicle ID, merged results].
# NOTE(review): the merged rows are only printed; nothing is written to a CSV
# yet, and the csv import below is not used by this cell as written.
import csv


def merge_results(result_history, result_new):
    """Return the sorted, de-duplicated union of two ';'-separated fields.

    Empty fields and empty tokens (such as the one left by a trailing ';')
    contribute nothing.
    """
    merged = set()
    for field in (result_history, result_new):
        merged.update(item for item in field.split(';') if item)
    return sorted(merged)


file_name = 'history_new_result.csv'
try:
    # Bound to `f`, not `file`, so the FileCSV helper object that other cells
    # keep in the global `file` is not overwritten.
    with open(file_name, 'r') as f:
        contents = f.readlines()
except FileNotFoundError:
    print('文件%s不存在' % file_name)
else:
    for line in contents:
        line_list = line.strip().split(',')
        # A blank line splits to [''] and has nothing to merge.
        if line_list == ['']:
            continue
        # Rows missing the history and/or update column are padded with
        # empty fields instead of raising IndexError.
        line_list += [''] * (3 - len(line_list))
        vehicle_id, result_history, result_new = line_list[:3]

        finally_result = merge_results(result_history, result_new)

        print(vehicle_id, finally_result)




In [None]:
# Scratch check: concatenate two lists, drop duplicates through a set, then
# sort the result in place.
a = ['cc', 'aa', 'bb', 'dd']
b = ['aa', 'bb']

# c holds every element of a followed by every element of b.
c = list(a)
c.extend(b)

# Going through a set removes duplicates; its iteration order is arbitrary,
# so the first print shows the unsorted order.
d = []
d.extend(set(c))
print(d)

d.sort()
print(d)