In [None]:
import xml.etree.ElementTree as et
import pandas as pd
import os

class Make_Traffic_Data():
    '''
    Turns XML traffic snapshots plus a CSV road table into traffic jam tables.

    The constructor loads every xml file of a "morning" and an "evening"
    directory and one csv file with the road description. make_data then
    writes, for each of the two periods, one csv per road and one csv per
    start/end node containing the computed jam values.
    '''

    def __init__(self,morning_dir:str='/',evening_dir:str='/',base_data_dir:str='/',map_save_dir:str='/'):
        '''
        Loads the traffic snapshots and the base road table.
        The xml directories are split into two: one for the morning traffic and one for
        the evening traffic. Both must contain at least one xml file, and the csv
        directory must contain at least one csv file (the first one by name is used).
        --------------------------------------
        morning_dir : The directory which stores the morning traffic data (must contain xml file)
        evening_dir : The directory which stores the evening traffic data (must contain xml file)
        base_data_dir : The directory which stores the base data (must contain a csv file)
        map_save_dir : The directory in which the map html file is saved (created if missing)
        --------------------------------------
        raises FileExistsError : when an xml directory has no xml file or the csv
                                 directory has no csv file (type kept for callers
                                 that already catch it)
        raises ValueError : when an xml file has a different number of road ids,
                            start nodes and average speeds
        '''
        morning_dir=self._ensure_trailing_slash(morning_dir)
        evening_dir=self._ensure_trailing_slash(evening_dir)
        base_data_dir=self._ensure_trailing_slash(base_data_dir)
        map_save_dir=self._ensure_trailing_slash(map_save_dir)

        # Sorted so that the jam_value<i> column numbering is reproducible;
        # os.listdir gives no ordering guarantee.
        mor_xml=self._list_files(morning_dir,'.xml')
        eve_xml=self._list_files(evening_dir,'.xml')

        if not mor_xml or not eve_xml:
            raise FileExistsError('There is no xml file')

        # Keyed by file name without the '.xml' extension.
        self.mor_traffic_data={name[:-4]:self._parse_traffic_xml(morning_dir+name)
                               for name in mor_xml}
        self.eve_traffic_data={name[:-4]:self._parse_traffic_xml(evening_dir+name)
                               for name in eve_xml}

        self.morning_data=[name[:-4] for name in mor_xml]
        self.evening_data=[name[:-4] for name in eve_xml]

        # Only csv files are considered, so stray entries in the folder
        # (hidden files, checkpoints, ...) no longer break the lookup.
        csv_files=self._list_files(base_data_dir,'.csv')
        if not csv_files:
            raise FileExistsError('There is no csv file')

        self.base_data=pd.read_csv(base_data_dir+csv_files[0])

        # makedirs also creates missing parent directories.
        os.makedirs(map_save_dir,exist_ok=True)

        self.map_save_dir=map_save_dir

    @staticmethod
    def _ensure_trailing_slash(path:str):
        '''Returns path ending with '/'; an empty path means the current directory.'''
        if not path:
            return './'
        return path if path.endswith('/') else path+'/'

    @staticmethod
    def _list_files(directory:str,extension:str):
        '''Returns the sorted names of the entries of directory ending with extension.'''
        return sorted(name for name in os.listdir(directory) if name.endswith(extension))

    @staticmethod
    def _parse_traffic_xml(path:str):
        '''
        Reads one traffic xml file into a dict with the keys road_id, start_node and
        avg_speed, each holding a list of ints taken from the roadsectionid,
        startnodeid and avgspeed elements found below the data elements.
        '''
        root=et.parse(path).getroot()

        def read_ints(tag):
            return [int(node.text) for node in root.findall('.//data//'+tag)]

        parsed=dict(road_id=read_ints('roadsectionid'),
                    start_node=read_ints('startnodeid'),
                    avg_speed=read_ints('avgspeed'))

        # The three lists are later used as columns of one DataFrame, so they
        # have to line up; fail here with the file name instead of later.
        if len({len(values) for values in parsed.values()})>1:
            raise ValueError('Inconsistent number of road entries in '+path)

        return parsed

    def __get_jam_value(self,road_id:list=None,start_node:list=None,
                      avg_speed:list=None,df:pd.DataFrame=None,index:int=0):
        '''
        __get_jam_value adds the jam value of one traffic snapshot to df.
        The snapshot is inner-merged on (LINK_ID, F_NODE) == (road_id, start_node), so
        roads missing from the snapshot are dropped from the result.
        The jam value is 0 when avg_speed >= smooth, 5 when slow <= avg_speed < smooth
        and 10 otherwise. The snapshot speed is also added to the running sum kept in
        the avg_speeds_avgs column.
        --------------------------------
        road_id : the road ids of the snapshot
        start_node : the starting node ids of the roads
        avg_speed : avg speeds of the roads, used to calculate the jam value
        df : DataFrame with the LINK_ID, F_NODE, smooth, slow and avg_speeds_avgs columns
        index : number of the snapshot; the result column is named 'jam_value<index>'
        --------------------------------
        returns : a new DataFrame with the 'jam_value<index>' column added
        '''
        snapshot=pd.DataFrame({'road_id':road_id,'start_node':start_node,'avg_speed':avg_speed})

        merged=pd.merge(left=df,right=snapshot,left_on=['LINK_ID','F_NODE'],
                        right_on=['road_id','start_node'])
        merged=merged.drop(columns=['road_id','start_node'])

        speed=merged['avg_speed']

        # Start from the worst level and relax it; the later assignment wins,
        # so a speed above both thresholds ends up as 0. The column is always
        # built from scratch, never patched on top of an existing one.
        jam=pd.Series(10.0,index=merged.index)
        jam.loc[speed>=merged['slow']]=5.0
        jam.loc[speed>=merged['smooth']]=0.0
        merged['jam_value'+str(index)]=jam

        merged['avg_speeds_avgs']=merged['avg_speeds_avgs']+speed

        return merged.drop(columns='avg_speed')

    def __save_node_traffic_data(self,data_save_dir:str,time_line:str,
                               df:pd.DataFrame=None):
        '''
        __save_node_traffic_data saves the traffic jam values summed per node.
        Two files are written: '<data_save_dir>start_Traffic_dir/<time_line>.csv' grouped
        by F_NODE and '<data_save_dir>end_Traffic_dir/<time_line>.csv' grouped by T_NODE.
        Each file holds the summed jam_* columns, their total (total_jam) and the
        latitude, longitude and name of the node.
        --------------------------------
        data_save_dir : directory (ending with '/') in which the data will be saved
        time_line : name of the saved file ('morning' or 'evening')
        df : road DataFrame which will be used
        --------------------------------
        '''
        jam_columns=[name for name in df.columns if name.startswith('jam_')]

        for node_col,prefix,folder in (('F_NODE','start','start_Traffic_dir/'),
                                       ('T_NODE','end','end_Traffic_dir/')):
            # One location row per node; without this the merge below would
            # repeat every node once per road attached to it.
            location=df[[node_col,prefix+'_lat',prefix+'_lon',prefix+'_name']].drop_duplicates()

            node_df=df[[node_col]+jam_columns].groupby(node_col).sum()
            # Sum every jam column by name: after the groupby the node id is
            # the index, so a positional slice would skip the first column.
            node_df['total_jam']=node_df[jam_columns].sum(axis=1)

            node_df=pd.merge(left=node_df.reset_index(),right=location,on=node_col)
            node_df.to_csv(data_save_dir+folder+time_line+'.csv',index=False,encoding='utf-8')

        print('files saved!')

    def _build_road_table(self,traffic_data:dict):
        '''
        Returns a fresh copy of the base data with one jam_value<i> column per snapshot
        of traffic_data, their sum (total_jam) and the mean speed (avg_speeds_avgs).
        '''
        road_df=self.base_data.copy()
        road_df['avg_speeds_avgs']=0
        jam_columns=[]

        for i,snapshot in enumerate(traffic_data.values()):
            road_df=self.__get_jam_value(df=road_df,index=i,**snapshot)
            jam_columns.append('jam_value'+str(i))

        road_df['total_jam']=road_df[jam_columns].sum(axis=1)
        if jam_columns:
            road_df['avg_speeds_avgs']=road_df['avg_speeds_avgs']/len(jam_columns)

        return road_df

    def make_data(self,data_save_dir:str='/'):
        '''
        make_data saves the road/starting_node/ending_node traffic jam values of the
        morning and of the evening data. Each period is computed independently from
        the base data, so the evening files do not contain any morning value.
        Files written below data_save_dir:
            road_Traffic_dir/morning.csv, road_Traffic_dir/evening.csv
            start_Traffic_dir/morning.csv, start_Traffic_dir/evening.csv
            end_Traffic_dir/morning.csv, end_Traffic_dir/evening.csv
        --------------------------------
        data_save_dir : directory in which the data will be saved (created if missing)
        --------------------------------
        '''
        data_save_dir=self._ensure_trailing_slash(data_save_dir)

        # Each folder is created on its own; an already existing folder must
        # not prevent the creation of the remaining ones.
        for folder in ('road_Traffic_dir/','start_Traffic_dir/','end_Traffic_dir/'):
            os.makedirs(data_save_dir+folder,exist_ok=True)

        for time_line,traffic_data in (('morning',self.mor_traffic_data),
                                       ('evening',self.eve_traffic_data)):
            road_df=self._build_road_table(traffic_data)
            road_df.to_csv(data_save_dir+'road_Traffic_dir/'+time_line+'.csv',
                           index=False,encoding='utf-8')
            self.__save_node_traffic_data(data_save_dir,time_line,road_df)
        

In [None]:
# Load the morning/evening xml snapshots and the base csv; the html folder
# is where the map is meant to be saved.
test=Make_Traffic_Data(
    morning_dir='../xml_files/morning',
    evening_dir='../xml_files/evening/',
    base_data_dir='../csv_files/',
    map_save_dir='../html_files/',
)

In [None]:
# Write the road / start node / end node jam tables below ../made_datasets.
test.make_data(data_save_dir='../made_datasets')