In [1]:
import pandas as pd

from pandas.core.frame import DataFrame
from pandas.core.groupby.generic import DataFrameGroupBy
from pandas.core.indexes.datetimes import DatetimeIndex

from pathlib import Path, PosixPath

from tqdm.notebook import tqdm

In [2]:
class HandleData:
    """Common settings and weekday aggregation shared by the monthly handlers."""

    def __init__(self) -> None:
        # Root folder that contains the raw monthly speed workbooks.
        self.data_path = Path('./data/')
        # Chinese weekday labels, ordered Monday through Sunday.
        self.weekday_list = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']

    def _grouping(self, groups: DataFrameGroupBy) -> list:
        """Return the column-wise mean of every weekday group.

        Args:
            groups: A group-by object keyed by English weekday names
                ('Monday' ... 'Sunday').

        Returns:
            A list of seven results of ``.mean()``, ordered Monday through
            Sunday so that position ``i`` lines up with ``weekday_list[i]``.

        Raises:
            KeyError: If any of the seven weekdays has no group.
        """
        ordered_days = (
            'Monday',
            'Tuesday',
            'Wednesday',
            'Thursday',
            'Friday',
            'Saturday',
            'Sunday',
        )
        return [groups.get_group(day).mean() for day in ordered_days]

In [3]:
class HandleSepNov(HandleData):
    """Weekday-average the September-November speed workbooks.

    Every ``.xlsx`` file directly under ``<data_path>/速度(9-11月)`` is read,
    its day columns are relabelled with weekday names, and the per-weekday
    means are written to ``./result/<same file name>`` with one sheet per
    weekday. A result workbook with the same name is overwritten.
    """

    def __init__(self) -> None:
        super().__init__()

        self.sep_nov_data_path = self.data_path / '速度(9-11月)'
        # Weekday name of each day column, in column order.
        self.day_name = self._get_day_name()

    def _get_day_name(self) -> pd.Index:
        """Return the weekday names for 2020-09-01..2020-11-30, skipping
        2020-10-01..2020-10-08.

        NOTE(review): the skipped range is presumably a holiday period that is
        absent from the source workbooks - confirm against the data.
        """
        all_days = pd.date_range(start='2020-09-01', end='2020-11-30', freq='D')
        skipped_days = pd.date_range(start='2020-10-01', end='2020-10-08', freq='D')
        return all_days.drop(skipped_days).day_name()

    def _pretreatment(self, file: Path) -> DataFrame:
        """Load one source workbook and label its columns with weekday names.

        Args:
            file: Path of the workbook; only ``file.name`` is used, resolved
                against the September-November data folder.

        Returns:
            A frame indexed by the workbook's ``Unnamed: 0`` column (renamed
            ``time``) whose columns are the weekday names in ``self.day_name``.

        Raises:
            ValueError: Raised by pandas when the workbook does not have
                exactly ``len(self.day_name)`` data columns.
        """
        df = pd.read_excel(
            self.sep_nov_data_path / file.name
        ).set_index(['Unnamed: 0'])

        df.index.name = 'time'
        df.columns = self.day_name

        return df

    def _save(self, result_list: list, file: Path) -> None:
        """Write the weekday means to ``./result/<file.name>``, one sheet per weekday.

        Each sheet has four columns: ``<weekday>速度均值``, ``9-11月``, ``3月``
        and ``4月``. Only ``9-11月`` is filled here; the others are left empty.

        Args:
            result_list: Per-weekday mean series ordered like ``weekday_list``.
            file: Source workbook path; its name is reused for the result file.
        """
        result_dir = Path('./result')
        # The writer does not create missing folders, so make sure it exists.
        result_dir.mkdir(parents=True, exist_ok=True)

        with pd.ExcelWriter(str(result_dir / file.name)) as writer:
            for name, result_series in zip(self.weekday_list, result_list):
                weekday_result = result_series.to_frame()
                weekday_result.columns = ['9-11月']

                # reindex adds the empty placeholder columns directly, instead
                # of concatenating empty frames (deprecated in recent pandas).
                # The index name is cleared so the index header cell stays
                # blank, which is the layout the _append readers expect
                # ('Unnamed: 0' on read-back).
                sheet = weekday_result.reindex(
                    columns=[name + '速度均值', '9-11月', '3月', '4月']
                ).rename_axis(None)

                sheet.to_excel(writer, sheet_name=name)

    def run(self) -> None:
        """Process every ``.xlsx`` workbook in the September-November folder."""
        print('正在处理9-11月的数据，请稍候…')

        # Collect the workbooks first so the progress bar total reflects the
        # real number of files rather than a hard-coded count.
        workbooks = sorted(
            path
            for path in self.sep_nov_data_path.iterdir()
            if path.suffix == '.xlsx'
        )

        for file in tqdm(workbooks):
            # Transpose so that rows are days (indexed by weekday name).
            by_day = self._pretreatment(file).T

            groups = by_day.groupby(by_day.index)
            result_list = self._grouping(groups)

            self._save(result_list, file)

In [4]:
class HandleMar(HandleData):
    """Weekday-average the March speed workbooks and merge them into ./result/.

    Every ``.xlsx`` file directly under ``<data_path>/速度(3月)`` is reduced to
    per-weekday means. If ``./result/<same file name>`` already exists, its
    ``3月`` column is replaced and the other columns are kept; otherwise a new
    result workbook is created.
    """

    def __init__(self) -> None:
        super().__init__()

        self.mar_data_path = self.data_path / '速度(3月)'
        # Weekday name of each day column, in column order.
        self.day_name = self._get_day_name()

    def _get_day_name(self) -> pd.Index:
        """Return the weekday names for 2020-03-01 through 2020-03-31."""
        # An inclusive end date is used instead of end='2020-04-01' with
        # closed='left': the `closed` keyword was removed in pandas 2.0, and
        # this form yields the same 31 days on every pandas version.
        return pd.date_range(
            start='2020-03-01',
            end='2020-03-31',
            freq='D'
        ).day_name()

    def _pretreatment(self, file: Path) -> DataFrame:
        """Load one source workbook and label its columns with weekday names.

        Args:
            file: Path of the workbook; only ``file.name`` is used, resolved
                against the March data folder.

        Returns:
            A frame indexed by the workbook's ``Unnamed: 0`` column (renamed
            ``time``) whose columns are the weekday names in ``self.day_name``.

        Raises:
            ValueError: Raised by pandas when the workbook does not have
                exactly ``len(self.day_name)`` data columns.
        """
        df = pd.read_excel(
            self.mar_data_path / file.name
        ).set_index(['Unnamed: 0'])

        df.index.name = 'time'
        df.columns = self.day_name

        return df

    def _save(self, result_list: list, file: Path) -> None:
        """Create ``./result/<file.name>`` with only the ``3月`` column filled.

        Each weekday sheet has the columns ``<weekday>速度均值``, ``9-11月``,
        ``3月`` and ``4月``.

        Args:
            result_list: Per-weekday mean series ordered like ``weekday_list``.
            file: Source workbook path; its name is reused for the result file.
        """
        result_dir = Path('./result')
        # The writer does not create missing folders, so make sure it exists.
        result_dir.mkdir(parents=True, exist_ok=True)

        with pd.ExcelWriter(str(result_dir / file.name)) as writer:
            for name, result_series in zip(self.weekday_list, result_list):
                weekday_result = result_series.to_frame()
                weekday_result.columns = ['3月']

                # reindex adds the empty placeholder columns; the index name
                # is cleared so the index header cell stays blank, which is
                # what the _append readers expect on read-back.
                result = weekday_result.reindex(
                    columns=[name + '速度均值', '9-11月', '3月', '4月']
                ).rename_axis(None)

                result.to_excel(writer, sheet_name=name)

    def _append(self, result_list: list, file: Path) -> None:
        """Replace the ``3月`` column of an existing result workbook.

        The first ``len(weekday_list)`` sheets are read, their ``3月`` column
        is swapped for the new means, and the workbook is rewritten. An
        already populated ``4月`` column is carried over unchanged, so
        re-running March does not erase April results.

        Args:
            result_list: Per-weekday mean series ordered like ``weekday_list``.
            file: Source workbook path; its name identifies the result file.

        Raises:
            KeyError: If a sheet lacks the ``3月`` or ``4月`` column.
        """
        result_path = Path('./result') / file.name

        # Load all sheets before reopening the same path for writing.
        with pd.ExcelFile(str(result_path)) as book:
            sheets = [
                book.parse(sheet_name=book.sheet_names[num], index_col=0)
                for num in range(len(self.weekday_list))
            ]

        # ExcelWriter overwrites the file, so no explicit delete is needed.
        with pd.ExcelWriter(str(result_path)) as writer:
            for num, (name, result_series) in enumerate(
                zip(self.weekday_list, result_list)
            ):
                weekday_result = result_series.to_frame()
                weekday_result.columns = ['3月']

                existing = sheets[num].drop(columns=['3月'])
                # Detach the April column and re-attach it last so the column
                # order stays ..., '3月', '4月' and its data is kept.
                april = existing.pop('4月')

                result = pd.concat(
                    [existing, weekday_result, april],
                    axis=1
                ).rename_axis(None)

                result.to_excel(writer, sheet_name=name)

    def run(self) -> None:
        """Process every ``.xlsx`` workbook in the March folder."""
        print('正在处理3月的数据，请稍候…')

        # Collect the workbooks first so the progress bar total reflects the
        # real number of files rather than a hard-coded count.
        workbooks = sorted(
            path
            for path in self.mar_data_path.iterdir()
            if path.suffix == '.xlsx'
        )

        for file in tqdm(workbooks):
            # Transpose so that rows are days (indexed by weekday name).
            by_day = self._pretreatment(file).T

            groups = by_day.groupby(by_day.index)
            result_list = self._grouping(groups)

            if Path('./result', file.name).exists():
                self._append(result_list, file)
            else:
                self._save(result_list, file)

In [5]:
class HandleApr(HandleData):
    """Weekday-average the April speed workbooks and merge them into ./result/.

    Every ``.xlsx`` file directly under ``<data_path>/速度(4月)`` is reduced to
    per-weekday means. If ``./result/<same file name>`` already exists, its
    ``4月`` column is replaced and the other columns are kept; otherwise a new
    result workbook is created.
    """

    def __init__(self) -> None:
        super().__init__()

        self.apr_data_path = self.data_path / '速度(4月)'
        # Weekday name of each day column, in column order.
        self.day_name = self._get_day_name()

    def _get_day_name(self) -> pd.Index:
        """Return the weekday names for 2020-04-01..2020-04-30, skipping
        2020-04-04..2020-04-06.

        NOTE(review): the skipped range is presumably a holiday period that is
        absent from the source workbooks - confirm against the data.
        """
        # An inclusive end date is used instead of end='2020-05-01' with
        # closed='left': the `closed` keyword was removed in pandas 2.0, and
        # this form yields the same 30 days on every pandas version.
        all_days = pd.date_range(start='2020-04-01', end='2020-04-30', freq='D')
        skipped_days = pd.date_range(start='2020-04-04', end='2020-04-06', freq='D')
        return all_days.drop(skipped_days).day_name()

    def _pretreatment(self, file: Path) -> DataFrame:
        """Load one source workbook and label its columns with weekday names.

        Args:
            file: Path of the workbook; only ``file.name`` is used, resolved
                against the April data folder.

        Returns:
            A frame indexed by the workbook's ``Unnamed: 0`` column (renamed
            ``time``) whose columns are the weekday names in ``self.day_name``.

        Raises:
            ValueError: Raised by pandas when the workbook does not have
                exactly ``len(self.day_name)`` data columns.
        """
        df = pd.read_excel(
            self.apr_data_path / file.name
        ).set_index(['Unnamed: 0'])

        df.index.name = 'time'
        df.columns = self.day_name

        return df

    def _save(self, result_list: list, file: Path) -> None:
        """Create ``./result/<file.name>`` with only the ``4月`` column filled.

        Each weekday sheet has the columns ``<weekday>速度均值``, ``9-11月``,
        ``3月`` and ``4月``.

        Args:
            result_list: Per-weekday mean series ordered like ``weekday_list``.
            file: Source workbook path; its name is reused for the result file.
        """
        result_dir = Path('./result')
        # The writer does not create missing folders, so make sure it exists.
        result_dir.mkdir(parents=True, exist_ok=True)

        with pd.ExcelWriter(str(result_dir / file.name)) as writer:
            for name, result_series in zip(self.weekday_list, result_list):
                weekday_result = result_series.to_frame()
                weekday_result.columns = ['4月']

                # reindex adds the empty placeholder columns; the index name
                # is cleared so the index header cell stays blank, which is
                # what the _append readers expect on read-back.
                result = weekday_result.reindex(
                    columns=[name + '速度均值', '9-11月', '3月', '4月']
                ).rename_axis(None)

                result.to_excel(writer, sheet_name=name)

    def _append(self, result_list: list, file: Path) -> None:
        """Replace the ``4月`` column of an existing result workbook.

        The first ``len(weekday_list)`` sheets are read, their ``4月`` column
        is swapped for the new means, and the workbook is rewritten with all
        other columns unchanged.

        Args:
            result_list: Per-weekday mean series ordered like ``weekday_list``.
            file: Source workbook path; its name identifies the result file.

        Raises:
            KeyError: If a sheet lacks the ``4月`` column.
        """
        result_path = Path('./result') / file.name

        # Load all sheets before reopening the same path for writing.
        with pd.ExcelFile(str(result_path)) as book:
            sheets = [
                book.parse(sheet_name=book.sheet_names[num], index_col=0)
                for num in range(len(self.weekday_list))
            ]

        # ExcelWriter overwrites the file, so no explicit delete is needed.
        with pd.ExcelWriter(str(result_path)) as writer:
            for num, (name, result_series) in enumerate(
                zip(self.weekday_list, result_list)
            ):
                weekday_result = result_series.to_frame()
                weekday_result.columns = ['4月']

                existing = sheets[num].drop(columns=['4月'])

                result = pd.concat(
                    [existing, weekday_result],
                    axis=1
                ).rename_axis(None)

                result.to_excel(writer, sheet_name=name)

    def run(self) -> None:
        """Process every ``.xlsx`` workbook in the April folder."""
        print('正在处理4月的数据，请稍候…')

        # Collect the workbooks first so the progress bar total reflects the
        # real number of files rather than a hard-coded count.
        workbooks = sorted(
            path
            for path in self.apr_data_path.iterdir()
            if path.suffix == '.xlsx'
        )

        for file in tqdm(workbooks):
            # Transpose so that rows are days (indexed by weekday name).
            by_day = self._pretreatment(file).T

            groups = by_day.groupby(by_day.index)
            result_list = self._grouping(groups)

            if Path('./result', file.name).exists():
                self._append(result_list, file)
            else:
                self._save(result_list, file)

In [6]:
def main():
    """Run the September-November, March and April pipelines, in that order."""
    pipeline = (HandleSepNov, HandleMar, HandleApr)
    for handler_cls in pipeline:
        handler_cls().run()

if __name__ == '__main__':
    main()

正在处理9-11月的数据，请稍候…


  0%|          | 0/231 [00:00<?, ?it/s]

正在处理3月的数据，请稍候…


  0%|          | 0/232 [00:00<?, ?it/s]

正在处理4月的数据，请稍候…


  0%|          | 0/233 [00:00<?, ?it/s]