In [1]:
import os
import glob
import pandas as pd

In [2]:
class AirQualityDataProcessor:
    """Add station metadata columns to every data CSV in a dataset directory.

    The stations info table is expected to have a ``file_name`` column plus
    the columns listed in ``INFO_COLUMNS``; each data file is matched to a
    row of that table through its own file name (the part before the first
    dot).
    """

    # Columns copied from the stations info table into each data file.
    INFO_COLUMNS = ('state', 'city', 'station_location')

    def __init__(self, dataset_dir, stations_info_file):
        """
        Initialize the processor with the dataset directory and stations info file.
        Args:
            dataset_dir (str): Path to the dataset directory.
            stations_info_file (str): Path to the stations info file.
        """
        self.dataset_dir = dataset_dir
        # Remember the path so process_files() can skip the info file itself,
        # wherever the dataset happens to live.
        self.stations_info_file = stations_info_file
        self.stations_info = pd.read_csv(stations_info_file)

    @staticmethod
    def _canonical(path):
        """Return a normalised absolute form of ``path`` suitable for equality checks."""
        return os.path.normcase(os.path.abspath(path))

    def process_files(self):
        """
        Process all CSV files in the dataset directory by adding state, city and
        station location columns from stations info.

        Each data file is rewritten in place. The stations info file is never
        modified, and files that already contain every metadata column are
        left untouched, so calling this method repeatedly is safe.
        """
        info_path = self._canonical(self.stations_info_file)
        # Sorted so files are processed in a reproducible order.
        for file in sorted(glob.glob(os.path.join(self.dataset_dir, '*.csv'))):
            if self._canonical(file) == info_path:
                # Merging the info table with itself would overwrite it.
                continue
            df = pd.read_csv(file)
            # Only merge columns that are not there yet; merging an existing
            # column again would produce '<name>_x' / '<name>_y' duplicates.
            missing = [col for col in self.INFO_COLUMNS if col not in df.columns]
            if not missing:
                continue
            filename = os.path.basename(file).split('.')[0]
            df = (
                df.assign(file_name=filename)
                .merge(self.stations_info[['file_name', *missing]], on='file_name', how='left')
                .drop('file_name', axis=1)
            )
            df.to_csv(file, index=False)

In [3]:
class CSVFileMerger:
    """Concatenate CSV files that share a file-name prefix into one file per prefix."""

    def __init__(self, input_directory, output_directory, prefix_length=2):
        """
        Initialize the CSV file merger with the input and output directories.
        Args:
            input_directory (str): Path to the input directory containing CSV files.
            output_directory (str): Path to the output directory where merged files will be stored.
            prefix_length (int): Number of leading file-name characters that
                identify a group. Defaults to 2.
        """
        self.input_directory = input_directory
        self.output_directory = output_directory
        self.prefix_length = prefix_length

    def run(self):
        """
        Run the CSV file merger.

        Every ``*.csv`` file in the input directory is grouped by the first
        ``prefix_length`` characters of its name, and each group is written to
        ``<prefix>.csv`` in the output directory (created if necessary).
        Files and groups are handled in sorted name order, so the row order of
        the merged files and the order of the log lines are reproducible.

        Note: if the output directory is the same as the input directory, a
        later run will pick up the previously merged ``<prefix>.csv`` files as
        inputs as well.
        """
        os.makedirs(self.output_directory, exist_ok=True)

        # Single pass over the listing: prefix -> file names. os.listdir()
        # returns entries in arbitrary order, hence the explicit sort.
        groups = {}
        for name in sorted(os.listdir(self.input_directory)):
            if name.endswith('.csv'):
                groups.setdefault(name[:self.prefix_length], []).append(name)

        for prefix, files_to_merge in groups.items():
            frames = [
                pd.read_csv(os.path.join(self.input_directory, name))
                for name in files_to_merge
            ]
            merged_df = pd.concat(frames, ignore_index=True)
            merged_file_name = f"{prefix}.csv"
            output_file_path = os.path.join(self.output_directory, merged_file_name)
            merged_df.to_csv(output_file_path, index=False)
            print(f"Merged files for prefix '{prefix}' into '{output_file_path}'.")

In [4]:
if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal; sequences such as
    # '\C' or '\s' in a normal string are invalid escapes and trigger a
    # SyntaxWarning on recent Python versions.
    dataset_dir = r'E:\CDAC(2024)\Air Quality Project\PracticeData\Dataset'
    # The stations table lives inside the dataset directory, so join the
    # bare file name rather than a second absolute path.
    stations_info_file = os.path.join(dataset_dir, 'stations_info.csv')

    # Step 1: add state / city / station_location columns to every data file.
    processor = AirQualityDataProcessor(dataset_dir, stations_info_file)
    processor.process_files()

    # Step 2: merge the annotated files into one CSV per file-name prefix.
    output_directory = r'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files'
    merger = CSVFileMerger(dataset_dir, output_directory)
    merger.run()

Merged files for prefix 'KA' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\KA.csv'.
Merged files for prefix 'TG' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\TG.csv'.
Merged files for prefix 'BR' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\BR.csv'.
Merged files for prefix 'ML' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\ML.csv'.
Merged files for prefix 'MH' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\MH.csv'.
Merged files for prefix 'MZ' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\MZ.csv'.
Merged files for prefix 'OR' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\OR.csv'.
Merged files for prefix 'WB' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\WB.csv'.
Merged files for prefix 'UK' into 'E:\CDAC(2024)\Air Quality Project\PracticeData\Merged Files\UK.csv'.
Merged files for prefix 'CH' into 'E:\CDAC(2024)\Air Quality Pro