In [9]:
import os
import pandas as pd

class Load_datasets:
    """
    Load the air-quality (IQA) city dataset and filter it by department.

    Two files are involved:
    - a CSV file with one row per zone and date, read by `load_city_data`;
    - an Excel file read by `load_zip_codes`, in which `IQA` treats every
      column header as a department name and the cells below it as the
      zone codes (`code_zone` values) belonging to that department.
    """

    # Columns kept when reading the city CSV; any other column is ignored.
    _CITY_COLUMNS = (
        'code_no2',
        'code_o3',
        'code_pm10',
        'code_pm25',
        'code_qual',
        'code_so2',
        'code_zone',
        'coul_qual',
        'date_dif',
        'date_ech',
        'lib_qual',
        'lib_zone',
    )

    def __init__(self, city_data_filename='/home/zack/projet_pollution_occitanie/data/IQA.csv', zip_code_filename='/home/zack/projet_pollution_occitanie/data/dept.xlsx'):
        """
        Store the locations of the two input files (nothing is read yet).

        Parameters:
        - city_data_filename (str): Path of the CSV file containing the city dataset.
        - zip_code_filename (str): Path of the Excel file mapping departments to zone codes.
        """
        self.city_data_filename = city_data_filename
        self.zip_code_filename = zip_code_filename

    @staticmethod
    def _resolve_path(filename):
        """Return `filename` resolved against the current working directory."""
        # os.path.join drops the cwd prefix when `filename` is already
        # absolute, so absolute paths (such as the defaults) pass through as-is.
        return os.path.join(os.getcwd(), filename)

    def load_city_data(self) -> pd.DataFrame:
        """
        Load the city dataset from the CSV file.

        Returns:
        - pd.DataFrame: The CSV content restricted to `_CITY_COLUMNS`.
        """
        return pd.read_csv(
            self._resolve_path(self.city_data_filename),
            usecols=list(self._CITY_COLUMNS),
        )

    def load_zip_codes(self) -> pd.DataFrame:
        """
        Load the department / zone-code table from the Excel file.

        Returns:
        - pd.DataFrame: The Excel sheet exactly as parsed by pandas.
        """
        return pd.read_excel(self._resolve_path(self.zip_code_filename))

    def IQA(self, department) -> pd.DataFrame:
        """
        Filter the city dataset by the zone codes associated with a department.

        Both files are re-read on every call.

        Parameters:
        - department (str): Name of the department; must be one of the column
          headers of the Excel file.

        Returns:
        - pd.DataFrame: Rows of the city dataset whose `code_zone` belongs to
          the specified department.

        Raises:
        - KeyError: If `department` is not a column of the Excel file. The
          message lists the available departments.
        """
        zip_code_df = self.load_zip_codes()

        # Validate before touching the (much larger) city CSV so that a typo
        # in the department name fails fast and with an actionable message.
        if department not in zip_code_df.columns:
            available = ", ".join(map(str, zip_code_df.columns))
            raise KeyError(
                f"Unknown department {department!r}; available departments: {available}"
            )

        # Columns may be padded with empty cells (departments do not all have
        # the same number of zones), so drop the missing values.
        zone_codes = zip_code_df[department].dropna().tolist()

        city_df = self.load_city_data()
        return city_df[city_df['code_zone'].isin(zone_codes)]

# Example usage
# Create an instance of Load_datasets with the default file paths
Load_data = Load_datasets()
# Filter the dataset for a specific department (here 'Gers'), reusing the
# instance created above instead of building a second, throwaway one
hr_department_data = Load_data.IQA('Gers')
# Display the resulting dataset
hr_department_data


Unnamed: 0,date_ech,code_qual,lib_qual,coul_qual,date_dif,code_zone,lib_zone,code_no2,code_so2,code_o3,code_pm10,code_pm25
4,2023/11/19 00:00:00+00,2,Moyen,#50CCAA,2023/11/18 09:00:00+00,200042372,CC des Coteaux Arrats Gimone,1,1,2,2,2
20,2023/11/19 00:00:00+00,2,Moyen,#50CCAA,2023/11/18 09:00:00+00,248200016,CC des Deux Rives,2,1,2,2,2
22,2023/11/19 00:00:00+00,2,Moyen,#50CCAA,2023/11/18 09:00:00+00,243200425,CC Coeur d'Astarac en Gascogne,1,1,2,2,2
36,2023/11/19 00:00:00+00,2,Moyen,#50CCAA,2023/11/18 09:00:00+00,200034726,CC Bastides de Lomagne,1,1,2,2,2
38,2023/11/19 00:00:00+00,2,Moyen,#50CCAA,2023/11/18 09:00:00+00,243200607,CC Artagnan de Fezensac,1,1,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...
112769,2022/01/01 00:00:00+00,2,Moyen,#50CCAA,2022/01/02 09:00:00+00,200034726,CC Bastides de Lomagne,1,1,2,1,1
112782,2022/01/01 00:00:00+00,2,Moyen,#50CCAA,2022/01/02 09:00:00+00,200030435,CC d'Aire-sur-l'Adour,1,1,2,1,1
112786,2022/01/01 00:00:00+00,2,Moyen,#50CCAA,2022/01/02 09:00:00+00,243200508,CC Bastides et Vallons du Gers,1,1,2,1,1
112806,2022/01/01 00:00:00+00,2,Moyen,#50CCAA,2022/01/02 09:00:00+00,200066926,CA Grand Auch Coeur de Gascogne,1,1,2,1,1
