## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm

## Declare Constants

In [11]:
# Folder holding the UK input and output CSV files.
dataset_folder = "datasets/UK/"
_dataset_dir = Path(dataset_folder)

# Active configuration: respiratory data.
# For the mental-health run, swap the file names to
# "mental_health_climate.csv" and "mental_health_climate_summary.csv".
health_climate_dataset = _dataset_dir / "respiratory_climate.csv"
health_summary = _dataset_dir / "respiratory_climate_summary.csv"

## 1. Load Updated Health Dataset
Load the updated health dataset, which has the nearest-station data added by the `map_station_health.ipynb` notebook.

In [12]:
# Read the health/climate CSV configured above into a DataFrame.
df_health = pd.read_csv(filepath_or_buffer=health_climate_dataset)

### 1.1. Primary Impression Counts (PIC)
**Group by:** [Time, SiteNumber, SiteName, Longitude, Latitude, PRIMARY IMPRESSION CODE, PRIMARY IMPRESSION]

In [13]:
# Columns that together identify one group in the summary.
group_keys = [
    'Time', 'SiteNumber', 'SiteName', 'Longitude', 'Latitude',
    'PRIMARY IMPRESSION CODE', 'PRIMARY IMPRESSION',
]

# Columns averaged within each group (output keeps this order).
mean_columns = [
    'ch4_c', 'go3', 'hcho', 'no', 'no2', 'pm10',
    'pm2p5', 't', 'u', 'uvbed', 'uvbedcs', 'v',
    'aurn_go3', 'aurn_go3_max', 'aurn_no2', 'aurn_pm10', 'aurn_pm25',
    'CH4_S', 'CO_S', 'HCHO_S', 'NO2_S',
]

# Group once and reuse the same GroupBy for both the means and the row
# counts, so the two results can never be built from different key lists.
grouped_health = df_health.groupby(group_keys)

df_pic = (
    grouped_health
    .agg({column: 'mean' for column in mean_columns})
    # size() is the number of df_health rows in each group. assign() aligns it
    # on the shared group index instead of relying on positional order.
    .assign(primary_impression_count=grouped_health.size())
    .reset_index()
)


#### Rename columns to make them shorter

In [14]:
# Shorten the impression column names used in the summary.
# The result is rebound instead of using inplace=True, so the cell does not
# mutate a frame produced by an earlier cell; names already renamed (e.g. on a
# re-run) are ignored by rename's default errors='ignore'.
df_pic = df_pic.rename(
    columns={
        'PRIMARY IMPRESSION CODE': 'Primary_Impression_Code',
        'PRIMARY IMPRESSION': 'Primary_Impression',
        'primary_impression_count': 'Primary_Impression_Count',
    }
)

### 1.2. Save Summarised Health Data

In [15]:
# Write the summarised frame to the configured summary CSV, without the index.
df_pic.to_csv(path_or_buf=health_summary, index=False)