In [1]:
source activate 

In [1]:
import numpy as np
import os
import sys
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from functools import  partial
sys.path.insert(0, '/cluster/work/climate/dnikolo/n2o')
from Glaciation_time_estimator.Data_postprocessing.Job_result_fp_generator import generate_tracking_filenames
from Glaciation_time_estimator.Auxiliary_func.config_reader import read_config

In [2]:
config = read_config(
    '/cluster/work/climate/dnikolo/n2o/Glaciation_time_estimator/config_half.yaml')

In [3]:
def Extract_array_from_df(series: pd.Series):
    if series.empty:
        return None
    return np.stack(series.values)

def get_glaciations_df(config):
    agg_fact = config['agg_fact']
    folder_name = f"{config['start_time'].strftime(config['time_folder_format'])}_{config['end_time'].strftime(config['time_folder_format'])}"
    pole=config["pole_folders"][0]
    fp = os.path.join(
                config['postprocessing_output_dir'],
                pole,
                folder_name,
                f"Agg_{agg_fact:02}_Glaciations.parquet"
            )
    try:
        return pd.read_parquet(fp)
    except FileNotFoundError:
        print(f"Skipping glaciations")
        return 

def get_combined_cloud_df(config):
    t_deltas = config['t_deltas']
    agg_fact = config['agg_fact']
    min_temp_array, max_temp_array = config['min_temp_arr'], config['max_temp_arr']
    folder_name = f"{config['start_time'].strftime(config['time_folder_format'])}_{config['end_time'].strftime(config['time_folder_format'])}"
    # Initialize an empty list to store the individual dataframes
    cloud_properties_df_list = []

    # Iterate over each temperature range
    for i in range(len(min_temp_array)):
        cloud_properties_df_list.append([])
        min_temp = min_temp_array[i]
        max_temp = max_temp_array[i]

        # Iterate over each pole
        for pole in config["pole_folders"]:
            # Construct the file path
            fp = os.path.join(
                config['postprocessing_output_dir'],
                pole,
                folder_name,
                f"Agg_{agg_fact:02}_T_{abs(round(min_temp)):02}_{abs(round(max_temp)):02}.parquet"
            )

            # Read the parquet file into a dataframe
            try:
                df = pd.read_parquet(fp)
            except FileNotFoundError:
                print(f"Skipping all clouds file: {pole} {min_temp} to {max_temp}")
                continue

            # Add columns for min_temp, max_temp, and pole
            df['min_temp'] = min_temp
            df['max_temp'] = max_temp
            df['pole'] = pole
            df['Hemisphere'] = "South" if pole == "sp" else "North"
            df['Lifetime [h]'] = df['track_length'] / pd.Timedelta(hours=1)
            df["Radius [km]"]=np.sqrt(df["avg_size[km]"]/np.pi)
            # Append the dataframe to the sublist
            cloud_properties_df_list[i].append(df)

    # Combine all dataframes into a single dataframe
    if len(cloud_properties_df_list)==0:
        return None
    return pd.concat(
        [df for sublist in cloud_properties_df_list for df in sublist], ignore_index=True)

In [4]:
def clasify_clouds(yearly_data):
    yearly_data["Level"] = pd.cut(
        yearly_data.avg_ctp,
        bins=[50, 440, 680, 1000],
        labels=["Cirro","Alto","Low"]
    )
    yearly_data["Optical Thickness"] = pd.cut(
        yearly_data.avg_cot,
        bins=[0, 3.6, 23, 379],
        labels=["Thin", "Medium", "Thick"]
    )

    yearly_data["Cloud type"] = list(zip(yearly_data["Level"],yearly_data["Optical Thickness"]))
    # Define mapping dictionary
    cloud_type_mapping = {
        ("Low", "Thin"): "Cumulus",
        ("Alto", "Thin"): "Altocumulus",
        ("Cirro", "Thin"): "Cirrus",
        ("Low", "Medium"): "Stratocumulus",
        ("Alto", "Medium"): "Altostratus",
        ("Cirro", "Medium"): "Cirrostratus",
        ("Low", "Thick"): "Stratus",
        ("Alto", "Thick"): "Nimbostratus",
        ("Cirro", "Thick"): "Deep convection",
    }

    # Apply mapping
    yearly_data["Cloud type"] = yearly_data["Cloud type"].map(cloud_type_mapping)

In [5]:
year=2021
# def combine_whole_year(year,working_months):
analysis_df_list = []
glaciation_df_list = []
months=[1,2,3,4,5,6,7,8,9,10,11,12]
for month in months:
    for part in range(1,3):
        print(f"Analysing {year}_tracking/{month:02}_{part:02}.yaml")
        config_fp = f'/cluster/work/climate/dnikolo/n2o/Glaciation_time_estimator/configs/{year}_tracking/{month:02}_{part:02}.yaml'
        temp_config = read_config(config_fp)
        df = get_combined_cloud_df(temp_config)
        if df is not None:
                analysis_df_list.append(df)
        else:
                print(f"Skiping month {month}")
        glaciation_df_list.append(get_glaciations_df(temp_config))
yearly_data = pd.concat(
        [df for df in analysis_df_list], ignore_index=True)
glaciations_data = pd.concat(
        [df for df in glaciation_df_list], ignore_index=True)
clasify_clouds(yearly_data)
clasify_clouds(glaciations_data)



yearly_data.to_parquet(f"/cluster/work/climate/dnikolo/Cloud_analysis/full_years/{year}_all.parquet") 
glaciations_data.to_parquet(f"/cluster/work/climate/dnikolo/Cloud_analysis/full_years/{year}_glac.parquet") 
        

Analysing 2021_tracking/01_01.yaml
Skipping all clouds file: np -6 to 0
Skipping all clouds file: sp -6 to 0
Skipping all clouds file: np -12 to -6
Skipping all clouds file: sp -12 to -6
Skipping all clouds file: np -18 to -12
Skipping all clouds file: sp -18 to -12
Skipping all clouds file: np -24 to -18
Skipping all clouds file: sp -24 to -18
Skipping all clouds file: np -30 to -24
Skipping all clouds file: sp -30 to -24
Skipping all clouds file: np -36 to -30
Skipping all clouds file: sp -36 to -30


ValueError: No objects to concatenate