# In [16]:  (Jupyter notebook cell marker left over from the export)
import pandas as pd
import json
import matplotlib.pyplot as plt
import copy
from operator import itemgetter
import numpy as np
from pathlib import Path

def initially_preprocessed_data_frame(
    path_to_data="/home/pc/moje/python_projects/adsorbents_isotherms/Data/all_data.csv",
):
    """Load the raw isotherm table and apply the initial clean-up steps.

    Steps, in order:

    1. Read the CSV and decode the JSON-encoded list columns
       (``isotherm_X``, ``isotherm_Y``, ``isotherm_X_Y``,
       ``axis_minx_maxx_miny_maxy``) into Python lists.
    2. Replace every value of ``isotherm_X`` / ``isotherm_Y`` by its
       absolute value.
    3. Rescale rows depending on ``Y_axis_type`` / ``Article_name``.
    4. Drop the articles and individual samples listed in the exclusion
       tables below.

    Args:
        path_to_data: Location of the CSV file; anything accepted by
            ``pandas.read_csv`` (path string, ``Path``, or file-like
            object). Defaults to the original hard-coded location.

    Returns:
        A new ``DataFrame`` (an independent copy, so callers may add columns
        to it) with the original row index of the surviving rows.
    """
    print(path_to_data)
    df = pd.read_csv(path_to_data)
    print("------------------------------- Preprocessing_data start -------------------------------")
    # DataFrame.info() prints its report itself and returns None; wrapping it
    # in print() only added a stray "None" line to the output.
    df.info()

    # The list-valued columns are stored as JSON text in the CSV.
    for column in ('isotherm_X', 'isotherm_Y', 'isotherm_X_Y', 'axis_minx_maxx_miny_maxy'):
        df[column] = df[column].apply(json.loads)

    # Remove the minus sign from the separate X and Y series.
    # NOTE(review): 'isotherm_X_Y' is not passed through abs() - confirm that
    # this is intended.
    for column in ('isotherm_X', 'isotherm_Y'):
        df[column] = df[column].apply(lambda values: [abs(num) for num in values])

    # Presumably the molar volume of an ideal gas at STP (cm3 per mmol), used
    # to bring 'mmol/g' data to the same unit as the other rows - TODO confirm.
    mmol_g_factor = 22.414

    def scale_y(pairs, factor):
        # Multiply the y component of every [x, y] pair.
        return [[pair[0], pair[1] * factor] for pair in pairs]

    df['isotherm_Y'] = df.apply(
        lambda row: [value * mmol_g_factor for value in row['isotherm_Y']]
        if row['Y_axis_type'] == 'mmol/g' else row['isotherm_Y'],
        axis=1,
    )
    df['isotherm_X_Y'] = df.apply(
        lambda row: scale_y(row['isotherm_X_Y'], mmol_g_factor)
        if row['Y_axis_type'] == 'mmol/g' else row['isotherm_X_Y'],
        axis=1,
    )
    # NOTE(review): the 'x1000' scaling is applied to 'isotherm_X_Y' only,
    # 'isotherm_Y' keeps its raw values - confirm that this is intended.
    df['isotherm_X_Y'] = df.apply(
        lambda row: scale_y(row['isotherm_X_Y'], 1000)
        if row['Y_axis_type'] == 'x1000' else row['isotherm_X_Y'],
        axis=1,
    )
    # The x values of article 'Data78' are divided by 100.
    df['isotherm_X_Y'] = df.apply(
        lambda row: [[(pair[0] / 100), pair[1]] for pair in row['isotherm_X_Y']]
        if row['Article_name'] == 'Data78' else row['isotherm_X_Y'],
        axis=1,
    )

    # Samples removed from the data set: whole articles ...
    excluded_articles = ('Data73', 'Data15', 'Data34')
    # ... and single (article, sample, figure) combinations.
    excluded_samples = (
        ('Data7', 'RK-500', '3'),
        ('Data44', 'UAK2-800d-without_washing', '5'),
        ('Data94', 'UFFA-2-500', '2_1'),
        ('Data23', '0%BO', '2_2'),
        ('Data33', '600', '2'),
        ('Data91', 'AC-MM5', '3_3'),
        ('Data91', 'AC-MM7', '3_3'),
        ('Data158', 'CS1000A', '4'),
        ('Data158', 'CS1000AF', '4'),
    )
    drop = df['Article_name'].isin(excluded_articles)
    for article, sample, figure in excluded_samples:
        drop |= (
            (df['Article_name'] == article)
            & (df['Sample_name'] == sample)
            & (df['Figure_number'] == figure)
        )

    # .copy() detaches the result from the unfiltered frame, so adding
    # columns to it later does not operate on a slice.
    return df[~drop].copy()

def _insert_midpoints(points, gap_count):
    """Return a new list with a midpoint added in the first *gap_count* gaps.

    A "gap" is the space between two consecutive points; the inserted point is
    the arithmetic mean of its two neighbours. *gap_count* must not exceed
    ``len(points) - 1``.
    """
    densified = []
    for index, point in enumerate(points):
        densified.append(point)
        if index < gap_count:
            following = points[index + 1]
            densified.append([
                (point[0] + following[0]) / 2,
                (point[1] + following[1]) / 2,
            ])
    return densified


def fit_shape_of_izotherms(oryginal_izotherms, new_size, x_range_to_cut):
    """Bring every isotherm to exactly ``new_size`` points.

    Each isotherm is a sequence of ``[x, y]`` pairs. Processing per isotherm:

    1. Sort the points by ascending ``x``.
    2. If ``x_range_to_cut`` is truthy, keep only points with
       ``x >= x_range_to_cut``.
    3. Prepend ``[0, 0]`` unless the first point already is ``[0, 0]``.
    4. If there are too few points, insert midpoints between neighbours:
       first as many complete passes (one midpoint in every gap, ``n`` points
       become ``2n - 1``) as fit, then the remaining points are added in the
       leading gaps (lowest ``x`` first). If there are too many points, pick
       ``new_size`` of them at evenly spaced indices (first and last kept).

    Args:
        oryginal_izotherms: List of isotherms; it is not modified.
        new_size: Number of points every returned isotherm must have.
        x_range_to_cut: Lower ``x`` bound, or a falsy value (``False``,
            ``0``, ``None``) to keep the whole range.

    Returns:
        A new list of isotherms, each with ``new_size`` points.

    Raises:
        ValueError: If an isotherm has fewer than two points after step 3 and
            has to be enlarged; there is nothing to interpolate between.
            (Previously this case looped forever or raised ``IndexError``.)
    """
    # A single deep copy keeps the caller's data untouched; sorting and
    # filtering below already build new outer lists.
    isotherms = [
        sorted(points, key=itemgetter(0))
        for points in copy.deepcopy(oryginal_izotherms)
    ]

    if x_range_to_cut:
        print(f'return x range from {x_range_to_cut}')
        isotherms = [
            [pair for pair in points if pair[0] >= x_range_to_cut]
            for points in isotherms
        ]
    else:
        print(f'x_range_to_cut->{x_range_to_cut}, return whole x range')

    for points in isotherms:
        # The origin check is kept exactly as before; the extra `not points`
        # guard covers isotherms left empty by the cut above.
        if not points or not np.any(points[0] == [0, 0]):
            points.insert(0, [0, 0])

    resized = []
    for position, points in enumerate(isotherms):
        old_size = len(points)

        if old_size < new_size:
            if old_size < 2:
                raise ValueError(
                    f"isotherm at index {position} has {old_size} point(s); "
                    f"at least 2 are needed to interpolate up to {new_size}"
                )
            # Count how many complete midpoint passes fit before the target
            # size would be reached or overshot.
            size = old_size
            full_passes = 0
            while new_size - size + 1 > old_size and 2 * size - 1 < new_size:
                size = 2 * size - 1
                full_passes += 1

            for _ in range(full_passes):
                points = _insert_midpoints(points, len(points) - 1)
            # Top up with the points still missing after the complete passes.
            points = _insert_midpoints(points, new_size - size)

        elif old_size > new_size:
            # Evenly spaced indices over the whole isotherm; dtype=int
            # truncates the fractional positions.
            indices = np.linspace(0, old_size - 1, new_size, dtype=int)
            points = [points[i] for i in indices]

        resized.append(points)

    return resized

def processing_isotherm_X_Y_data(new_size, x_range_to_cut):
    """Add the resized isotherms to the initially preprocessed data frame.

    The ``isotherm_X_Y`` column of the frame returned by
    ``initially_preprocessed_data_frame()`` is passed through
    ``fit_shape_of_izotherms`` and stored in two new columns:

    * ``processed_isotherm_X_Y`` - the resized isotherms as nested lists,
    * ``processed_flatten_isotherm_X_Y`` - one flattened numpy array per row.

    Args:
        new_size: Forwarded to ``fit_shape_of_izotherms``.
        x_range_to_cut: Forwarded to ``fit_shape_of_izotherms``.

    Returns:
        The data frame with both columns added.
    """
    frame = initially_preprocessed_data_frame()

    # Make every isotherm the same length.
    resampled = fit_shape_of_izotherms(
        frame['isotherm_X_Y'].to_list(), new_size, x_range_to_cut
    )
    frame['processed_isotherm_X_Y'] = resampled

    # Stack all isotherms into one array, then flatten each of them.
    stacked = np.array(resampled)
    frame['processed_flatten_isotherm_X_Y'] = [curve.flatten() for curve in stacked]
    return frame

def get_whole_preprocessed_dataframe(new_size, x_range_to_cut):
    """Run the whole preprocessing and return the resulting data frame.

    Thin wrapper around ``processing_isotherm_X_Y_data`` that additionally
    prints a summary of the frame and an end-of-preprocessing banner.

    Args:
        new_size: Forwarded to ``processing_isotherm_X_Y_data``.
        x_range_to_cut: Forwarded to ``processing_isotherm_X_Y_data``.

    Returns:
        The data frame produced by ``processing_isotherm_X_Y_data``.
    """
    df = processing_isotherm_X_Y_data(new_size, x_range_to_cut)
    # DataFrame.info() prints its report itself and returns None; wrapping it
    # in print() only added a stray "None" line to the output.
    df.info()
    print("------------------------------- Preprocessing_data end -------------------------------")
    return df

# df = get_whole_preprocessed_dataframe(new_size=30, x_range_to_cut=False)
