In [None]:
import numpy as np
import pandas as pd
import os
import re
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import csv

# DataFrame display settings: show at most 12 columns and never truncate rows
pd.options.display.max_columns = 12
pd.options.display.max_rows = None

In [None]:
# Determine the ageing time from a token of the file name
def age(parameters):
    """Return the ageing time associated with a file-name token.

    Each known marker is looked up in ``parameters`` with the ``in`` operator
    (substring test for a string, membership test for a list); the value of
    the first marker found is returned.

    Parameters
    ----------
    parameters : str or list of str
        File-name token (or list of tokens) to inspect.

    Returns
    -------
    int
        Ageing time of the first matching marker, or 0 when none matches.
    """
    # NOTE(review): if the keys are YYMMDD dates and the values are days,
    # 240503 -> 240607 spans 35 days while the values differ by only 28 -
    # confirm 462.
    ageing_by_marker = (
        ('240208', 349),
        ('240308', 378),
        ('240405', 406),
        ('240503', 434),
        ('240607', 462),
        ('frais', 0),
    )

    matches = (duration for marker, duration in ageing_by_marker if marker in parameters)
    # Unknown markers fall back to 0, i.e. the same value as 'frais'.
    return next(matches, 0)

print("done")

In [None]:
# Location of the data: folder containing the exported chromatogram .txt files.
# TODO: set this to the real data folder; '.' (the working directory) is only a
# placeholder so that the cell is valid Python.
directory = '.'

COLUMN_NAMES = ['Ret.Time', 'Absolute Intensity', 'Relative Intensity']
# Encodings are tried in this order. latin1 maps every possible byte, so it can
# never raise UnicodeDecodeError and has to come last to leave cp1252 reachable.
ENCODINGS = ('utf-8', 'cp1252', 'latin1')


def read_chromatogram(file_path):
    """Read one exported chromatogram file into a DataFrame indexed by 'Ret.Time'.

    The first 12 lines of the file are skipped and the remainder is parsed as
    tab-separated values written with a decimal comma. The encodings listed in
    ENCODINGS are tried one after the other until the file can be decoded.
    """
    for encoding in ENCODINGS:
        try:
            return pd.read_csv(
                file_path, skiprows=12, delimiter='\t', names=COLUMN_NAMES,
                dtype={'Ret.Time': float, 'Absolute Intensity': int, 'Relative Intensity': float},
                index_col='Ret.Time', decimal=',', encoding=encoding)
        except UnicodeDecodeError:
            continue
    raise ValueError(f'Could not decode {file_path} with any of {ENCODINGS}')


dataframes = []
tapes = []
ages = []
filenames = []

# sorted() makes the processing order independent of the file system.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.txt'):
        continue
    dataframes.append(read_chromatogram(os.path.join(directory, filename)))

    # The file name is split on '-', '.', '_' and ' ': the first token is used
    # as the tape name and the token right before the extension is passed to
    # age() to obtain the ageing time.
    parameters = re.split(r'[-._ ]', filename)
    tapes.append(parameters[0])
    ages.append(age(parameters[-2]))
    filenames.append(filename)

if not dataframes:
    raise FileNotFoundError(f'No .txt chromatogram found in {os.path.abspath(directory)!r}')

# One row per file, one column per retention time; only the absolute intensity
# is kept. pandas does not sort the retention-time axis by default when the
# files do not share the same time grid, hence sort=True.
data = (
    pd.concat([df['Absolute Intensity'] for df in dataframes], axis=1, sort=True)
    .T
    .reset_index(drop=True)
    .rename_axis('Index')
)

# Metadata columns: 'tape' first, 'age' and 'file name' last.
data.insert(0, 'tape', tapes)
data['age'] = ages
data['file name'] = filenames

data = data.sort_values(by=['tape', 'age']).reset_index(drop=True)

data

In [None]:
# The data are not sampled on an exact 0.001 min grid, so only the columns
# whose retention time ends in 0 or 5 at the third decimal (x.xx0 / x.xx5) are
# kept, which makes it possible to shift chromatograms and overlay them.
def _is_on_0_005_grid(retention_time):
    """Tell whether a retention time is a multiple of 0.005 min."""
    # Testing str(retention_time) is not reliable: str(1.1) is '1.1' and
    # str(1.11) is '1.11', so 1.100 and 1.110 would be rejected. Work in
    # thousandths of a minute instead; rounding to 6 decimals absorbs binary
    # floating-point noise in the product.
    return round(float(retention_time) * 1000, 6) % 5 == 0


selected_columns = [col for col in data.columns[1:-2] if _is_on_0_005_grid(col)]
data_selected = data.loc[:, ['tape'] + list(selected_columns) + ['age', 'file name']]

data_selected

In [None]:
# Plot the chromatograms, optionally restricted to a retention-time window;
# the limits of the y axis can be set explicitly.
def plot_TIC(data_plot_spectra, start_time=None, end_time=None, found_tr=None, y_limit_top=None, y_limit_down=None, figsize=(12, 4), show_legend=False):
    """Plot every row of ``data_plot_spectra`` as one chromatogram.

    The frame must follow the layout built in this notebook: 'tape' as first
    column, one column per retention time, then 'age' and 'file name'.

    Parameters
    ----------
    data_plot_spectra : pandas.DataFrame
        One chromatogram per row.
    start_time, end_time : float, optional
        Inclusive bounds of the retention-time window to draw. The whole
        range is drawn when they are omitted.
    found_tr : float, optional
        Retention time marked with a vertical dashed line.
    y_limit_top, y_limit_down : float, optional
        Upper / lower limit of the y axis. Either one can be given alone.
    figsize : tuple
        Figure size handed to matplotlib.
    show_legend : bool
        Draw a legend from the 'Age: ...' / 'Found TR: ...' labels.
    """
    # Retention-time columns sit between 'tape' and the two trailing metadata
    # columns ('age', 'file name').
    Tr = np.asarray(data_plot_spectra.columns[1:-2], dtype=float)
    intensities = data_plot_spectra.iloc[:, 1:-2].to_numpy(dtype=float)
    ages = data_plot_spectra['age'].to_numpy()

    # np.searchsorted needs ascending times; reorder defensively (this is a
    # no-op when the columns are already sorted).
    order = np.argsort(Tr, kind='stable')
    Tr = Tr[order]
    intensities = intensities[:, order]

    start_idx = 0 if start_time is None else np.searchsorted(Tr, start_time, side='left')
    end_idx = len(Tr) if end_time is None else np.searchsorted(Tr, end_time, side='right')

    Tr_filtered = Tr[start_idx:end_idx]
    data_filtered = intensities[:, start_idx:end_idx]

    _, ax = plt.subplots(figsize=figsize)

    # Rows with age 0 are drawn in black; every other row uses the last colour
    # of the palette.
    # NOTE(review): index the palette by row if one colour per trace is wanted.
    palette = cm.coolwarm(np.linspace(0, 0.8, len(data_filtered)))
    for trace, sample_age in zip(data_filtered, ages):
        if sample_age == 0:
            color, label = 'black', 'Age: 0'
        else:
            color, label = palette[-1], f'Age: {sample_age}'
        ax.plot(Tr_filtered, trace, color=color, linestyle='-', linewidth=0.5, alpha=1, label=label)

    if found_tr is not None:
        ax.axvline(x=found_tr, color='black', linestyle='--', linewidth=1, label=f'Found TR: {found_tr}')

    # A limit left to None keeps matplotlib's automatic value for that side.
    if y_limit_top is not None or y_limit_down is not None:
        ax.set_ylim(y_limit_down, y_limit_top)

    if show_legend:
        ax.legend()

    ax.set_xlabel('Temps de rétention (min)')
    ax.set_ylabel('Intensité')
    plt.show()

In [None]:
# One sub-frame per tape; the row labels of `data` are kept as they are
tape_names = data["tape"]
data_raa12 = data.loc[tape_names == "raa12"]
data_raa13 = data.loc[tape_names == "raa13"]
data_raa14 = data.loc[tape_names == "raa14"]
data_raa15 = data.loc[tape_names == "raa15"]
data_raa16 = data.loc[tape_names == "raa16"]

# Same retention-time window (1.1 to 38) for every tape
for tape_frame in (data_raa12, data_raa13, data_raa14, data_raa15, data_raa16):
    plot_TIC(tape_frame, 1.1, 38)

In [None]:
# Apply a vertical scaling and offset to one chromatogram of a tape
def v_shift(data, factor, factor2, tape):
    """Return a copy of ``data`` in which one chromatogram is rescaled and shifted.

    The intensities of the selected row become ``intensity * factor + factor2``;
    ``data`` itself is left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with 'tape' as first column, one column per retention time,
        then 'age' and 'file name'.
    factor : float
        Multiplicative factor applied to the intensities.
    factor2 : float
        Offset added after the multiplication.
    tape : int
        Tape number (12 to 16); selects the row label to transform.

    Raises
    ------
    ValueError
        If ``tape`` is not one of the known tape numbers.
    """
    # Row label of the chromatogram to transform for each tape.
    # NOTE(review): these labels presumably point at the aged sample of each
    # tape in the sorted master table (two files per tape) - confirm whenever
    # the data set changes.
    row_by_tape = {12: 1, 13: 3, 14: 5, 15: 7, 16: 9}
    if tape not in row_by_tape:
        raise ValueError(f'Unknown tape {tape!r}; expected one of {sorted(row_by_tape)}')
    row = row_by_tape[tape]

    data2 = data.copy()
    # All retention-time columns: everything between 'tape' and the two
    # trailing metadata columns, first time point included.
    numeric_columns = data.columns[1:-2]
    data2.loc[row, numeric_columns] = data.loc[row, numeric_columns].astype(float) * factor + factor2
    return data2

# Appendices 16, 18, 20, 22, 24
# Vertically shifted copy of each tape: v_shift(frame, scale factor, offset, tape number)
data_raa12_vshifted = v_shift(data_raa12, 20, 50_000_000, 12)
data_raa13_vshifted = v_shift(data_raa13, 2, 20_000_000, 13)
data_raa14_vshifted = v_shift(data_raa14, 5, 30_000_000, 14)
data_raa15_vshifted = v_shift(data_raa15, 2, 20_000_000, 15)
data_raa16_vshifted = v_shift(data_raa16, 2, 20_000_000, 16)

# (printed title, frame to plot, plot options specific to that tape)
vshifted_plots = (
    ('RAA12', data_raa12_vshifted, {'y_limit_top': 150_000_000}),
    ('RAA13', data_raa13_vshifted, {}),
    ('RAA14', data_raa14_vshifted, {'y_limit_top': 70_000_000}),
    ('RAA15', data_raa15_vshifted, {}),
    ('RAA16', data_raa16_vshifted, {}),
)

for title, shifted_frame, plot_options in vshifted_plots:
    print(title)
    plot_TIC(shifted_frame, 1.11, 38, figsize=(12, 6), **plot_options)

## Combined plots (Figures 14 and 18)

In [None]:
# Figure 14: shifted RAA12 and RAA14 chromatograms drawn in a single plot
concatenated_data = pd.concat([data_raa12_vshifted, data_raa14_vshifted], ignore_index=True)

# All retention-time columns: everything between 'tape' and the two trailing
# metadata columns ('age', 'file name'), first time point included.
numeric_columns = concatenated_data.columns[1:-2]

# Additional vertical offset per row label of the concatenated frame
# (row 0 is left where it is).
row_offsets = {1: -20_000_000, 2: 80_000_000, 3: 80_000_000}
for row, offset in row_offsets.items():
    concatenated_data.loc[row, numeric_columns] = (
        concatenated_data.loc[row, numeric_columns].astype(float) + offset
    )

plot_TIC(concatenated_data, 1.11, 38, y_limit_top=150_000_000, y_limit_down=-10_000_000, figsize=(12, 3))

In [None]:
# Figure 18: shifted RAA13, RAA15 and RAA16 chromatograms drawn in a single plot
concatenated_data = pd.concat([data_raa13_vshifted, data_raa15_vshifted, data_raa16_vshifted], ignore_index=True)

# All retention-time columns: everything between 'tape' and the two trailing
# metadata columns ('age', 'file name'), first time point included.
numeric_columns = concatenated_data.columns[1:-2]

# Additional vertical offset per row label of the concatenated frame
# (row 0 is left where it is; row 1 gets a zero offset).
row_offsets = {1: 0, 2: 60_000_000, 3: 60_000_000, 4: 120_000_000, 5: 120_000_000}
for row, offset in row_offsets.items():
    concatenated_data.loc[row, numeric_columns] = (
        concatenated_data.loc[row, numeric_columns].astype(float) + offset
    )

plot_TIC(concatenated_data, 1.11, 38, y_limit_top=200_000_000, y_limit_down=-10_000_000, figsize=(12, 4))