In [None]:
import pandas as pd
from pandas import DataFrame,Series
import math
import copy
from sklearn.preprocessing import MinMaxScaler
from scipy import signal
from numpy import ndarray
from pathlib import Path as P
from typing import Any
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from scipy.special import voigt_profile
import numpy as np
from functools import partial
from pydantic import BaseModel
from scipy.optimize import minimize
import nest_asyncio
from scipy.spatial.distance import cdist
from scipy.sparse import csr_matrix
nest_asyncio.apply()

class UVdata(BaseModel):
    name: str
    raw_arr: list[list[int|float]]
    peaks_arr: list[int|float] | None = []

# 定义要拟合的函数列表
class peak_funcs:
    
    @staticmethod
    def exp(x, a, b, c):
        return a * np.exp(b * (x-c))
    
    @staticmethod
    def gauss(x, A, mu, sigma):
        return A * np.exp(-(x - mu)**2 / (2 * sigma**2))
    
    @staticmethod
    def lorentz(x, A, mu, gamma):
        return A / (1 + ((x - mu) / gamma)**2)
    
    @staticmethod
    def voigt(x, A, mu, sigma, gamma):
        return A * voigt_profile(x - mu, sigma, gamma)

models = [getattr(peak_funcs, i) for i in dir(peak_funcs) if not i.startswith('__')]

# 定义损失函数
def loss(params, x, y, func):
    y_pred = func(x, *params)
    diff = y - y_pred
    diff = np.nan_to_num(diff)
    diff = np.where(np.isnan(diff), 0, diff)
    diff = np.where(np.isposinf(diff), 1.0, diff)
    diff = np.where(np.isneginf(diff), -1.0, diff)
    loss_v = np.sum(np.abs(diff) + np.maximum(0, 10*(diff)))
    return loss_v

# 读取excel
def get_data_from_excel(file:P)-> dict:
    filename: str= file.stem
    df: DataFrame = pd.read_excel(file, header=None)
    df.dropna(axis=0, how='all', inplace=True)
    df.dropna(axis=1, how='all', inplace=True)
    mask: DataFrame = df.applymap(lambda x: 'nm' == str(x).strip())    ## 在df中寻找值为字符串'nm'索引
    mask_series: DataFrame | Series = mask.stack()                           ## 二维数据打平转化为series(row,col,value)
    indices: list[Any] = mask_series[lambda x: x].index # type: ignore
    if len(indices) > 1:
        raise Exception(f'{filename}: 存在多个值为nm的单元格{indices}')
    elif len(indices) == 0:
        raise Exception(f'{filename}: 不存在值为nm的单元格')
    else:
        arr: ndarray = df.loc[indices[0][0]+1:,indices[0][1]:].to_numpy()
        return {'name':filename, 'raw_arr': arr}

# 数据前处理
def pre_process(data:ndarray)-> ndarray:
    scaler = MinMaxScaler()
    arr_normalized = scaler.fit_transform(data.reshape(-1,1)).reshape(-1)
    arr_normalized = signal.savgol_filter(arr_normalized, window_length=10, polyorder=2)
    return arr_normalized.tolist(), scaler # type: ignore

## 寻找峰值
def get_peaks(data:ndarray, threshold=10)-> list[int]:

    peaks_normal: ndarray
    _property:dict
    peaks_normal, _property = signal.find_peaks(data, prominence=0.002, distance=10)
    peaks_cwt: ndarray = signal.find_peaks_cwt(data, np.arange(1, 10), min_length=4, min_snr=1)
    ## 合并去重,过滤低值
    peaks_merged: list[Any] = sorted(list(set(peaks_normal.tolist() + peaks_cwt.tolist())))
    peaks=[i for i in peaks_merged if data[i] > 0.05]
    if len(peaks) == 0:
        return []
    ## 筛选主峰
    diffs = np.diff(peaks)
    separators = np.where(diffs >= threshold)[0] + 1
    subarrays= np.split(peaks, separators)
    peaks=[]
    ## 密集区域稀疏化
    for sub in subarrays:
        if len(sub) == 1:
            sub = sub[0]
        else:
            value_in_peaks_normal =np.array([i for i in sub if i in peaks_normal])
            if len(value_in_peaks_normal) == 0:
                sub = int(sub.mean())
            else:
                index = np.argmin(value_in_peaks_normal - sub.mean())
                sub= value_in_peaks_normal[index]
        peaks.append(sub)
    # print('peaks:',peaks)
    return peaks

# 迭代寻找峰值主函数
def iter_peaks(x_data, y_data, iter_num:int|None = None, results:list[dict] = []) -> list[dict]:
    """
    find the best fitting model for each peak.

    Args:
        x_data: The x-axis data points.
        y_data: The y-axis data points.
        iter_num: 最大迭代次数 (optional).
        results: 输出的结果 (optional).

    Returns:
        A list of fitting results, where each result contains:
            - name: The name of the model used for fitting.
            - params: The optimal parameters found for the model.
    """
    try:
        # 识别峰位
        peak_indexs = get_peaks(y_data)
        peaks_num = len(peak_indexs)
        if peaks_num == 0:
            return results
        iter_num = iter_num if iter_num else peaks_num

        # 计算最高峰位的相关信息
        scale = len(y_data)
        max_peak_index= np.argmax(y_data[peak_indexs])
        max_intensity = y_data[peak_indexs[max_peak_index]]
        center = peak_indexs[max_peak_index] / scale
        _width_scipy=signal.peak_widths(y_data, [peak_indexs[max_peak_index]], rel_height=0.5)[0][0] / scale
        width = _width_scipy if _width_scipy > 0.02 else 0.02

        # 设置不同模型拟合函数和初猜值
        tasks = []
        for model in models:
            initial_func_guess=[]
            if model.__name__ in ['gauss','lorentz']:
                initial_func_guess = [max_intensity,center,width]
            elif model.__name__ == 'voigt':
                initial_func_guess = [max_intensity/4, center, width-0.01, width/2-0.01]
            elif model.__name__ == 'exp':
                initial_func_guess = [1.0, -10.0, -0.01]
            params = {
                'fun': partial(loss, func=model),
                'x0': initial_func_guess,
                'args': (x_data, y_data)
            }
            tasks.append({'name': model.__name__, 'params': copy.deepcopy(params)})

        # 并行加速运行拟合函数，并行失败，待研究
        ## task_results = Parallel(n_jobs=-1)(delayed(minimize)(**task['params']) for task in tasks)
        task_results=[minimize(**task['params']) for task in tasks]

        # 过滤拟合失败的结果
        task_results_filtered= [result for result in task_results if not math.isnan(result.fun)]

        # 选择拟合最好的模型
        optimal_fit_info = min(task_results_filtered, key=lambda x: x.fun)
        optimal_index = task_results.index(optimal_fit_info)
        optimal_params= optimal_fit_info.x
        model_func = models[optimal_index]

        # 保存当前拟合的最优模型参数
        results.append({
            'name': model_func.__name__,
            'params': optimal_params,
        })

        # 初始数据减去拟合函数的值，生成新的待拟合数据
        y_fit= model_func(x_data, *optimal_params)
        y_new = y_data - y_fit

        # 递归拟合上一步的残差, 直至iter_num == 0
        iter_num -= 1
        if iter_num != 0:
            return iter_peaks(x_data, y_new, iter_num, results)
        else:
            return results
    except Exception as e:
        print(f'peak process error in the {iter_num} iteration: {e}')
        return results

In [None]:
p_source= P('./dataCheck/spectrum/UV统一格式').glob('**/*.xlsx')
results:Any= Parallel(n_jobs=-1)(delayed(get_data_from_excel)(i) for i in p_source)
_results:Any = copy.deepcopy(results)

In [None]:
results[1]

In [None]:
data4pkl ={}
for i in results:
    # 前处理数据
    data = i['raw_arr'][:,1]
    y_data, scaler= pre_process(data[::-1])
    # scaler.inverse_transform(y_data.reshape(-1, 1)).reshape(-1)
    x_data = np.linspace(0, 1, len(y_data))
    
    # 拿到峰值
    try:
        peaks_indices = get_peaks(y_data)
        peaks_arr= np.zeros(401)
        peaks_arr[peaks_indices] = np.array(y_data)[peaks_indices]
        i['peaks_arr'] = peaks_arr
        data4pkl[i['name']] = {'name':i['name'], 'raw_arr':y_data,'peaks_arr': peaks_arr.tolist()}
    except Exception as e:
        print('error:',i.keys(), e)

In [None]:
len(data4pkl['B2']['raw_arr'])
#data4pkl['B2'].model_dump()
data4pkl['B2']

In [None]:
## 写入
import pickle
with open('uv_data.pkl', 'wb') as f:
    pickle.dump(data4pkl, f)

In [None]:
## 读取
import pickle
with open('uv_data_with_types.pkl', 'rb') as f:
    data_pkl=pickle.load(f)

In [None]:
all_arr: list[Any] = [data_pkl[i].peaks_arr for i in data_pkl]
matrix = np.concatenate(all_arr).reshape(-1,401)
data_matrix_sparse = csr_matrix(matrix)

# 稀疏数组
target_array_sparse = csr_matrix(data_pkl['B2'].peaks_arr)  # (1, 401)

# 将稀疏矩阵转换为密集格式
data_matrix_dense1 = data_matrix_sparse.toarray()
target_array_dense1 = target_array_sparse.toarray()

# 计算余弦相似度
similarities = 1 - cdist(target_array_dense1, data_matrix_dense1, metric='cosine')

In [None]:

similarities.reshape(-1).tolist()

In [2]:
def timetest(f):
    def wrap(*args, **kwargs):
        import time
        start = time.time()
        result = f(*args, **kwargs)
        end = time.time()
        print(f"Time taken: {end - start} seconds")
        return result
    return wrap

@timetest
def f(a,b):
    out = [i for i in a if i in b]
    return out

In [9]:
a = set([f'str-{i}' for i in range(10000)])
b = set([f'str-{i}' for i in range(0,10000,20)])

In [12]:
out = f(a,b)

Time taken: 0.0010004043579101562 seconds


In [1]:
import heapq

# 创建一个空堆
heap = []

# 向堆中添加元素
heapq.heappush(heap, 10)
heapq.heappush(heap, 1)
heapq.heappush(heap, 5)

# 查看堆中的元素
print("Heap:", heap)

# 弹出最小的元素
min_item = heapq.heappop(heap)
print("The minimum element:", min_item)

# 添加一个元素，然后立即弹出最小的元素
item = heapq.heappushpop(heap, 2)
print("Item pushed and popped:", item)

# 将一个列表转换为堆
list_for_heap = [20, 14, 2, 15, 10, 21]
heapq.heapify(list_for_heap)
print("List converted to Heap:", list_for_heap)

# 替换最小的元素
min_item_replaced = heapq.heapreplace(list_for_heap, 100)
print("Minimum element replaced:", min_item_replaced)
print("Heap after replacement:", list_for_heap)

# 获取最大的三个元素
largest = heapq.nlargest(3, list_for_heap)
print("Three largest elements:", largest)

# 获取最小的三个元素
smallest = heapq.nsmallest(3, list_for_heap)
print("Three smallest elements:", smallest)


Heap: [1, 10, 5]
The minimum element: 1
Item pushed and popped: 2
List converted to Heap: [2, 10, 20, 15, 14, 21]
Minimum element replaced: 2
Heap after replacement: [10, 14, 20, 15, 100, 21]
Three largest elements: [100, 21, 20]
Three smallest elements: [10, 14, 15]


In [25]:

def trailingZeroes( n: int) :
    p=1
    ans=0
    for i in range(1,n+1):
        p=p*i
        print(p)
        if str(p)[-1]=='0':
            ans+=1
            p=int(p/10)
    return ans        

    

In [26]:
trailingZeroes(40)

1
2
6
24
120
72
504
4032
36288
362880
399168
4790016
62270208
871782912
13076743680
20922789888
355687428096
6402373705728
121645100408832
2432902008176640
5109094217170944
112400072777760768
2585201673888497664
62044840173323943936
1551121004333098598400
4032914611266056355840
10888869450418352160768
304888344611713860501504
8841761993739701954543616
265252859812191058636308480
822283865417792249380470784
26313083693369351980175065088
868331761881188615345777147904
29523279903960412921756423028736
1033314796638614452261474806005760
3719933267899011782369124412293120
13763753091226344927831250027151360
52302261746660113463947323544436736
2039788208119744425093945618233032704
81591528324789777003757824729321308160


11

In [None]:
import numpy as np

# 生成一个随机的目标数组
target_array = np.random.randint(0, 2, size=(401,))

# 生成20000个随机的数组，放入一个大的数组中
array_set = np.random.randint(0, 2, size=(2000000, 401))

# 计算目标数组与每个数组的相似度（汉明距离）
hamming_distances = np.sum(target_array != array_set, axis=1)

# 找到最相似的数组的索引
most_similar_index = np.argmin(hamming_distances)

# 输出结果
print("最相似的数组的索引：", most_similar_index)
print("最相似的数组：", array_set[most_similar_index])
print("汉明距离：", hamming_distances[most_similar_index])

In [None]:
s=np.array([[1,2,3,{}]])
isinstance(s,np.ndarray)

In [None]:
models[2](0.3,*[0.24982069672131227, 0.5336658354114713, 0.26760417134639103])

In [None]:
data = list(_results[4].values())[0][1:,1]

# 前处理数据
y_data, scaler= pre_process(data[::-1])
# scaler.inverse_transform(y_data.reshape(-1, 1)).reshape(-1)
x_data = np.linspace(0, 1, len(y_data))

# 拟合
result = iter_peaks(x_data, y_data)
print(result)

In [None]:
print(result)

In [None]:
diffs = np.diff([])
separators = np.where(diffs >= 2)[0] + 1
print(separators)
subarrays= np.split([], separators)

In [None]:
subarrays

In [None]:
# 定义要拟合的函数列表
class peak_funcs:
    
    @staticmethod
    def exp(x, a, b, c):
        return a * np.exp(b * (x-c))
    
    @staticmethod
    def gauss(x, A, mu, sigma):
        return A * np.exp(-(x - mu)**2 / (2 * sigma**2))
    
    @staticmethod
    def lorentz(x, A, mu, gamma):
        return A / (1 + ((x - mu) / gamma)**2)
    
    @staticmethod
    def voigt(x, A, mu, sigma, gamma):
        return A * voigt_profile(x - mu, sigma, gamma)

models = [getattr(peak_funcs, i) for i in dir(peak_funcs) if not i.startswith('__')]

def loss(params, x, y, func):
    y_pred = func(x, *params)
    loss_v = np.sum((y - y_pred) + np.maximum(0.001, 10*(y_pred - y)))
    return loss_v

tasks = []
for model in models:
    initial_func_guess = [1.0, -10.0, -0.01]
    params = {
        'fun': loss,
        'x0': initial_func_guess,
        'args':(x_data, y_data, model)
    }
    tasks.append({'name': model.__name__, 'params': copy.deepcopy(params)})
# 并行加速运行拟合函数
#task_results=[minimize(**task['params']) for task in tasks]
task_results = Parallel(n_jobs=-1)(delayed(minimize)(**task['params']) for task in tasks)


In [None]:
import numpy as np
from scipy import signal
import copy
_results=copy.deepcopy(results)
data = list(_results[13].values())[0][1:,1]
y_data, scaler= pre_process(data[::-1])
# scaler.inverse_transform(y_data.reshape(-1, 1)).reshape(-1)
x_data = np.linspace(0, 1, len(y_data))
peak_indexs: list[int] = get_peaks(y_data)
plt.plot(x_data,y_data)
plt.plot(x_data[peak_indexs], y_data[peak_indexs], "x")
plt.show()


In [None]:
import numpy as np
from functools import partial
from scipy.optimize import minimize

# 定义要拟合的函数列表
models = [getattr(peak_funcs, i) for i in dir(peak_funcs) if not i.startswith('__')]

# 定义损失函数
def loss(params, x, y, func):
    y_pred = func(x, *params)
    loss_v = np.sum((y - y_pred) + np.maximum(0.001, 10*(y_pred - y)))
    return loss_v

# 前处理数据，获取峰值属性
y_data, scaler= pre_process(data[::-1])
# scaler.inverse_transform(y_data.reshape(-1, 1)).reshape(-1)
x_data = np.linspace(0, 1, len(y_data))


def iter_peaks(x_data, y_data, iter_num:int|None = None, results:list[dict] = []) -> list[dict]:
    
    try:
        # 识别峰位
        peak_indexs = get_peaks(y_data)
        iter_num = iter_num if iter_num else len(peak_indexs)

        # 计算最高峰位的相关信息
        scale = len(y_data)
        max_peak_index= np.argmax(y_data[peak_indexs])
        max_intensity = y_data[peak_indexs[max_peak_index]]
        center = peak_indexs[max_peak_index] / scale
        _width_scipy=signal.peak_widths(y_data, [peak_indexs[max_peak_index]], rel_height=0.5)[0][0] / scale
        width = _width_scipy if _width_scipy > 0.02 else 0.02

        # 设置不同模型拟合函数和初猜值
        tasks = []
        for model in models:
            if model.__name__ in ['gauss','lorentz']:
                initial_func_guess = [max_intensity,center,width]
            elif model.__name__ == 'voigt':
                initial_func_guess = [max_intensity/4, center, width-0.01, width/2-0.01]
            elif model.__name__ == 'exp':
                initial_func_guess = [1.0, -10.0, -0.01]
            params = {
                'fun':partial(loss, func=model),
                'x0':initial_func_guess,
                'args':(x_data, y_data)
            }
            tasks.append({'name': model.__name__, 'params': params})

        # 并行加速运行拟合函数
        task_results = Parallel(n_jobs=-1)(delayed(minimize)(**task['params'])  for task in tasks)

        # 过滤拟合失败的结果
        task_results_filtered= [result for result in task_results if not math.isnan(result.fun)]

        # 选择拟合最好的模型
        optimal_fit_info = min(task_results_filtered, key=lambda x: x.fun)
        optimal_index = task_results.index(optimal_fit_info)
        optimal_params= optimal_fit_info.x
        model_func = models[optimal_index]

        # 初始数据减去拟合函数的值，生成新的待拟合数据
        y_fit= model_func(x_data, *optimal_params)
        y_new = y_data - y_fit

        
        iter_num -= 1
    except Exception as e:
        print(f'peak process error in the {iter_num} iteration: {e}')

    if iter_num != 0:
        # Recursively fit the remaining peaks
        return iter_peaks(x_data, y_new, iter_num, results)
    else:
        return results




## 使用偏函数设定每个peak_fun的损失函数
# loss_partials = [partial(loss, func=model) for model in models]

# initial_func_guess = [1,0.2,0.5]
# result = minimize(loss_partials[1], initial_func_guess, args=(np.linspace(0, 1, len(data_arr)), data_arr))

print(results)

In [None]:
results_index=1
y_pred=models[results_index](np.linspace(0, 1, len(y_data)),*results[results_index].x)#*fit_results[1]['params'])
plt.plot(np.linspace(0, 1, len(y_data)),y_data)
plt.plot(np.linspace(0, 1, len(y_data)), y_pred, "r")
plt.show()
print(models[3])

In [None]:
x_data = np.linspace(0, 1, len(y_data))
new_data = [y_data - models[i](x_data, *v.x) for i,v in enumerate(results)]
errors=[sum(i) for i in new_data]
errors

In [None]:
plt.plot(np.linspace(0, 1, len(y_data)), y_data-y_pred)

In [None]:
import numpy as np
from scipy.optimize import minimize

# 定义要拟合的函数
def func(x, a, b, c):
    return a * np.exp(-b * x) + c

# 生成一些模拟数据
x_data = np.linspace(0, 4, 50)
y = func(x_data, 2.5, 1.3, 0.5)
np.random.seed(1729)
y_noise = 0.2 * np.random.normal(size=x_data.size)
y_data = y + y_noise

# 定义自定义的损失函数
def custom_loss_function(params, x, y):
    a, b, c = params
    y_pred = func(x, a, b, c)
    # 这里可以根据需要定义自己的损失函数，比如最大似然估计等
    # 这里使用简单的平方损失作为示例
    print(y_pred)
    loss = np.sum((y - y_pred)**2)
    return loss

# 使用minimize进行拟合，传入自定义的损失函数
initial_guess = [1.0, 1.0, 1.0]
result = minimize(fun=custom_loss_function, x0=initial_guess, args=(x_data, y))

# 输出拟合的参数
print(result.x)

In [None]:
peak_property

In [None]:
signal.peak_prominences(data[:,1], [248, 269, 289, 305, 383],)

In [None]:
initial_guess = []
for i in peakind:
    width=signal.peak_widths(data_arr, [i], rel_height=0.5)[0][0]
    height=signal.peak_prominences(data_arr, [i])[0][0]
    center = i
    amplitude = height if height != 0 else 0.1
    sigma = width/2.355 if width != 0 else 0.1
    gamma = sigma/2 if width != 0 else 0.1
    initial_guess.extend([center,amplitude,sigma,gamma])
    print(center,amplitude,sigma,gamma)

In [None]:
peak_property,data[:,0][peakind]

In [None]:
results_half=signal.peak_widths(data[:,1], peakind, rel_height=0.5)
plt.plot(data[:,1])
plt.plot(peakind, data[:,1][peakind], "x")
plt.hlines(*results_half[1:], color="C2")
plt.show()

In [None]:
peakind

In [None]:
signal.peak_widths(data[:,1], [185], rel_height=0.5)

In [None]:
signal.peak_prominences(data[:,1],peakind)

In [None]:
results_full = signal.peak_widths(data[:,1], [185], rel_height=1)
results_full

In [None]:
data[:,0][peakind],len(peakind)

In [None]:
import numpy as np
from scipy.optimize import curve_fit
from scipy.special import wofz

# 定义Voigt函数
def voigt(x, center, amplitude, sigma, gamma):
    """
    Voigt函数是高斯函数和洛伦兹函数的卷积。
    center: 峰的中心位置
    amplitude: 峰的高度
    sigma: 高斯分量的标准偏差
    gamma: 洛伦兹分量的半宽度
    """
    z = ((x-center) + 1j*gamma) / (sigma*np.sqrt(2))
    return amplitude * np.real(wofz(z)) / (sigma*np.sqrt(2*np.pi))

# 构建多个Voigt峰的组合函数
def multiple_voigt(x, *params):
    """
    params: 一个包含所有Voigt峰参数的列表，每个Voigt峰需要4个参数: center, amplitude, sigma, gamma
    """
    y = np.zeros_like(x,dtype=np.float64)
    for i in range(0, len(params), 4):
        center = params[i]
        amplitude = params[i+1]
        sigma = params[i+2]
        gamma = params[i+3]
        y += voigt(x, center, amplitude, sigma, gamma)
    return y

# 假设的光谱数据及其噪声
xdata = range(len(data_arr))
ydata = data_arr

# 初始猜测
initial_guess = []
for i in peakind:
    width=signal.peak_widths(data_arr, [i], rel_height=0.5)[0][0]
    height=signal.peak_prominences(data_arr, [i])[0][0]
    amplitude = 20*data_arr[i]
    sigma = width/2.355 if width != 0 else 10
    gamma = sigma/16 if width != 0 else 1
    initial_guess.extend([center,amplitude,sigma,gamma])
    print(center,amplitude,sigma,gamma)

# 执行拟合
popt, pcov = curve_fit(multiple_voigt, xdata, ydata, p0=initial_guess, maxfev=10000,method='trf')

# 输出最优拟合参数
print(popt)

In [None]:
initial_guess = []
for i in peakind:
    width=signal.peak_widths(data_arr, [i], rel_height=0.5)[0][0]
    height=signal.peak_prominences(data_arr, [i])[0][0]
    center = i
    amplitude = 10*data_arr[i]*(1+height)
    sigma = width if width != 0 else 10
    gamma = sigma/100 if width != 0 else 0.02
    initial_guess.extend([center,amplitude,sigma,gamma])
    print(center,amplitude,sigma,gamma)


In [None]:
plt.plot(xdata,ydata, "c")
plt.plot(xdata,multiple_voigt(xdata,*initial_guess), "r")
plt.show()

In [None]:
sss=np.zeros_like([5,2])
type(sss[0])

In [None]:
import logger_config

logger = logger_config.get_logger(__name__)
logger.debug('This is a debug message')

In [None]:
import sys
sys.path

In [None]:
def fullJustify(words: list[str], maxWidth: int) -> list[str]:
    res=[]
    cur_len=len(words[0])
    line=words[0]
    items_list=[words[0]]
    for s in words[1:]:
        n=len(s)
        if cur_len + 1 + n <= maxWidth:
            line = line + ' ' + s
            cur_len = cur_len + 1 + n 
            items_list.append(s)
        else:
            extra_space=maxWidth - cur_len
            if extra_space != 0:
                if len(items_list) > 1:
                    base_space = extra_space//(len(items_list)-1)
                    left_space = extra_space%(len(items_list)-1)
                    line = (' '*(base_space+2)).join(items_list[:left_space+1]) + ' '*(base_space+1)+ (' '*(base_space+1)).join(items_list[left_space+1:])
                    print(len((' '*(base_space+2)).join(items_list[:left_space+1])),(' '*(base_space+2)).join(items_list[:left_space+1]),len((' '*(base_space+1)).join(items_list[left_space+1:])),(' '*(base_space+1)).join(items_list[left_space+1:]))
                else:
                    line = line + extra_space*' '
            res.append(line)
            cur_len=n
            line=s
            items_list=[s]
    res.append(' '.join(line.strip().split()))
    return res

In [None]:
fullJustify(words=["Science","is","what","we","understand","well","enough","to","explain","to","a","computer.","Art","is","everything","else","we","do"], maxWidth=20)

In [None]:
len('This    is    an')

In [None]:

from enumSmiles.utils import enumSetting,core,enum_atoms_smiles,enumData,molecule
data=enumData(
    core=core(
        id=16,
        smiles='C1(C=CC=C2)=C2C(C=CC=C3)=C3S1',
        enumAtoms={
            0: enumSetting(array=[10, 9, 8], range=[1, 3], connect2index=[0], keepSame2Index=[]), 
            2: enumSetting(array=[], range=[1, 3], connect2index=[2, 0], keepSame2Index=[])
        },
        enumBonds={}
    ),
    ligands=[
        molecule(id=1, smiles='C1=CC1', atoms={0: [0]}, bonds={}), 
        molecule(id=2, smiles='C1CCC1', atoms={0: [0]}, bonds={}), 
        molecule(id=3, smiles='C1CCCC1', atoms={2: [2]}, bonds={})
    ])

In [None]:
res = await enum_atoms_smiles(data)
res

In [None]:
item=[1,2,3]
d= {'a':item,'b':item}
d['a'][2]=0
item,d

In [None]:
with open('./test.txt', 'w') as f:
    lines = f.readlines()
    for line in lines:
        line = line.strip()
        if line == '':
            continue
        f.write(line + '\n')
        

In [None]:
from datetime import datetime
stamp=datetime.now().timestamp()
def get_date_from_timestamp(timestamp):
    return datetime.strftime(timestamp,"%Y%m%d")
get_date_from_timestamp(stamp)


In [None]:
from datetime import datetime

def get_date_from_timestamp(timestamp):
    date_time = datetime.fromtimestamp(timestamp)
    return date_time.strftime("%Y%m%d")

# Get the current timestamp
stamp = datetime.now().timestamp()

# Get the date from the timestamp
formatted_date = get_date_from_timestamp(stamp)
print(formatted_date)

In [None]:
from pathlib import Path as P

In [None]:
path=P('./root/task')
path.mkdir(mode=0o777, parents=True, exist_ok=True)

In [None]:
s='////// /s/d/2/'.strip(' /')
s

In [None]:
p = P('./my_text_file/ssd/sddds')
s=p.absolute().as_posix()
s

In [None]:
import re
s="""Job Id: 979305.mu01
    Job_Name = g16
    Job_Owner = root@mu01
    resources_used.cput = 27:37:35
    resources_used.mem = 20069212kb
    resources_used.vmem = 36359272kb
    resources_used.walltime = 00:52:23
    job_state = R
    queue = que
    server = mu01
    Checkpoint = u
    ctime = Wed Jan 31 15:42:53 2024
    Error_Path = mu01:/home/g16/HT/240124-RDF-0008/second/test/240124-RDF-0008
       -S1-T1.gjf/g16.e979305
    4exec_host = cu18/31+cu18/30+cu18/29+cu18/28+cu18/27+cu18/26+cu18/25+cu18/2
        4+cu18/23+cu18/22+cu18/21+cu18/20+cu18/19+cu18/18+cu18/17+cu18/16+cu18
        /15+cu18/14+cu18/13+cu18/12+cu18/11+cu18/10+cu18/9+cu18/8+cu18/7+cu18/
        6+cu18/5+cu18/4+cu18/3+cu18/2+cu18/1+cu18/0
    exec_port = 15003+15003+15003+15003+15003+15003+15003+15003+15003+15003+15
        003+15003+15003+15003+15003+15003+15003+15003+15003+15003+15003+15003+
        15003+15003+15003+15003+15003+15003+15003+15003+15003+15003
"""
ctime = re.search(r'ctime = (.*)', s).group(0)
ctime

In [None]:
ctime = re.search(r'ctime = (.*)', s).group(0)
ctime


In [None]:
lines[1].split()[2]

In [None]:
from datetime import datetime

ctime_str = "Wed Jan 31 15:42:53 2024"
format_string = "%a %b %d %H:%M:%S %Y"

# 解析字符串为datetime对象
ctime_datetime = datetime.strptime(ctime_str, format_string)

print(ctime_datetime.timestamp())