In [55]:
import pandas as pd
import numpy as np
import time
from datetime import timedelta,datetime

In [81]:
def data_split(df,well_id_int=6110299100,param_id_list=[188,200],package_size=50000, leftcut=5000):
    
    df=df.query('well_id == @well_id_int and param_id in @param_id_list')
    del df['well_id']
    # Устанавливаем tm_time как индекс
    df.set_index('tm_time', inplace=True)

    # Отсортируем по индексу
    df.sort_index(inplace=True)

    # Преобразование с помощью pivot
    df = df.pivot(columns='param_id', values='tm_value')

    # убираем верхний индекс param_id
    #df.columns.rename(None, inplace=True)

    
    # Интерполирование к частоте в 1 секунду
    df = df.resample('1s').mean().interpolate(method='linear', limit_direction='both')
    
    packages = []
    current_packet = pd.DataFrame()  # Изначально пустой пакет
    num_points = len(df)

    # Идем по ряду с шагом package_size
    for i in range(0, num_points, package_size):
        # Берем кусок данных длиной package_size
        package = df[i:i + package_size]

         # Добавляем новые точки к текущему пакету
        current_packet = pd.concat([current_packet[leftcut:], package])

        # Добавляем пакет в список
        packages.append(current_packet)

    return packages    

In [59]:
def calculate_deviations(df):
    # Создаем пустой датафрейм для результатов
    result_df = pd.DataFrame(index=df.index)
    
    for col in df.columns:
        # Выбираем конкретный столбец
        current_column = df[col]
        
        # Вычисление среднего, медианы, квартилей и дисперсии для данного столбца
        mean_value = current_column.mean()
        median_value = current_column.median()
        quartiles = current_column.quantile([0.25, 0.75])
        std_dev = current_column.std()
        
        # Отклонения от среднего
        result_df[f'{col}_Deviation from Mean'] = current_column - mean_value
        
        # Отклонения от первого квартиля
        result_df[f'{col}_Deviation from Q1'] = current_column - quartiles.loc[0.25]
        
        # Отклонения от медианы
        result_df[f'{col}_Deviation from Median'] = current_column - median_value
        
        # Отклонения от третьего квартиля
        result_df[f'{col}_Deviation from Q3'] = current_column - quartiles.loc[0.75]
        
        # Отклонения от дисперсии
        result_df[f'{col}_Deviation from Std Dev'] = current_column - std_dev
        
        # Отклонения от предыдущего значения
        result_df[f'{col}_Deviation from Previous Value'] = current_column.diff()
    
    return result_df




In [69]:
df1=pd.read_pickle('ncrptd.pkl')
df1.reset_index(inplace=True)

df=pd.DataFrame(columns=['well_id', 'param_id', 'tm_time','tm_value'])
df['well_id']=df1['wd']+2024
df['param_id']=df1['pd']+1703
df['tm_time']=df1['tt']-timedelta(days=10000)
df['tm_value']=df1['tv']/1.1

df

Unnamed: 0,well_id,param_id,tm_time,tm_value
0,6110299100,220,1969-06-29 13:14:08,0.0
1,6110299100,220,1969-06-29 13:14:38,0.0
2,6110299100,220,1969-06-29 13:15:08,0.0
3,6110299100,220,1969-06-29 13:15:38,0.0
4,6110299100,220,1969-06-29 13:16:08,0.0
...,...,...,...,...
15497320,2860194100,401,1969-06-29 23:58:03,35.7
15497321,2860194100,401,1969-06-29 23:58:33,35.1
15497322,2860194100,401,1969-06-29 23:59:03,0.0
15497323,2860194100,401,1969-06-29 23:59:04,0.0


In [83]:
list_of_dfs = data_split(df,well_id_int=6110299100,param_id_list=[188,200],package_size=50000, leftcut=5000)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_index(inplace=True)


In [85]:
list_of_dfs[0]

param_id,188,200
tm_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1969-06-29 00:00:03,30.0,74.000000
1969-06-29 00:00:04,30.0,74.033333
1969-06-29 00:00:05,30.0,74.066667
1969-06-29 00:00:06,30.0,74.100000
1969-06-29 00:00:07,30.0,74.133333
...,...,...
1969-06-29 13:53:18,31.0,75.000000
1969-06-29 13:53:19,31.0,75.000000
1969-06-29 13:53:20,31.0,75.000000
1969-06-29 13:53:21,31.0,75.000000


In [75]:
result_df = calculate_deviations(list_of_dfs[0])

result_df

Unnamed: 0_level_0,188_Deviation from Mean,188_Deviation from Q1,188_Deviation from Median,188_Deviation from Q3,188_Deviation from Std Dev,188_Deviation from Previous Value,200_Deviation from Mean,200_Deviation from Q1,200_Deviation from Median,200_Deviation from Q3,200_Deviation from Std Dev,200_Deviation from Previous Value
tm_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1969-06-29 00:00:03,-0.31332,0.0,0.0,-1.0,29.115192,,49.797530,74.000000,74.000000,0.000000,39.256239,
1969-06-29 00:00:04,-0.31332,0.0,0.0,-1.0,29.115192,0.0,49.830863,74.033333,74.033333,0.033333,39.289572,0.033333
1969-06-29 00:00:05,-0.31332,0.0,0.0,-1.0,29.115192,0.0,49.864197,74.066667,74.066667,0.066667,39.322906,0.033333
1969-06-29 00:00:06,-0.31332,0.0,0.0,-1.0,29.115192,0.0,49.897530,74.100000,74.100000,0.100000,39.356239,0.033333
1969-06-29 00:00:07,-0.31332,0.0,0.0,-1.0,29.115192,0.0,49.930863,74.133333,74.133333,0.133333,39.389572,0.033333
...,...,...,...,...,...,...,...,...,...,...,...,...
1969-06-29 13:53:18,0.68668,1.0,1.0,0.0,30.115192,0.0,50.797530,75.000000,75.000000,1.000000,40.256239,0.000000
1969-06-29 13:53:19,0.68668,1.0,1.0,0.0,30.115192,0.0,50.797530,75.000000,75.000000,1.000000,40.256239,0.000000
1969-06-29 13:53:20,0.68668,1.0,1.0,0.0,30.115192,0.0,50.797530,75.000000,75.000000,1.000000,40.256239,0.000000
1969-06-29 13:53:21,0.68668,1.0,1.0,0.0,30.115192,0.0,50.797530,75.000000,75.000000,1.000000,40.256239,0.000000
