In [1]:
import pandas as pd
import plotly.graph_objs as go
from scipy.stats import pearsonr
import numpy as np

import plotly.io as pio

In [2]:
import pandas as pd

# Pickled measurement database, resolved relative to the notebook's working directory.
DATABASE_PICKLE = 'M2717_database.pkl'

# SECURITY: unpickling can execute arbitrary code stored in the file, so this
# must only ever be pointed at a pickle from a trusted source.
df = pd.read_pickle(DATABASE_PICKLE)

In [3]:
# Collect every distinct meta['uuid'] value found in the records.
# Iterating the 'meta' column directly avoids the positional df.iloc[i] lookup,
# which builds a complete row Series for each record.
unique_uuids = {meta["uuid"] for meta in df["meta"]}

# Print all unique uuid values. The loop variable is deliberately not named
# `uuid`, so it cannot shadow the standard-library module of that name.
for unit_uuid in unique_uuids:
    print(unit_uuid)

44332625541024
207137655863200
185284862261152
97186493085600
39714511164468


In [4]:
# uuid values that select the records of each span; the order defines
# df_span1 ... df_span5 and therefore the positions inside df_spans.
SPAN_UUIDS = [
    185284862261152,
    44332625541024,
    39714511164468,
    207137655863200,
    97186493085600,
]

# Extract the uuid of every record once instead of re-running the same
# row-wise apply() for each of the five filters.
record_uuid = df['meta'].apply(lambda meta: meta['uuid'])

# One independent copy of the matching rows per span.
df_spans = [df[record_uuid == span_uuid].copy() for span_uuid in SPAN_UUIDS]

# Individual names kept for any cell that refers to a single span directly.
df_span1, df_span2, df_span3, df_span4, df_span5 = df_spans

In [5]:
def flatten_dict(d, parent_key='', sep='_'):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors with ``sep``;
    when ``parent_key`` is non-empty it is prepended to every key in the same
    way. Values that are not dictionaries are copied unchanged, and an empty
    nested dictionary contributes no entries.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix for every produced key ('' means no prefix).
        sep: Separator placed between key segments.

    Returns:
        A new flat dictionary in depth-first key order.
    """
    flat = {}
    for key, value in d.items():
        full_key = key if not parent_key else f'{parent_key}{sep}{key}'
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        # Descend into the nested mapping, carrying the accumulated key along.
        flat.update(flatten_dict(value, full_key, sep=sep))
    return flat


def flatten_row(row):
    """
    Turn one raw record into a flat pandas Series.

    ``row['measurements']`` is read as a two-level mapping and yields one
    entry per inner key, named ``'<outer key>.<inner key>'``. ``row['meta']``
    and ``row['time']`` are flattened recursively with ``flatten_dict`` and
    get the prefixes ``'meta'`` and ``'time'`` (joined with ``'_'``).

    Args:
        row: Record exposing 'measurements', 'meta' and 'time' entries.

    Returns:
        pd.Series holding the measurement entries first, then the meta
        entries, then the time entries.
    """
    sensor_fields = {
        f'{sensor}.{pv_key}': pv_value
        for sensor, values in row['measurements'].items()
        for pv_key, pv_value in values.items()
    }

    # Later groups overwrite earlier ones on a key clash, exactly like
    # unpacking the three dictionaries into one literal.
    combined = dict(sensor_fields)
    combined.update(flatten_dict(row['meta'], parent_key='meta'))
    combined.update(flatten_dict(row['time'], parent_key='time'))
    return pd.Series(combined)


# Flatten every span: the nested 'measurements', 'meta' and 'time' columns are
# replaced by the flat columns produced by flatten_row.
# The loop variable is intentionally NOT called `df`: reusing that name would
# overwrite the raw frame loaded from the pickle, so re-running the uuid or
# span-filter cells afterwards would silently operate on the last span only.
df_spans_flattened = []
for span_df in df_spans:
    span_flat = span_df.apply(flatten_row, axis=1)
    span_rest = span_df.drop(['measurements', 'meta', 'time'], axis=1)
    df_spans_flattened.append(pd.concat([span_rest, span_flat], axis=1))

In [6]:
def _is_unused_channel(col):
    """Return True when a flattened measurement column should be discarded.

    Only columns whose name starts with a digit are considered. The rule is
    chosen from the textual prefix of the column name:

    * prefix '31'       -> drop channels ending in 'pv0' or 'pv3'
    * prefix '3' or '4' -> drop channels ending in 'pv3'
    * anything else     -> drop channels ending in 'pv2' or 'pv3'

    NOTE(review): these are plain string-prefix tests, so '3'/'4' also match
    two-digit names such as '30' or '41' — confirm that this is intended.
    """
    if not col[0].isdigit():
        return False
    if col.startswith('31'):
        return col.endswith(('pv0', 'pv3'))
    if col.startswith(('3', '4')):
        return col.endswith('pv3')
    return col.endswith(('pv2', 'pv3'))


# Metadata columns that are removed from every span.
columns_to_drop = ['meta_modem_localIP', 'meta_modem_modemInfo',
                   'meta_modem_operator', 'meta_modem_simCCID', "meta_location"]

# New frames are built instead of dropping in place, and metadata columns that
# are already absent are ignored. This keeps the cell idempotent: the former
# in-place drop raised KeyError as soon as the cell was executed a second time.
# The comprehension variable also leaves the raw `df` from the load cell intact.
df_spans_flattened = [
    span_df
    .drop(columns=[col for col in span_df.columns if _is_unused_channel(col)])
    .drop(columns=columns_to_drop, errors='ignore')
    for span_df in df_spans_flattened
]

In [7]:
import pandas as pd
import plotly.graph_objs as go
import plotly.subplots as sp

# Battery voltage of every span, averaged over fixed windows, drawn as one line
# per span in a single figure.
average_set = "12h"  # resampling window (lowercase 'h' rather than 'H')

fig_combined = sp.make_subplots(rows=1, cols=1)

for index, df_plot in enumerate(df_spans_flattened):
    # Parse the timestamps in place; converting an already converted column
    # again leaves it unchanged, so re-running the cell is harmless.
    df_plot['time_server_UTC'] = pd.to_datetime(df_plot['time_server_UTC'])

    # Mean battery voltage per `average_set` window.
    battery_avg = df_plot.resample(
        average_set, on='time_server_UTC')['meta_power_battery_V'].mean()

    # One trace per span; the legend entry carries the 1-based span number.
    fig_combined.add_trace(
        go.Scatter(x=battery_avg.index, y=battery_avg.values,
                   mode='lines', name=f'Pole mosta - {index+1}'))

# Horizontal legend above the plot, grey background, explicit ten-colour
# palette and a fixed 900x500 canvas.
battery_layout = dict(
    title='Hladiny baterií systému',
    xaxis_title='Dátum',
    yaxis_title=f'Priemerná hodnota batérie - {average_set} interval',
    xaxis=dict(tickangle=45),
    legend=dict(orientation='h', yanchor='bottom',
                y=1.02, xanchor='right', x=1),
    plot_bgcolor='#EEEEEE',
    colorway=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'],
    width=900, height=500,
)
fig_combined.update_layout(**battery_layout)

# Show the combined plot
fig_combined.show()

In [8]:
import pandas as pd
import plotly.express as px

# Compare channel 10.pv1 of df_spans_flattened[0] with the reference channel
# 4.pv2 of df_spans_flattened[3], both averaged over `average_set` windows.
df_sens = df_spans_flattened[0]
df_ref = df_spans_flattened[3]

average_set = "12h"

# Parse the timestamps in place so resample(on=...) receives a datetime column.
df_sens['time_server_UTC'] = pd.to_datetime(df_sens['time_server_UTC'])
df_ref['time_server_UTC'] = pd.to_datetime(df_ref['time_server_UTC'])

# Window means, scaled and shifted.
# NOTE(review): the factor 25 and the subtracted offsets presumably convert the
# raw reading to millimetres relative to a per-sensor zero — confirm.
df_sens_resampled = df_sens.resample(average_set, on='time_server_UTC')[
    '10.pv1'].mean() * 25 - 12.065081

df_ref_resampled = df_ref.resample(average_set, on='time_server_UTC')[
    '4.pv2'].mean() * 25 - 9.560792

# Index-aligned difference; windows that exist in only one of the two series,
# or that contain no samples, are NaN here.
difference_resampled = df_sens_resampled - df_ref_resampled

# Kept for any later cell that reads the sorted series.
sorted_difference_resampled = difference_resampled.sort_values()

# sort_values() places NaN last, so reading the extremes from the sorted series
# with iloc[-1] reported NaN as the maximum whenever a window was missing.
# min()/max() skip NaN and give the true extremes.
min_difference = difference_resampled.min()
max_difference = difference_resampled.max()

# Spline line plot of the sensor ('0x0A') and the reference channel.
fig_combined_pv = px.line()
fig_combined_pv.add_scatter(
    x=df_sens_resampled.index, y=df_sens_resampled, name='0x0A', mode='lines', line_shape='spline', line_color='#16CC62')
fig_combined_pv.add_scatter(
    x=df_ref_resampled.index, y=df_ref_resampled, name='referencia', mode='lines', line_shape='spline', line_color='#333333')

# Text annotations with the smallest and largest difference, anchored in paper
# coordinates near the top-right corner.
fig_combined_pv.add_annotation(
    text=f'Minimálny rozdiel: {min_difference:.3f} mm', xref='paper', yref='paper', x=0.95, y=0.95, showarrow=False)
fig_combined_pv.add_annotation(
    text=f'Maximálny rozdiel: {max_difference:.3f} mm', xref='paper', yref='paper', x=0.95, y=0.90, showarrow=False)

fig_combined_pv.update_layout(title='Pole 1, snímač 1',
                              xaxis_title='Dátum',
                              yaxis_title='Lineárny posun [mm]',
                              legend=dict(orientation='h', yanchor='bottom',
                                          y=1.02, xanchor='right', x=1),
                              plot_bgcolor='#EEEEEE',
                              width=900, height=500,
                              margin=dict(l=50, r=25, t=50, b=50))

# Show the plot
fig_combined_pv.show()

In [9]:
import pandas as pd
import plotly.express as px

# Difference between the two temperature channels of sensor 31
# (31.pv2 - 31.pv1) in df_spans_flattened[3], averaged over `average_set` windows.
# NOTE(review): other cells of this notebook use the opposite sign
# (31.pv1 - 31.pv2) — confirm which convention is intended.
df_sens = df_spans_flattened[3]

average_set = "6h"

# Parse the timestamps in place so resample(on=...) receives a datetime column.
df_sens['time_server_UTC'] = pd.to_datetime(df_sens['time_server_UTC'])

# Window means of both temperature channels.
df_temp_1 = df_sens.resample(average_set, on='time_server_UTC')[
    '31.pv1'].mean()

df_temp_2 = df_sens.resample(average_set, on='time_server_UTC')[
    '31.pv2'].mean()

# Windows without any samples have a NaN mean and therefore a NaN difference.
temperature_difference = df_temp_2 - df_temp_1

# Kept for any later cell that reads the sorted series.
sorted_difference = temperature_difference.sort_values()

# sort_values() places NaN last, so iloc[-1] of the sorted series reported NaN
# as the maximum as soon as one window was empty. min()/max() skip NaN.
min_difference = temperature_difference.min()
max_difference = temperature_difference.max()

# Spline line plot of the temperature difference.
fig_temperature_difference = px.line()
fig_temperature_difference.add_scatter(
    x=temperature_difference.index, y=temperature_difference, name='Teplotný rozdiel', mode='lines', line_shape='spline', line_color='#333333')

fig_temperature_difference.update_layout(title='Teplotný spád',
                                         xaxis_title='Dátum',
                                         yaxis_title='Teplotný rozdiel [°C]',
                                         legend=dict(orientation='h', yanchor='bottom',
                                                     y=1.02, xanchor='right', x=1),
                                         plot_bgcolor='#EEEEEE',
                                         width=900, height=500,
                                         margin=dict(l=50, r=25, t=50, b=50))

# Text annotations with the smallest and largest difference.
fig_temperature_difference.add_annotation(
    text=f'Minimálny rozdiel: {min_difference:.1f} °C', xref='paper', yref='paper', x=0.95, y=0.95, showarrow=False)
fig_temperature_difference.add_annotation(
    text=f'Maximálny rozdiel: {max_difference:.1f} °C', xref='paper', yref='paper', x=0.95, y=0.90, showarrow=False)

# Show the plot
fig_temperature_difference.show()

In [10]:
import pandas as pd
import plotly.express as px

# Both temperature channels of sensor 31 in df_spans_flattened[3], averaged
# over `average_set` windows and drawn as two lines.
df_sens = df_spans_flattened[3]
average_set = "6h"

# Parse the timestamps in place (a no-op once the column is already datetime).
df_sens['time_server_UTC'] = pd.to_datetime(df_sens['time_server_UTC'])

# Window means of the two channels.
df_temp_1 = df_sens.resample(average_set, on='time_server_UTC')['31.pv1'].mean()
df_temp_2 = df_sens.resample(average_set, on='time_server_UTC')['31.pv2'].mean()

# One spline trace per channel, each with its own legend label and colour.
fig_combined_pv = px.line()
for channel_mean, channel_label, channel_colour in (
    (df_temp_1, 'Spodná pásnica', '#FF6600'),
    (df_temp_2, 'Horná pásnica', '#0066FF'),
):
    fig_combined_pv.add_scatter(
        x=channel_mean.index, y=channel_mean, name=channel_label,
        mode='lines', line_shape='spline', line_color=channel_colour)

# Titles, horizontal legend above the plot, grey background, fixed canvas.
temperature_layout = dict(
    title='Teplota konštrukcie',
    xaxis_title='Dátum',
    yaxis_title='Teplota [°C]',
    legend=dict(orientation='h', yanchor='bottom',
                y=1.02, xanchor='right', x=1),
    plot_bgcolor='#EEEEEE',
    width=900, height=500,
    margin=dict(l=50, r=25, t=50, b=50),
)
fig_combined_pv.update_layout(**temperature_layout)

# Show the plot
fig_combined_pv.show()

In [11]:
# Summary statistics of the span used for the temperature analysis. The frame
# is addressed explicitly because `df_sens` is rebound by several cells, so its
# value would otherwise depend on which cell happened to run last.
df_spans_flattened[3].describe()

Unnamed: 0,22.pv0,22.pv1,24.pv0,24.pv1,29.pv0,29.pv1,31.pv1,31.pv2,4.pv0,4.pv1,4.pv2,meta_bootCount,meta_devices,meta_errors,meta_modem_signalQuality,meta_power_battery_V,meta_power_solar_V,meta_uuid,time_server_UTC,time_server_epoch
count,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398.0,35398,35398.0
mean,0.498442,0.674841,0.506435,0.682521,0.568345,0.622879,8.669533,9.565777,0.692308,0.785347,0.381138,18655.935985,5.0,0.0,22.137409,4.162586,1.562,207137700000000.0,2024-03-03 00:15:41.297050880,1709425000.0
min,0.494649,0.670009,0.499862,0.678955,0.564594,0.617888,-8.8125,-6.25,0.686713,0.779424,0.376261,789.0,5.0,0.0,6.0,3.836,1.562,207137700000000.0,2023-10-31 13:05:53,1698758000.0
25%,0.497204,0.67372,0.504065,0.681735,0.567154,0.621761,2.125,2.3125,0.691623,0.783456,0.380078,9658.25,5.0,0.0,21.0,4.128,1.562,207137700000000.0,2023-12-31 20:02:08,1704053000.0
50%,0.498288,0.674826,0.505696,0.682586,0.568304,0.622576,7.3125,7.8125,0.692367,0.785037,0.381098,18601.5,5.0,0.0,22.0,4.17,1.562,207137700000000.0,2024-03-02 15:15:13.500000,1709393000.0
75%,0.499957,0.675898,0.508256,0.683436,0.569555,0.624389,14.75,17.875,0.693187,0.787398,0.382253,27777.75,5.0,0.0,24.0,4.198,1.562,207137700000000.0,2024-05-05 01:04:13,1714871000.0
max,0.5018,0.679096,0.513869,0.68519,0.572195,0.627499,32.125,31.375,0.695974,0.790803,0.385825,36673.0,5.0,0.0,31.0,4.802,1.562,207137700000000.0,2024-07-05 12:16:39,1720182000.0
std,0.001606,0.001573,0.003045,0.001137,0.001549,0.00183,7.953498,8.613843,0.001209,0.002434,0.001619,10371.621149,0.0,0.0,2.150506,0.053109,4.440955e-16,0.0,,6192294.0


In [12]:
# Column names of the span used for the temperature analysis. The frame is
# addressed explicitly because `df_sens` is rebound by several cells, so its
# value would otherwise depend on which cell happened to run last.
df_spans_flattened[3].columns

Index(['_id', '22.pv0', '22.pv1', '24.pv0', '24.pv1', '29.pv0', '29.pv1',
       '31.pv1', '31.pv2', '4.pv0', '4.pv1', '4.pv2', 'meta_bootCount',
       'meta_devices', 'meta_errors', 'meta_modem_signalQuality',
       'meta_power_battery_V', 'meta_power_solar_V', 'meta_uuid',
       'meta_version', 'time_server_UTC', 'time_server_epoch'],
      dtype='object')

In [13]:
import pandas as pd
import plotly.graph_objs as go

# Temperature difference 31.pv1 - 31.pv2 of df_spans_flattened[3] over time.
df_sens = df_spans_flattened[3]
average_set = "6h"

# Parse the timestamps in place (a no-op once the column is already datetime).
df_sens['time_server_UTC'] = pd.to_datetime(df_sens['time_server_UTC'])

# Window means of both channels and their difference.
df_temp_1 = df_sens.resample(average_set, on='time_server_UTC')['31.pv1'].mean()
df_temp_2 = df_sens.resample(average_set, on='time_server_UTC')['31.pv2'].mean()
temp_diff = df_temp_1 - df_temp_2

# Single line trace holding the difference.
trace_diff = go.Scatter(
    x=temp_diff.index,
    y=temp_diff.values,
    mode='lines',
    name='Temperature Difference (31.pv1 - 31.pv2)',
)

# Figure containing just that trace.
fig = go.Figure(data=[trace_diff])

# Titles, slanted date ticks, grey background and a fixed canvas.
difference_layout = dict(
    title='Temperature Difference over Time',
    xaxis_title='Date',
    yaxis_title='Temperature Difference (31.pv1 - 31.pv2)',
    xaxis=dict(tickangle=45),
    plot_bgcolor='#EEEEEE',
    width=900,
    height=500,
)
fig.update_layout(**difference_layout)

# Show the plot
fig.show()

In [25]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import os


def plot_sensor_vs_temp_diff(df_temp, df_sens, sensor_column, average_set="6h", switch_axes=False, multiply_by=25, scheme="temps", save=False, save_path="imgs"):
    """Scatter one sensor channel against the 31.pv1 - 31.pv2 temperature difference.

    Both inputs are copied, resampled to ``average_set`` window means and
    joined on the window timestamp, so every marker pairs values of the same
    window even when ``df_temp`` and ``df_sens`` cover different periods.
    Markers are coloured by time. The figure is shown and optionally written
    to ``<save_path>/<sensor_column>.png``.

    Args:
        df_temp: Frame with 'time_server_UTC', '31.pv1' and '31.pv2' columns.
        df_sens: Frame with 'time_server_UTC' and ``sensor_column`` columns.
        sensor_column: Name of the sensor column to plot.
        average_set: Resampling window passed to ``DataFrame.resample``.
        switch_axes: If True the temperature difference is on the x axis and
            the sensor on the y axis; otherwise the other way round.
        multiply_by: Factor applied to the resampled sensor means.
            NOTE(review): the axis label states millimetres, so this is
            presumably a raw-to-mm scale — confirm.
        scheme: Plotly colorscale name used for the time colouring.
        save: If True the figure is also written as a PNG.
        save_path: Directory for the PNG; created when missing.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    if save:
        os.makedirs(save_path, exist_ok=True)

    # Work on copies so the callers' frames are never modified.
    df_temp_copy = df_temp.copy()
    df_sens_copy = df_sens.copy()

    df_temp_copy['time_server_UTC'] = pd.to_datetime(
        df_temp_copy['time_server_UTC'])
    df_sens_copy['time_server_UTC'] = pd.to_datetime(
        df_sens_copy['time_server_UTC'])

    # Window means of the two temperature channels and their difference.
    df_temp_1 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv1'].mean()
    df_temp_2 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv2'].mean()
    temp_diff = df_temp_1 - df_temp_2

    # Window means of the sensor, scaled.
    df_sensor = df_sens_copy.resample(average_set, on='time_server_UTC')[
        sensor_column].mean() * multiply_by

    # Spread of the scaled sensor means, shown in the annotation.
    sensor_range = df_sensor.max() - df_sensor.min()

    # Join on the window timestamp. Plotting the two `.values` arrays directly
    # pairs them by position, which is wrong as soon as the inputs start at
    # different times or have different lengths. Windows missing on one side
    # stay as NaN rows, which plotly does not draw.
    aligned = pd.DataFrame({'temp_diff': temp_diff, 'sensor': df_sensor})

    # Seconds since the epoch for the colour axis.
    # NOTE(review): assumes the index has nanosecond resolution — confirm.
    time_values = aligned.index.astype('int64') / 10**9

    # About five colour-bar ticks. The step is clamped to 1 because a step of
    # zero (fewer than five windows) makes the slice raise ValueError.
    tick_step = max(1, len(aligned) // 5)
    marker = dict(
        size=5,
        color=time_values,   # colour by time
        colorscale=scheme,
        showscale=True,
        colorbar=dict(
            title='Dátum',
            tickvals=time_values[::tick_step],
            ticktext=aligned.index[::tick_step].strftime('%m.%y'),
        ),
    )

    temp_axis = ('Rozdiel teplôt konštrukcie [°C]', aligned['temp_diff'].values)
    sensor_axis = ("Lineárny posun [mm]", aligned['sensor'].values)
    if switch_axes:
        (x_axis_title, x_values), (y_axis_title, y_values) = temp_axis, sensor_axis
        trace_name = f'Rozdiel teplôt konštrukcie vs {sensor_column}'
    else:
        (x_axis_title, x_values), (y_axis_title, y_values) = sensor_axis, temp_axis
        trace_name = f'Čidlo {sensor_column} vs rozdiel teplôt konštrukcie.'

    scatter_plot = go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        marker=marker,
        name=trace_name
    )

    fig = go.Figure()
    fig.add_trace(scatter_plot)

    # Layout with Montserrat font and the sensor-range annotation.
    # NOTE(review): the annotation sits at paper coordinates (1.35, -0.3),
    # i.e. outside the plotting area — confirm it is not clipped.
    fig.update_layout(
        title=f'Čidlo {sensor_column} vs rozdiel teplôt konštrukcie.',
        xaxis_title=x_axis_title,
        yaxis_title=y_axis_title,
        plot_bgcolor='#EEEEEE',
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=800,
        height=500,
        margin=dict(l=50, r=25, t=50, b=50),
        annotations=[
            dict(
                x=1.35,
                y=-0.3,
                xref='paper',
                yref='paper',
                text=f"Δ = {sensor_range:.2f} mm",
                showarrow=False,
                font=dict(
                    family="Montserrat, sans-serif",
                    size=12,
                    color="#000000"
                ),
                align="right"
            )
        ]
    )

    if save:
        # NOTE(review): plot_sensor_vs_temp writes to the same file name, so
        # the two functions overwrite each other's image for a given sensor.
        file_path = os.path.join(save_path, f'{sensor_column}.png')
        pio.write_image(fig, file_path)

    # Show the plot
    fig.show()


# Demo call: temperatures and sensor readings both come from
# df_spans_flattened[3]; the temperature difference goes on the x axis and the
# figure is also saved as a PNG.
df_temp = df_spans_flattened[3]
df_sens = df_spans_flattened[3]
sensor_column = '22.pv1'

plot_sensor_vs_temp_diff(
    df_temp,
    df_sens,
    sensor_column,
    average_set="6h",
    switch_axes=True,
    scheme='temps',
    save=True,
)

In [112]:
import pandas as pd
from scipy.stats import pearsonr
import numpy as np


def analyze_correlation_by_period(df_temp, df_sens, sensor_column, average_set="6h", num_periods=3):
    """Pearson correlation of a sensor with the temperature difference, per period.

    The temperature difference ``31.pv1 - 31.pv2`` and the sensor column are
    resampled to ``average_set`` window means, joined on the window timestamp
    and stripped of incomplete windows. The remaining rows are cut into
    ``num_periods`` consecutive chunks of (almost) equal length; the first
    ``len % num_periods`` chunks get one extra row. Pearson's r and its
    p-value are computed per chunk, and the NaN-ignoring means of both are
    printed.

    NOTE(review): a later cell of this notebook defines a function with the
    same name that returns only the mean correlation; whichever cell ran last
    is the one in effect.
    NOTE(review): an arithmetic mean of p-values is not itself a p-value —
    treat the printed figure as descriptive only.

    Args:
        df_temp: Frame with 'time_server_UTC', '31.pv1' and '31.pv2' columns.
        df_sens: Frame with 'time_server_UTC' and ``sensor_column`` columns.
        sensor_column: Name of the sensor column to correlate.
        average_set: Resampling window passed to ``DataFrame.resample``.
        num_periods: Number of chunks; must be at least 1.

    Returns:
        List of ``(f"Period {i}", correlation, p_value)`` tuples, one per
        chunk. Chunks with fewer than two rows carry NaN for both values.

    Raises:
        ValueError: If ``num_periods`` is smaller than 1.
    """
    # Fail with a clear message instead of a ZeroDivisionError (0) or a
    # silently empty result (negative values).
    if num_periods < 1:
        raise ValueError(f"num_periods must be >= 1, got {num_periods}")

    # Work on copies so the callers' frames are never modified.
    df_temp_copy = df_temp.copy()
    df_sens_copy = df_sens.copy()

    df_temp_copy['time_server_UTC'] = pd.to_datetime(
        df_temp_copy['time_server_UTC'])
    df_sens_copy['time_server_UTC'] = pd.to_datetime(
        df_sens_copy['time_server_UTC'])

    # Window means of the two temperature channels and their difference.
    df_temp_1 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv1'].mean()
    df_temp_2 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv2'].mean()
    temp_diff = df_temp_1 - df_temp_2

    # Window means of the sensor.
    df_sensor = df_sens_copy.resample(average_set, on='time_server_UTC')[
        sensor_column].mean()

    # Join on the window timestamp and keep only complete windows.
    combined_df = pd.DataFrame(
        {'temp_diff': temp_diff, 'sensor': df_sensor}).dropna()

    # Chunk boundaries: every chunk has `period_length` rows and the first
    # `remainder` chunks get one more.
    period_length, remainder = divmod(len(combined_df), num_periods)

    results = []
    correlations = []
    p_values = []
    start = 0
    for i in range(num_periods):
        end = start + period_length + (1 if i < remainder else 0)
        period = combined_df.iloc[start:end]
        start = end

        # Pearson's r needs at least two observations.
        if len(period) > 1:
            correlation, p_value = pearsonr(
                period['temp_diff'], period['sensor'])
        else:
            correlation, p_value = float('nan'), float('nan')
        results.append((f"Period {i+1}", correlation, p_value))
        correlations.append(correlation)
        p_values.append(p_value)

    mean_correlation = np.nanmean(correlations)
    mean_p_value = np.nanmean(p_values)

    # Each replacement field stays on one line: a line break inside the braces
    # of a single-quoted f-string is a SyntaxError before Python 3.12.
    print(f"\nMean Pearson correlation for {sensor_column}: {mean_correlation:.4f}")
    print(f"Mean P-value for {sensor_column}: {mean_p_value:.2e}")

    return results


# Demo call: correlate sensor 22.pv1 with the temperature difference, using
# df_spans_flattened[3] for both inputs and treating the whole record as a
# single period.
df_temp = df_spans_flattened[3]
df_sens = df_spans_flattened[3]
sensor_column = '22.pv1'
num_periods = 1

results = analyze_correlation_by_period(
    df_temp,
    df_sens,
    sensor_column,
    average_set="6h",
    num_periods=num_periods,
)


Mean Pearson correlation for 22.pv1: 0.1188
Mean P-value for 22.pv1: 1.86e-04


In [109]:
import pandas as pd
from scipy.stats import pearsonr
import numpy as np
import plotly.graph_objs as go
import plotly.io as pio


def analyze_correlation_by_period(df_temp, df_sens, sensor_column, average_set="6h", num_periods=3):
    """Mean per-period Pearson correlation of a sensor with the temperature difference.

    The temperature difference ``31.pv1 - 31.pv2`` and the sensor column are
    resampled to ``average_set`` window means, joined on the window timestamp
    and stripped of incomplete windows. The rows are then cut into
    ``num_periods`` consecutive chunks (the first ``len % num_periods`` chunks
    hold one extra row) and Pearson's r is computed for every chunk with at
    least two rows.

    NOTE(review): this definition replaces the function of the same name from
    an earlier cell, which returns a list of per-period tuples instead of a
    single number.

    Args:
        df_temp: Frame with 'time_server_UTC', '31.pv1' and '31.pv2' columns.
        df_sens: Frame with 'time_server_UTC' and ``sensor_column`` columns.
        sensor_column: Name of the sensor column to correlate.
        average_set: Resampling window passed to ``DataFrame.resample``.
        num_periods: Number of chunks; 0 raises ZeroDivisionError.

    Returns:
        NaN-ignoring mean of the per-chunk correlation coefficients.
    """
    # Private copies with parsed timestamps; the inputs stay untouched.
    temp_frame = df_temp.copy()
    sens_frame = df_sens.copy()
    temp_frame['time_server_UTC'] = pd.to_datetime(temp_frame['time_server_UTC'])
    sens_frame['time_server_UTC'] = pd.to_datetime(sens_frame['time_server_UTC'])

    # Window means, combined into one table restricted to complete windows.
    mean_pv1 = temp_frame.resample(average_set, on='time_server_UTC')['31.pv1'].mean()
    mean_pv2 = temp_frame.resample(average_set, on='time_server_UTC')['31.pv2'].mean()
    mean_sensor = sens_frame.resample(average_set, on='time_server_UTC')[sensor_column].mean()
    paired = pd.DataFrame(
        {'temp_diff': mean_pv1 - mean_pv2, 'sensor': mean_sensor}).dropna()

    # Walk through the table chunk by chunk with a running lower bound.
    base_length = len(paired) // num_periods
    extra_rows = len(paired) % num_periods
    r_values = []
    lower = 0
    for period_no in range(num_periods):
        upper = lower + base_length + (1 if period_no < extra_rows else 0)
        chunk = paired.iloc[lower:upper]
        lower = upper

        if len(chunk) <= 1:
            # Too few observations for a correlation coefficient.
            r_values.append(float('nan'))
            continue
        r_value, _ = pearsonr(chunk['temp_diff'], chunk['sensor'])
        r_values.append(r_value)

    return np.nanmean(r_values)


# Sweep: how the mean per-period correlation changes with the number of
# periods the record is divided into. Both inputs are df_spans_flattened[3].
# This relies on the analyze_correlation_by_period variant that returns a
# single number (the definition in this cell), not the one returning a list.
df_temp = df_spans_flattened[3]
df_sens = df_spans_flattened[3]

sensor_column = '22.pv1'

# Parallel lists: number of periods and the matching mean correlation.
num_periods_list = []
mean_correlations_list = []

# 2 ... 100 periods, using 1-hour window means.
for num_periods in range(2, 101):
    mean_correlation = analyze_correlation_by_period(
        df_temp, df_sens, sensor_column,
        average_set="1h", num_periods=num_periods)
    num_periods_list.append(num_periods)
    mean_correlations_list.append(mean_correlation)

# Line-and-marker trace of the sweep result.
sweep_trace = go.Scatter(
    x=num_periods_list,
    y=mean_correlations_list,
    mode='lines+markers',
    name='Priemerná r-hodnota',
    marker=dict(color='#FF6600', size=8),
)
fig = go.Figure()
fig.add_trace(sweep_trace)

# Titles, background and font.
sweep_layout = dict(
    title='Vplyv delenia sledovaného obdobia na koreláciu rozdielu teplôt ku deformáciám',
    xaxis_title='Počet rovnomerných čiastkových časových období',
    yaxis_title='Priemerná r-hodnota',
    plot_bgcolor='rgba(240, 240, 240, 0.95)',
    font=dict(family="Montserrat, sans-serif", size=12, color="#000000"),
)
fig.update_layout(**sweep_layout)

# Show plot
fig.show()

In [45]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import os
from plotly.subplots import make_subplots


def plot_sensor_vs_temp(df_temp, df_sens, sensor_column, average_set="6h", multiply_by=25, scheme="temps", save=False, save_path="imgs"):
    """Scatter one sensor channel against each temperature channel of sensor 31.

    Both inputs are copied, resampled to ``average_set`` window means and
    joined on the window timestamp, so every marker pairs values of the same
    window even when ``df_temp`` and ``df_sens`` cover different periods.
    Two side-by-side panels show the sensor against '31.pv1' and against
    '31.pv2'; markers are coloured by time. The figure is shown and
    optionally written to ``<save_path>/<sensor_column>.png``.

    Args:
        df_temp: Frame with 'time_server_UTC', '31.pv1' and '31.pv2' columns.
        df_sens: Frame with 'time_server_UTC' and ``sensor_column`` columns.
        sensor_column: Name of the sensor column to plot.
        average_set: Resampling window passed to ``DataFrame.resample``.
        multiply_by: Factor applied to the resampled sensor means.
            NOTE(review): the axis label states millimetres, so this is
            presumably a raw-to-mm scale — confirm.
        scheme: Plotly colorscale name used for the time colouring.
        save: If True the figure is also written as a PNG.
        save_path: Directory for the PNG; created when missing.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    if save:
        os.makedirs(save_path, exist_ok=True)

    # Work on copies so the callers' frames are never modified.
    df_temp_copy = df_temp.copy()
    df_sens_copy = df_sens.copy()

    df_temp_copy['time_server_UTC'] = pd.to_datetime(
        df_temp_copy['time_server_UTC'])
    df_sens_copy['time_server_UTC'] = pd.to_datetime(
        df_sens_copy['time_server_UTC'])

    # Window means of the two temperature channels.
    df_temp_1 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv1'].mean()
    df_temp_2 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv2'].mean()

    # Window means of the sensor, scaled.
    df_sensor = df_sens_copy.resample(average_set, on='time_server_UTC')[
        sensor_column].mean() * multiply_by

    # Spread of the scaled sensor means, shown in the annotation.
    sensor_range = df_sensor.max() - df_sensor.min()

    # Join on the window timestamp. Plotting the `.values` arrays directly
    # pairs them by position, which is wrong as soon as the inputs start at
    # different times or have different lengths. Windows missing on one side
    # stay as NaN rows, which plotly does not draw.
    aligned = pd.DataFrame(
        {'temp_1': df_temp_1, 'temp_2': df_temp_2, 'sensor': df_sensor})

    # Seconds since the epoch for the colour axis.
    # NOTE(review): assumes the index has nanosecond resolution — confirm.
    time_values = aligned.index.astype('int64') / 10**9

    # About five colour-bar ticks. The step is clamped to 1 because a step of
    # zero (fewer than five windows) makes the slice raise ValueError.
    tick_step = max(1, len(aligned) // 5)
    tick_values = time_values[::tick_step]
    tick_labels = aligned.index[::tick_step].strftime('%m.%y')

    def _temperature_trace(temperature_column):
        # Markers of the sensor against one temperature channel, coloured by time.
        return go.Scatter(
            x=aligned[temperature_column].values,
            y=aligned['sensor'].values,
            mode='markers',
            marker=dict(
                size=3,
                color=time_values,
                colorscale=scheme,
                showscale=True,
                colorbar=dict(
                    title='Dátum',
                    tickvals=tick_values,
                    ticktext=tick_labels
                )
            ),
            showlegend=False
        )

    scatter_plot_1 = _temperature_trace('temp_1')
    scatter_plot_2 = _temperature_trace('temp_2')

    x_axis_title_1 = 'Teplota 31.pv1 (spodná) [°C]'
    x_axis_title_2 = 'Teplota 31.pv2 (horná) [°C]'
    y_axis_title = "Lineárny posun [mm]"

    # One panel per temperature channel.
    fig = make_subplots(rows=1, cols=2, subplot_titles=(
        'Teplota 31.pv1', 'Teplota 31.pv2'))

    fig.add_trace(scatter_plot_1, row=1, col=1)
    fig.add_trace(scatter_plot_2, row=1, col=2)

    # Layout with Montserrat font and the sensor-range annotation.
    # NOTE(review): the annotation sits at paper coordinates (1.35, -0.3),
    # i.e. outside the plotting area — confirm it is not clipped.
    fig.update_layout(
        title=f'Čidlo {sensor_column} vs Teplota',
        plot_bgcolor='#EEEEEE',
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=900,  # wide canvas for the side-by-side panels
        height=400,
        margin=dict(l=50, r=25, t=50, b=50),
        annotations=[
            dict(
                x=1.35,
                y=-0.3,
                xref='paper',
                yref='paper',
                text=f"Δ = {sensor_range:.2f} mm",
                showarrow=False,
                font=dict(
                    family="Montserrat, sans-serif",
                    size=12,
                    color="#000000"
                ),
                align="right"
            )
        ],
        showlegend=False
    )

    # Axis titles per panel.
    fig.update_xaxes(title_text=x_axis_title_1, row=1, col=1)
    fig.update_xaxes(title_text=x_axis_title_2, row=1, col=2)
    fig.update_yaxes(title_text=y_axis_title, row=1, col=1)
    fig.update_yaxes(title_text=y_axis_title, row=1, col=2)

    if save:
        # NOTE(review): plot_sensor_vs_temp_diff writes to the same file name,
        # so the two functions overwrite each other's image for a given sensor.
        file_path = os.path.join(save_path, f'{sensor_column}.png')
        pio.write_image(fig, file_path)

    # Show the plot
    fig.show()


# Demo call: temperatures and sensor readings both come from
# df_spans_flattened[3]; the figure is also saved as a PNG.
df_temp = df_spans_flattened[3]
df_sens = df_spans_flattened[3]
sensor_column = '22.pv1'

plot_sensor_vs_temp(
    df_temp,
    df_sens,
    sensor_column,
    average_set="6h",
    scheme='temps',
    save=True,
)

In [36]:
import pandas as pd
from scipy.stats import pearsonr


def analyze_correlation(df_temp, df_sens, sensor_column, average_set="6h"):
    """Pearson correlation of a sensor with each temperature channel of sensor 31.

    The '31.pv1' and '31.pv2' columns of ``df_temp`` and the sensor column of
    ``df_sens`` are resampled to ``average_set`` window means. For each
    temperature channel the means are joined with the sensor means on the
    window timestamp, incomplete windows are removed, and Pearson's r with
    its p-value is computed and printed.

    Args:
        df_temp: Frame with 'time_server_UTC', '31.pv1' and '31.pv2' columns.
        df_sens: Frame with 'time_server_UTC' and ``sensor_column`` columns.
        sensor_column: Name of the sensor column to correlate.
        average_set: Resampling window passed to ``DataFrame.resample``.

    Returns:
        ``((r_pv1, p_pv1), (r_pv2, p_pv2))`` — correlation coefficient and
        p-value against '31.pv1' and against '31.pv2'.
    """
    # Work on copies so the callers' frames are never modified.
    df_temp_copy = df_temp.copy()
    df_sens_copy = df_sens.copy()

    df_temp_copy['time_server_UTC'] = pd.to_datetime(
        df_temp_copy['time_server_UTC'])
    df_sens_copy['time_server_UTC'] = pd.to_datetime(
        df_sens_copy['time_server_UTC'])

    # Window means of the two temperature channels.
    df_temp_1 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv1'].mean()
    df_temp_2 = df_temp_copy.resample(average_set, on='time_server_UTC')[
        '31.pv2'].mean()

    # Window means of the sensor.
    df_sensor = df_sens_copy.resample(average_set, on='time_server_UTC')[
        sensor_column].mean()

    # One table per temperature channel, restricted to complete windows.
    combined_df_1 = pd.DataFrame(
        {'temp_1': df_temp_1, 'sensor': df_sensor}).dropna()
    combined_df_2 = pd.DataFrame(
        {'temp_2': df_temp_2, 'sensor': df_sensor}).dropna()

    # Sensor vs 31.pv1
    correlation_1, p_value_1 = pearsonr(
        combined_df_1['temp_1'], combined_df_1['sensor'])

    # Sensor vs 31.pv2
    correlation_2, p_value_2 = pearsonr(
        combined_df_2['temp_2'], combined_df_2['sensor'])

    # Each replacement field stays on one line: a line break inside the braces
    # of a single-quoted f-string is a SyntaxError before Python 3.12.
    print(f"Pearson correlation for {sensor_column} vs 31.pv1: {correlation_1:.4f}")
    print(f"P-value for {sensor_column} vs 31.pv1: {p_value_1:.2e}")

    print(f"Pearson correlation for {sensor_column} vs 31.pv2: {correlation_2:.4f}")
    print(f"P-value for {sensor_column} vs 31.pv2: {p_value_2:.2e}")

    return (correlation_1, p_value_1), (correlation_2, p_value_2)


# Demo call: correlate sensor 22.pv1 with both temperature channels, using
# df_spans_flattened[3] for the temperatures and for the sensor readings.
df_temp = df_spans_flattened[3]
df_sens = df_spans_flattened[3]
sensor_column = '22.pv1'

results_31_pv1, results_31_pv2 = analyze_correlation(
    df_temp,
    df_sens,
    sensor_column,
    average_set="6h",
)

Pearson correlation for 22.pv1 vs 31.pv1: -0.8762
P-value for 22.pv1 vs 31.pv1: 2.96e-314
Pearson correlation for 22.pv1 vs 31.pv2: -0.8322
P-value for 22.pv1 vs 31.pv2: 2.56e-254


In [15]:
import pandas as pd
import plotly.graph_objs as go
import numpy as np
from scipy.stats import pearsonr
import os


def plot_std_vs_temp_diff_with_correlation(df_sens, sensor_column, average_set="6h", multiply_by=25, scheme="temps", save=False):
    """
    Scatter the per-window standard deviation of a sensor against the
    per-window standard deviation of the temperature difference
    (``31.pv1 - 31.pv2``), overlay a least-squares line, print the Pearson
    correlation and show the figure.

    Parameters
    ----------
    df_sens : pandas.DataFrame
        Must contain ``time_server_UTC``, ``31.pv1``, ``31.pv2`` and
        ``sensor_column``. The frame is copied and never modified.
    sensor_column : str
        Column holding the sensor readings.
    average_set : str
        Window size handed to ``DataFrame.resample``.
    multiply_by : float
        Factor applied to the sensor readings before the standard deviation
        is taken.
    scheme : str
        Plotly colorscale used to colour the markers by window time.
    save : bool
        When true, the figure is also written to
        ``'{sensor_column}_disp.png'`` in the working directory.

    Raises
    ------
    ValueError
        If fewer than two windows have a standard deviation for both series.
    """
    data = df_sens.copy()
    data['time_server_UTC'] = pd.to_datetime(data['time_server_UTC'])

    # The difference is taken before scaling the sensor column.
    data['temp_diff'] = data['31.pv1'] - data['31.pv2']
    data[sensor_column] = data[sensor_column] * multiply_by

    windows = data.resample(average_set, on='time_server_UTC')
    temp_diff_std = windows['temp_diff'].std().sort_index()
    df_sensor_std = windows[sensor_column].std().sort_index()

    # Keep only windows where BOTH series have a value. Dropping NaNs from
    # each series separately can leave arrays of different length, or pair up
    # values that belong to different windows.
    valid = temp_diff_std.notna() & df_sensor_std.notna()
    x_valid = temp_diff_std[valid]
    y_valid = df_sensor_std[valid]
    if len(x_valid) < 2:
        raise ValueError(
            f"Need at least two windows with data for {sensor_column!r} "
            f"and the temperature difference; got {len(x_valid)}.")

    corr, p_value = pearsonr(x_valid, y_valid)
    print(f"Pearson Correlation Coefficient: {corr}, p-value: {p_value}")

    # Window start as epoch seconds; int64 is requested explicitly so the
    # conversion does not depend on the platform's default integer width.
    time_values = x_valid.index.astype('int64') / 10**9

    scatter_plot = go.Scatter(
        x=x_valid.values,
        y=y_valid.values,
        mode='markers',
        marker=dict(
            size=4,
            color=time_values,     # Color by time
            colorscale=scheme,
            showscale=False,       # No colorbar
        ),
        name=f'Čidlo {sensor_column} vs Teplotný rozdiel konštrukcie'
    )

    # A straight line is fully described by its two end points.
    slope, intercept = np.polyfit(x_valid, y_valid, 1)
    x_line = np.array([x_valid.min(), x_valid.max()])
    fit_line = go.Scatter(
        x=x_line,
        y=slope * x_line + intercept,
        mode='lines',
        line=dict(color='rgba(0, 0, 0, 0.5)', width=3),
        name='Fitted Line'
    )

    fig = go.Figure()
    fig.add_trace(scatter_plot)
    fig.add_trace(fit_line)

    fig.update_layout(
        title=f'Čidlo {sensor_column} vs Teplotný rozdiel konštrukcie',
        xaxis_title='STD teplotného rozdielu konštrukcie [°C]',
        yaxis_title='STD vzdialenosti [mm]',
        plot_bgcolor='#EEEEEE',
        showlegend=False,
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=800,
        height=500,
        margin=dict(l=50, r=25, t=50, b=50),
        annotations=[
            dict(
                x=-0.2,
                y=-0.2,
                xref='paper',
                yref='paper',
                # ρ is the correlation coefficient; the p-value is labelled
                # separately so the two cannot be confused.
                text=f"ρ = {corr:.2f}, p = {p_value:.2e}",
                showarrow=False,
                font=dict(
                    family="Montserrat, sans-serif",
                    size=12,
                    color="#000000"
                ),
                align="left"
            )
        ]
    )

    if save:
        file_path = f'{sensor_column}_disp.png'
        fig.write_image(file_path)

    # Show the plot
    fig.show()


# Example usage: plot the 12-hour standard deviation of sensor 22.pv0 against
# that of the temperature difference, and also save the figure as a PNG.
# Ensure this is the correct DataFrame containing sensor data
df_sens = df_spans_flattened[3]
sensor_column = '22.pv0'
plot_std_vs_temp_diff_with_correlation(
    df_sens, sensor_column, scheme='temps', average_set="12h", save=True)

Pearson Correlation Coefficient: 0.8415361648878803, p-value: 2.0670450218270207e-133


In [16]:
# List the columns of every flattened span frame.
# A named loop variable is used because binding ``_`` at cell level shadows
# IPython's last-output variable for the rest of the session.
for span_df in df_spans_flattened:
    print(span_df.columns)

Index(['_id', '10.pv0', '10.pv1', '11.pv0', '11.pv1', '17.pv0', '17.pv1',
       '19.pv0', '19.pv1', '20.pv0', '20.pv1', '25.pv0', '25.pv1', '27.pv0',
       '27.pv1', '6.pv0', '6.pv1', 'meta_bootCount', 'meta_devices',
       'meta_errors', 'meta_modem_signalQuality', 'meta_power_battery_V',
       'meta_power_solar_V', 'meta_uuid', 'meta_version', 'time_server_UTC',
       'time_server_epoch'],
      dtype='object')
Index(['_id', '15.pv0', '15.pv1', '2.pv0', '2.pv1', '28.pv0', '28.pv1',
       '3.pv0', '3.pv1', '3.pv2', 'meta_bootCount', 'meta_devices',
       'meta_errors', 'meta_modem_signalQuality', 'meta_power_battery_V',
       'meta_power_solar_V', 'meta_uuid', 'meta_version', 'time_server_UTC',
       'time_server_epoch'],
      dtype='object')
Index(['_id', '1.pv0', '1.pv1', '12.pv0', '12.pv1', '13.pv0', '13.pv1',
       '14.pv0', '14.pv1', '23.pv0', '23.pv1', '26.pv0', '26.pv1', '30.pv0',
       '30.pv1', '30.pv2', '8.pv0', '8.pv1', 'meta_bootCount', 'meta_devices',
       '

In [17]:
import pandas as pd
import plotly.graph_objs as go


def plot_sensor_vs_temp_diff(df_temp, df_sens, sensor_column, average_set="6h", switch_axes=False, multiply_by=25, scheme="Viridis"):
    """
    Show a scatter of a resampled sensor against the resampled temperature
    difference (``31.pv1 - 31.pv2``), with markers coloured by time.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame with ``time_server_UTC``, ``31.pv1`` and ``31.pv2``.
    df_sens : pandas.DataFrame
        Frame with ``time_server_UTC`` and ``sensor_column``.
    sensor_column : str
        Sensor column to plot.
    average_set : str
        Window size handed to ``DataFrame.resample``.
    switch_axes : bool
        True puts the temperature difference on x and the sensor on y;
        False puts the sensor on x and the temperature difference on y.
    multiply_by : float
        Factor applied to the window means of the sensor.
    scheme : str
        Plotly colorscale for the time colouring.
    """
    # assign() builds new frames, so neither input is altered.
    temps = df_temp.assign(
        time_server_UTC=pd.to_datetime(df_temp['time_server_UTC']))
    readings = df_sens.assign(
        time_server_UTC=pd.to_datetime(df_sens['time_server_UTC']))

    # Window means of both temperature channels, then their difference as a
    # two-column frame (timestamp, temp_diff).
    temp_windows = temps.resample(average_set, on='time_server_UTC')
    lower_mean = temp_windows['31.pv1'].mean()
    upper_mean = temp_windows['31.pv2'].mean()
    temp_diff = (lower_mean - upper_mean).rename('temp_diff').reset_index()

    # Scaled window means of the sensor as (timestamp, sensor_value).
    sensor_mean = readings.resample(average_set, on='time_server_UTC')[
        sensor_column].mean()
    df_sensor = (sensor_mean * multiply_by).rename(
        'sensor_value').reset_index()

    # Each temperature window is paired with the nearest sensor window.
    merged_df = pd.merge_asof(temp_diff, df_sensor,
                              on='time_server_UTC', direction='nearest')

    # Epoch seconds drive the marker colour.
    time_values = merged_df['time_server_UTC'].astype(int) / 10**9

    # Everything that depends on the axis orientation is decided here so the
    # trace itself is built only once.
    temp_axis_label = 'Temperature Difference (31.pv1 - 31.pv2)'
    if switch_axes:
        x_key, y_key = 'temp_diff', 'sensor_value'
        trace_name = f'Temperature Difference vs {sensor_column}'
        x_axis_title, y_axis_title = temp_axis_label, sensor_column
    else:
        x_key, y_key = 'sensor_value', 'temp_diff'
        trace_name = f'{sensor_column} vs Temperature Difference'
        x_axis_title, y_axis_title = sensor_column, temp_axis_label

    scatter_plot = go.Scatter(
        x=merged_df[x_key],
        y=merged_df[y_key],
        mode='markers',
        marker=dict(
            size=10,
            color=time_values,
            colorscale=scheme,
            showscale=True,
            colorbar=dict(title='Time')
        ),
        name=trace_name
    )

    fig = go.Figure()
    fig.add_trace(scatter_plot)
    fig.update_layout(
        title=f'{sensor_column} vs Temperature Difference (31.pv1 - 31.pv2)',
        xaxis_title=x_axis_title,
        yaxis_title=y_axis_title,
        plot_bgcolor='#EEEEEE',
        # Horizontal legend placed above the plot area.
        legend=dict(orientation='h', yanchor='bottom', y=1.02,
                    xanchor='right', x=1)
    )

    fig.show()


# Example usage: temperature comes from span 3, the sensor from span 1.
df_temp = df_spans_flattened[3]  # DataFrame containing temperature data
df_sens = df_spans_flattened[1]  # DataFrame containing sensor data

sensor_column = '15.pv0'  # Replace with the desired sensor column
# switch_axes=True puts the temperature difference on x and the sensor on y.
plot_sensor_vs_temp_diff(df_temp, df_sens, sensor_column,
                         switch_axes=True, scheme='temps')

In [18]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import os


def plot_sensor_vs_temp_diff(df_temp, df_sens, sensor_column, average_set="6h", switch_axes=False, multiply_by=25, scheme="temps", save_path="imgs"):
    """
    Write a PNG scatter of a resampled sensor against the resampled
    temperature difference (``31.pv1 - 31.pv2``), coloured by window time.

    The two series come from different frames, so they are paired by window
    timestamp; windows present in only one of them are left out of the plot.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame with ``time_server_UTC``, ``31.pv1`` and ``31.pv2``.
    df_sens : pandas.DataFrame
        Frame with ``time_server_UTC`` and ``sensor_column``.
    sensor_column : str
        Sensor column to plot; also used in the output file name.
    average_set : str
        Window size handed to ``DataFrame.resample``.
    switch_axes : bool
        True puts the temperature difference on x and the sensor on y;
        False is the other way round.
    multiply_by : float
        Factor applied to the window means of the sensor.
    scheme : str
        Plotly colorscale for the time colouring.
    save_path : str
        Directory for ``'{sensor_column}_vs_temp_diff.png'``; created when
        missing.
    """
    # assign() builds new frames, so neither input is altered.
    temp_frame = df_temp.assign(
        time_server_UTC=pd.to_datetime(df_temp['time_server_UTC']))
    sens_frame = df_sens.assign(
        time_server_UTC=pd.to_datetime(df_sens['time_server_UTC']))

    temp_means = temp_frame.resample(average_set, on='time_server_UTC')[
        ['31.pv1', '31.pv2']].mean()
    temp_diff = temp_means['31.pv1'] - temp_means['31.pv2']

    df_sensor = sens_frame.resample(average_set, on='time_server_UTC')[
        sensor_column].mean() * multiply_by

    # Spread of the (scaled) sensor over its whole record, for the annotation.
    sensor_range = df_sensor.max() - df_sensor.min()

    # Pair the series by window timestamp. Taking .values of each series
    # separately would pair them by position, which is wrong as soon as the
    # two frames start at different times or have a different number of
    # windows.
    # NOTE(review): this relies on both resamples producing the same window
    # edges; confirm for window sizes that do not divide a day evenly.
    paired = pd.DataFrame(
        {'temp_diff': temp_diff, 'sensor': df_sensor}).dropna()

    # Window start as epoch seconds drives the marker colour.
    time_values = paired.index.astype('int64') / 10**9

    # About five colorbar ticks; the step is clamped to 1 so that fewer than
    # five windows do not produce a zero slice step.
    tick_step = max(1, len(paired) // 5)
    tick_times = paired.index[::tick_step]
    colorbar = dict(
        title='Dátum',
        tickvals=tick_times.astype('int64') / 10**9,
        ticktext=tick_times.strftime('%m.%y')
    )

    temp_axis_title = 'Rozdiel teplôt konštrukcie [°C]'
    sensor_axis_title = "Lineárny posun [mm]"
    if switch_axes:
        x_values, y_values = paired['temp_diff'].values, paired['sensor'].values
        trace_name = f'Rozdiel teplôt konštrukcie vs {sensor_column}'
        x_axis_title, y_axis_title = temp_axis_title, sensor_axis_title
    else:
        x_values, y_values = paired['sensor'].values, paired['temp_diff'].values
        trace_name = f'Čidlo {sensor_column} vs rozdiel teplôt konštrukcie.'
        x_axis_title, y_axis_title = sensor_axis_title, temp_axis_title

    scatter_plot = go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        marker=dict(
            size=5,
            color=time_values,
            colorscale=scheme,
            showscale=True,
            colorbar=colorbar
        ),
        name=trace_name
    )

    fig = go.Figure()
    fig.add_trace(scatter_plot)

    fig.update_layout(
        title=f'Čidlo {sensor_column} vs rozdiel teplôt konštrukcie.',
        xaxis_title=x_axis_title,
        yaxis_title=y_axis_title,
        plot_bgcolor='#EEEEEE',
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=400,
        height=250,
        margin=dict(l=50, r=25, t=50, b=50),
        annotations=[
            dict(
                x=1.35,
                y=-0.3,
                xref='paper',
                yref='paper',
                text=f"Δ = {sensor_range:.2f} mm",
                showarrow=False,
                font=dict(
                    family="Montserrat, sans-serif",
                    size=12,
                    color="#000000"
                ),
                align="right"
            )
        ]
    )

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)
    file_path = os.path.join(save_path, f'{sensor_column}_vs_temp_diff.png')
    pio.write_image(fig, file_path)


# Dictionary summarizing sensors in each DataFrame.
# Keys are positions in df_spans_flattened (as strings); values are the
# measurement columns found in that span's frame.
sensors_in_dfs = {
    "0": ["10.pv0", "10.pv1", "11.pv0", "11.pv1", "17.pv0", "17.pv1", "19.pv0", "19.pv1", "20.pv0", "20.pv1", "25.pv0", "25.pv1", "27.pv0", "27.pv1", "6.pv0", "6.pv1"],
    "1": ["15.pv0", "15.pv1", "2.pv0", "2.pv1", "28.pv0", "28.pv1", "3.pv0", "3.pv1", "3.pv2"],
    "2": ["1.pv0", "1.pv1", "12.pv0", "12.pv1", "13.pv0", "13.pv1", "14.pv0", "14.pv1", "23.pv0", "23.pv1", "26.pv0", "26.pv1", "30.pv0", "30.pv1", "30.pv2", "8.pv0", "8.pv1"],
    "3": ["22.pv0", "22.pv1", "24.pv0", "24.pv1", "29.pv0", "29.pv1", "31.pv1", "31.pv2", "4.pv0", "4.pv1", "4.pv2"],
    "4": ["16.pv0", "16.pv1", "18.pv0", "18.pv1", "21.pv0", "21.pv1", "5.pv0", "5.pv1", "7.pv0", "7.pv1", "9.pv0", "9.pv1"]
}

# Loop through the dictionary and create plots.
# The temperature columns (31.pv1 / 31.pv2) are always read from span 3.
df_temp = df_spans_flattened[3]  # DataFrame containing temperature data

for df_index, sensors in sensors_in_dfs.items():
    # DataFrame containing sensor data
    df_sens = df_spans_flattened[int(df_index)]
    for sensor in sensors:
        # Exclude temperature sensors when plotting
        if sensor not in ['31.pv1', '31.pv2']:
            # One PNG per sensor is written into "imgs".
            plot_sensor_vs_temp_diff(
                df_temp, df_sens, sensor, switch_axes=True, scheme='temps', save_path="imgs")

In [48]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import os
from plotly.subplots import make_subplots


def plot_sensor_vs_temp_diff(df_temp, df_sens, sensor_column, average_set="6h", multiply_by=25, scheme="temps", save_path="imgs"):
    """
    Write a PNG with two side-by-side scatters of a resampled sensor against
    each temperature channel (``31.pv1`` left, ``31.pv2`` right), coloured by
    window time.

    The series come from different frames, so they are paired by window
    timestamp rather than by position.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame with ``time_server_UTC``, ``31.pv1`` and ``31.pv2``.
    df_sens : pandas.DataFrame
        Frame with ``time_server_UTC`` and ``sensor_column``.
    sensor_column : str
        Sensor column to plot; also used as the output file name.
    average_set : str
        Window size handed to ``DataFrame.resample``.
    multiply_by : float
        Factor applied to the window means of the sensor.
    scheme : str
        Plotly colorscale for the time colouring.
    save_path : str
        Directory for ``'{sensor_column}.png'``; created when missing.
    """
    # assign() builds new frames, so neither input is altered.
    temp_frame = df_temp.assign(
        time_server_UTC=pd.to_datetime(df_temp['time_server_UTC']))
    sens_frame = df_sens.assign(
        time_server_UTC=pd.to_datetime(df_sens['time_server_UTC']))

    temp_means = temp_frame.resample(average_set, on='time_server_UTC')[
        ['31.pv1', '31.pv2']].mean()
    df_sensor = sens_frame.resample(average_set, on='time_server_UTC')[
        sensor_column].mean() * multiply_by

    # Spread of the (scaled) sensor over its whole record, for the annotation.
    sensor_range = df_sensor.max() - df_sensor.min()

    # Pair everything by window timestamp. Using .values of each series on
    # its own would pair by position, which breaks when the frames start at
    # different times or have a different number of windows.
    # NOTE(review): this relies on both resamples producing the same window
    # edges; confirm for window sizes that do not divide a day evenly.
    aligned = pd.DataFrame({
        'temp_1': temp_means['31.pv1'],
        'temp_2': temp_means['31.pv2'],
        'sensor': df_sensor,
    })
    # Keep windows that have a sensor value and at least one temperature.
    aligned = aligned.dropna(subset=['sensor'])
    aligned = aligned.dropna(subset=['temp_1', 'temp_2'], how='all')

    # Window start as epoch seconds drives the marker colour.
    time_values = aligned.index.astype('int64') / 10**9

    # About five colorbar ticks; the step is clamped to 1 so that fewer than
    # five windows do not produce a zero slice step.
    tick_step = max(1, len(aligned) // 5)
    tick_times = aligned.index[::tick_step]

    def _scatter(temp_key):
        # Both panels share marker styling and the date colorbar.
        return go.Scatter(
            x=aligned[temp_key].values,
            y=aligned['sensor'].values,
            mode='markers',
            marker=dict(
                size=3,
                color=time_values,
                colorscale=scheme,
                showscale=True,
                colorbar=dict(
                    title='Dátum',
                    tickvals=tick_times.astype('int64') / 10**9,
                    ticktext=tick_times.strftime('%m.%y')
                )
            ),
            showlegend=False
        )

    x_axis_title_1 = 'Teplota 31.pv1 (spodná) [°C]'
    x_axis_title_2 = 'Teplota 31.pv2 (horná) [°C]'
    y_axis_title = "Lineárny posun [mm]"

    fig = make_subplots(rows=1, cols=2, subplot_titles=(
        'Teplota 31.pv1', 'Teplota 31.pv2'))

    fig.add_trace(_scatter('temp_1'), row=1, col=1)
    fig.add_trace(_scatter('temp_2'), row=1, col=2)

    fig.update_layout(
        title=f'Čidlo {sensor_column} vs Teplota',
        plot_bgcolor='#EEEEEE',
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=900,  # Wide enough for the two side-by-side panels
        height=375,
        margin=dict(l=50, r=25, t=50, b=50),
        showlegend=False
    )

    # The subplot titles are stored as layout annotations, so the range label
    # is appended with add_annotation; passing annotations= to update_layout
    # would overwrite a subplot title.
    fig.add_annotation(
        x=1.35,
        y=-0.3,
        xref='paper',
        yref='paper',
        text=f"Δ = {sensor_range:.2f} mm",
        showarrow=False,
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        align="right"
    )

    fig.update_xaxes(title_text=x_axis_title_1, row=1, col=1)
    fig.update_xaxes(title_text=x_axis_title_2, row=1, col=2)
    fig.update_yaxes(title_text=y_axis_title, row=1, col=1)
    fig.update_yaxes(title_text=y_axis_title, row=1, col=2)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)
    file_path = os.path.join(save_path, f'{sensor_column}.png')
    pio.write_image(fig, file_path)

    # Show the plot
    # fig.show()


# # Dictionary summarizing sensors in each DataFrame
# sensors_in_dfs = {
#     "0": ["10.pv0", "10.pv1", "11.pv0", "11.pv1", "17.pv0", "17.pv1", "19.pv0", "19.pv1", "20.pv0", "20.pv1", "25.pv0", "25.pv1", "27.pv0", "27.pv1", "6.pv0", "6.pv1"],
#     "1": ["15.pv0", "15.pv1", "2.pv0", "2.pv1", "28.pv0", "28.pv1", "3.pv0", "3.pv1", "3.pv2"],
#     "2": ["1.pv0", "1.pv1", "12.pv0", "12.pv1", "13.pv0", "13.pv1", "14.pv0", "14.pv1", "23.pv0", "23.pv1", "26.pv0", "26.pv1", "30.pv0", "30.pv1", "30.pv2", "8.pv0", "8.pv1"],
#     "3": ["22.pv0", "22.pv1", "24.pv0", "24.pv1", "29.pv0", "29.pv1", "31.pv1", "31.pv2", "4.pv0", "4.pv1", "4.pv2"],
#     "4": ["16.pv0", "16.pv1", "18.pv0", "18.pv1", "21.pv0", "21.pv1", "5.pv0", "5.pv1", "7.pv0", "7.pv1", "9.pv0", "9.pv1"]
# }

# Sensor columns to plot, keyed by position in df_spans_flattened (as a
# string). This rebinds the notebook-level name sensors_in_dfs.
sensors_in_dfs = {
    "0": ["10.pv1", "6.pv1", "19.pv0", "17.pv1", "11.pv0", "20.pv0", "27.pv1", "25.pv1"],
    "1": ["15.pv0", "3.pv0", "2.pv1", "28.pv1"],
    "2": ["12.pv0", "30.pv0", "1.pv1", "14.pv0", "13.pv0", "26.pv0", "23.pv0", "8.pv1"],
    "3": ["29.pv0", "24.pv1", "22.pv0", "4.pv1"],
    "4": ["21.pv0", "18.pv1", "7.pv1", "16.pv0", "5.pv0", "9.pv0"]
}


# Loop through the dictionary and create plots.
# The temperature columns (31.pv1 / 31.pv2) are always read from span 3.
df_temp = df_spans_flattened[3]  # DataFrame containing temperature data

for df_index, sensors in sensors_in_dfs.items():
    # DataFrame containing sensor data
    df_sens = df_spans_flattened[int(df_index)]
    for sensor in sensors:
        # Exclude temperature sensors when plotting
        if sensor not in ['31.pv1', '31.pv2']:
            # One PNG per sensor is written into "imgs3".
            plot_sensor_vs_temp_diff(
                df_temp, df_sens, sensor, scheme='temps', save_path="imgs3")

In [20]:
# Print every span index together with its list of sensor columns.
for df_index, sensors in sensors_in_dfs.items():
    # DataFrame containing sensor data
    # (looked up but not used inside this loop; afterwards df_sens stays bound
    # to the frame of the last span)
    df_sens = df_spans_flattened[int(df_index)]
    print(df_index, sensors)

0 ['10.pv0', '10.pv1', '11.pv0', '11.pv1', '17.pv0', '17.pv1', '19.pv0', '19.pv1', '20.pv0', '20.pv1', '25.pv0', '25.pv1', '27.pv0', '27.pv1', '6.pv0', '6.pv1']
1 ['15.pv0', '15.pv1', '2.pv0', '2.pv1', '28.pv0', '28.pv1', '3.pv0', '3.pv1', '3.pv2']
2 ['1.pv0', '1.pv1', '12.pv0', '12.pv1', '13.pv0', '13.pv1', '14.pv0', '14.pv1', '23.pv0', '23.pv1', '26.pv0', '26.pv1', '30.pv0', '30.pv1', '30.pv2', '8.pv0', '8.pv1']
3 ['22.pv0', '22.pv1', '24.pv0', '24.pv1', '29.pv0', '29.pv1', '31.pv1', '31.pv2', '4.pv0', '4.pv1', '4.pv2']
4 ['16.pv0', '16.pv1', '18.pv0', '18.pv1', '21.pv0', '21.pv1', '5.pv0', '5.pv1', '7.pv0', '7.pv1', '9.pv0', '9.pv1']


In [21]:
# List the columns of every flattened span frame.
# A named loop variable is used because binding ``_`` at cell level shadows
# IPython's last-output variable for the rest of the session.
for span_df in df_spans_flattened:
    print(span_df.columns)

Index(['_id', '10.pv0', '10.pv1', '11.pv0', '11.pv1', '17.pv0', '17.pv1',
       '19.pv0', '19.pv1', '20.pv0', '20.pv1', '25.pv0', '25.pv1', '27.pv0',
       '27.pv1', '6.pv0', '6.pv1', 'meta_bootCount', 'meta_devices',
       'meta_errors', 'meta_modem_signalQuality', 'meta_power_battery_V',
       'meta_power_solar_V', 'meta_uuid', 'meta_version', 'time_server_UTC',
       'time_server_epoch'],
      dtype='object')
Index(['_id', '15.pv0', '15.pv1', '2.pv0', '2.pv1', '28.pv0', '28.pv1',
       '3.pv0', '3.pv1', '3.pv2', 'meta_bootCount', 'meta_devices',
       'meta_errors', 'meta_modem_signalQuality', 'meta_power_battery_V',
       'meta_power_solar_V', 'meta_uuid', 'meta_version', 'time_server_UTC',
       'time_server_epoch'],
      dtype='object')
Index(['_id', '1.pv0', '1.pv1', '12.pv0', '12.pv1', '13.pv0', '13.pv1',
       '14.pv0', '14.pv1', '23.pv0', '23.pv1', '26.pv0', '26.pv1', '30.pv0',
       '30.pv1', '30.pv2', '8.pv0', '8.pv1', 'meta_bootCount', 'meta_devices',
       '

In [22]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import plotly.graph_objs as go
import plotly.io as pio
import os


def plot_std_vs_temp_diff_with_correlation(df_temp, df_sens, sensor_column, average_set="6h", multiply_by=25, scheme="temps", save=False, save_path="imgs"):
    """
    Scatter the per-window standard deviation of a sensor against the
    per-window standard deviation of the temperature difference
    (``31.pv1 - 31.pv2``), overlay a least-squares line and print the Pearson
    correlation.

    Every sensor row is matched with the temperature row nearest in time, so
    ``df_temp`` and ``df_sens`` may be different frames or the same one.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame with ``time_server_UTC``, ``31.pv1`` and ``31.pv2``.
    df_sens : pandas.DataFrame
        Frame with ``time_server_UTC`` and ``sensor_column``.
    sensor_column : str
        Sensor column to analyse; also used in the output file name.
    average_set : str
        Window size handed to ``DataFrame.resample``.
    multiply_by : float
        Factor applied to the sensor readings before the standard deviation
        is taken.
    scheme : str
        Plotly colorscale used to colour the markers by window time.
    save : bool
        When true, write ``'{sensor_column}_disp.png'`` into ``save_path``.
    save_path : str
        Output directory; created when ``save`` is true and it is missing.

    Raises
    ------
    ValueError
        If fewer than two windows have a standard deviation for both series.
    """
    if save:
        os.makedirs(save_path, exist_ok=True)

    # Only the columns that are actually used go into the merge. Merging the
    # complete frames renames every shared column with _x/_y suffixes, which
    # hides the sensor column whenever both arguments are the same frame.
    temp_side = pd.DataFrame({
        'time_server_UTC': pd.to_datetime(df_temp['time_server_UTC']),
        'temp_diff': df_temp['31.pv1'] - df_temp['31.pv2'],
    }).sort_values('time_server_UTC')
    sens_side = pd.DataFrame({
        'time_server_UTC': pd.to_datetime(df_sens['time_server_UTC']),
        sensor_column: df_sens[sensor_column] * multiply_by,
    }).sort_values('time_server_UTC')

    # merge_asof needs both sides sorted by the key (done above).
    df_merged = pd.merge_asof(
        sens_side, temp_side, on='time_server_UTC', direction='nearest')

    windows = df_merged.resample(average_set, on='time_server_UTC')
    temp_diff_std = windows['temp_diff'].std().sort_index()
    df_sensor_std = windows[sensor_column].std().sort_index()

    # Use only windows where BOTH series have a value, so the correlation and
    # the fit always see correctly paired samples.
    valid_mask = temp_diff_std.notna() & df_sensor_std.notna()
    x_valid = temp_diff_std[valid_mask]
    y_valid = df_sensor_std[valid_mask]
    if len(x_valid) < 2:
        raise ValueError(
            f"Need at least two windows with data for {sensor_column!r} "
            f"and the temperature difference; got {len(x_valid)}.")

    corr, p_value = pearsonr(x_valid, y_valid)
    print(f"{sensor_column} Pearson Correlation Coefficient: {corr}, p-value: {p_value}")

    # Window start as epoch seconds; int64 is requested explicitly so the
    # conversion does not depend on the platform's default integer width.
    time_values = x_valid.index.astype('int64') / 10**9

    scatter_plot = go.Scatter(
        x=x_valid.values,
        y=y_valid.values,
        mode='markers',
        marker=dict(
            size=4,
            color=time_values,
            colorscale=scheme,
            showscale=False,
        ),
        name=f'Čidlo {sensor_column} vs Teplotný rozdiel konštrukcie'
    )

    # A straight line is fully described by its two end points.
    slope, intercept = np.polyfit(x_valid, y_valid, 1)
    x_line = np.array([x_valid.min(), x_valid.max()])
    fit_line = go.Scatter(
        x=x_line,
        y=slope * x_line + intercept,
        mode='lines',
        line=dict(color='rgba(0, 0, 0, 0.5)', width=3),
        name='Fitted Line'
    )

    fig = go.Figure()
    fig.add_trace(scatter_plot)
    fig.add_trace(fit_line)

    fig.update_layout(
        title=f'Čidlo {sensor_column} vs Teplotný rozdiel konštrukcie',
        xaxis_title='STD teplotného rozdielu konštrukcie [°C]',
        yaxis_title='STD vzdialenosti [mm]',
        plot_bgcolor='#EEEEEE',
        showlegend=False,
        font=dict(
            family="Montserrat, sans-serif",
            size=12,
            color="#000000"
        ),
        width=800,
        height=500,
        margin=dict(l=50, r=25, t=50, b=50),
        annotations=[
            dict(
                x=-0.2,
                y=-0.2,
                xref='paper',
                yref='paper',
                # ρ is the correlation coefficient; the p-value is labelled
                # separately so the two cannot be confused.
                text=f"ρ = {corr:.2f}, p = {p_value:.2e}",
                showarrow=False,
                font=dict(
                    family="Montserrat, sans-serif",
                    size=12,
                    color="#000000"
                ),
                align="left"
            )
        ]
    )

    if save:
        file_path = os.path.join(save_path, f'{sensor_column}_disp.png')
        pio.write_image(fig, file_path)

    # Show the plot
    # fig.show()


# Dictionary summarizing sensors in each DataFrame.
# Keys are positions in df_spans_flattened (as strings); values are the
# measurement columns found in that span's frame.
sensors_in_dfs = {
    "0": ["10.pv0", "10.pv1", "11.pv0", "11.pv1", "17.pv0", "17.pv1", "19.pv0", "19.pv1", "20.pv0", "20.pv1", "25.pv0", "25.pv1", "27.pv0", "27.pv1", "6.pv0", "6.pv1"],
    "1": ["15.pv0", "15.pv1", "2.pv0", "2.pv1", "28.pv0", "28.pv1", "3.pv0", "3.pv1", "3.pv2"],
    "2": ["1.pv0", "1.pv1", "12.pv0", "12.pv1", "13.pv0", "13.pv1", "14.pv0", "14.pv1", "23.pv0", "23.pv1", "26.pv0", "26.pv1", "30.pv0", "30.pv1", "30.pv2", "8.pv0", "8.pv1"],
    "3": ["22.pv0", "22.pv1", "24.pv0", "24.pv1", "29.pv0", "29.pv1", "31.pv1", "31.pv2", "4.pv0", "4.pv1", "4.pv2"],
    "4": ["16.pv0", "16.pv1", "18.pv0", "18.pv1", "21.pv0", "21.pv1", "5.pv0", "5.pv1", "7.pv0", "7.pv1", "9.pv0", "9.pv1"]
}

# Loop through the dictionary and create plots.
# The temperature columns (31.pv1 / 31.pv2) are always read from span 3.
df_temp = df_spans_flattened[3]  # DataFrame containing temperature data

for df_index, sensors in sensors_in_dfs.items():
    # DataFrame containing sensor data
    df_sens = df_spans_flattened[int(df_index)]
    for sensor in sensors:
        # Exclude temperature sensors when plotting
        if sensor not in ['31.pv1', '31.pv2']:
            # save=False: nothing is written to "disp2" in this run.
            plot_std_vs_temp_diff_with_correlation(
                df_temp, df_sens, sensor, average_set="6h", scheme='temps', save=False, save_path="disp2")

10.pv0 Pearson Correlation Coefficient: 0.7120983888811453, p-value: 5.061245024288684e-153
10.pv1 Pearson Correlation Coefficient: 0.7811300647365753, p-value: 4.322202566717537e-203
11.pv0 Pearson Correlation Coefficient: 0.737991220233007, p-value: 6.2200643567083206e-170
11.pv1 Pearson Correlation Coefficient: 0.7626723428945623, p-value: 4.891144947020094e-188
17.pv0 Pearson Correlation Coefficient: 0.7826009847758751, p-value: 2.3605130597343887e-204
17.pv1 Pearson Correlation Coefficient: 0.6289308694734224, p-value: 1.790695661785928e-109
19.pv0 Pearson Correlation Coefficient: 0.7175985950367875, p-value: 1.869154800179105e-156
19.pv1 Pearson Correlation Coefficient: 0.1353049301218412, p-value: 2.0575489511269683e-05
20.pv0 Pearson Correlation Coefficient: 0.5779394960012668, p-value: 9.212133173833921e-89
20.pv1 Pearson Correlation Coefficient: 0.7459480984656888, p-value: 1.538224164264871e-175
25.pv0 Pearson Correlation Coefficient: 0.6092114684094538, p-value: 5.137255938

In [23]:
# Report (rows, columns) of every flattened span frame.
# A named loop variable is used because binding ``_`` at cell level shadows
# IPython's last-output variable for the rest of the session.
for span_df in df_spans_flattened:
    print(span_df.shape)

(35373, 27)
(35395, 20)
(35385, 28)
(35398, 22)
(25726, 23)


In [54]:
import pandas as pd
from scipy.stats import pearsonr


def analyze_correlation(df_temp, df_sens, sensor_column, average_set="6h",
                        temp_columns=("31.pv1", "31.pv2"),
                        time_column="time_server_UTC"):
    """Correlate a time-averaged sensor column with time-averaged temperature columns.

    Both inputs are resampled into bins of width ``average_set`` (bin mean),
    aligned on the bin timestamps, and bins where either side is missing are
    dropped before the Pearson correlation is computed.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame holding ``time_column`` and every column named in ``temp_columns``.
    df_sens : pandas.DataFrame
        Frame holding ``time_column`` and ``sensor_column``.
    sensor_column : str
        Name of the sensor column in ``df_sens`` to correlate.
    average_set : str, default "6h"
        Resampling rule passed to ``DataFrame.resample``.
    temp_columns : sequence of str, default ("31.pv1", "31.pv2")
        Temperature columns of ``df_temp`` to correlate the sensor against.
        A single column name given as a plain string is also accepted.
    time_column : str, default "time_server_UTC"
        Timestamp column present in both frames; parsed with ``pd.to_datetime``.

    Returns
    -------
    tuple of (correlation, p_value) pairs
        One pair per entry of ``temp_columns``, in the same order. With the
        defaults this is ``((r_31pv1, p_31pv1), (r_31pv2, p_31pv2))``.

    Raises
    ------
    KeyError
        If a requested column is missing from its frame.

    Notes
    -----
    Neither input frame is modified. ``pearsonr`` is called as-is, so any error
    it raises (e.g. too few paired bins) propagates to the caller.
    """
    # Treat a lone string as a single column, not as an iterable of characters
    if isinstance(temp_columns, str):
        temp_columns = (temp_columns,)

    def _binned_mean(frame, value_columns):
        # Copy only the columns that are needed, so the caller's frame is left
        # untouched without duplicating the whole DataFrame.
        wanted = list(dict.fromkeys(value_columns))
        subset = frame[[time_column, *wanted]].copy()
        subset[time_column] = pd.to_datetime(subset[time_column])
        return subset.resample(average_set, on=time_column)[wanted].mean()

    # One resample pass covers all temperature columns
    temp_means = _binned_mean(df_temp, temp_columns)
    sensor_mean = _binned_mean(df_sens, [sensor_column])[sensor_column]

    results = []
    for temp_column in temp_columns:
        # Align on the bin timestamps and keep only bins present on both sides
        paired = pd.DataFrame(
            {'temp': temp_means[temp_column], 'sensor': sensor_mean}).dropna()
        correlation, p_value = pearsonr(paired['temp'], paired['sensor'])
        results.append((correlation, p_value))

    return tuple(results)


# Sensors to correlate against temperature, keyed by the (string) index of the
# DataFrame in df_spans_flattened that holds them.
sensors_in_dfs = {
    "0": ["10.pv1", "6.pv1", "19.pv0", "17.pv1", "11.pv0", "20.pv0", "27.pv1", "25.pv1"],
    "1": ["15.pv0", "3.pv0", "2.pv1", "28.pv1"],
    "2": ["12.pv0", "30.pv0", "1.pv1", "14.pv0", "13.pv0", "26.pv0", "23.pv0", "8.pv1"],
    "3": ["29.pv0", "24.pv1", "22.pv0", "4.pv1"],
    "4": ["21.pv0", "18.pv1", "7.pv1", "16.pv0", "5.pv0", "9.pv0"]
}

# Loop through the dictionary and compute correlations
df_temp = df_spans_flattened[3]  # DataFrame containing temperature data

# Collect one record per sensor and build the DataFrame once at the end.
# Growing a DataFrame with pd.concat inside the loop is quadratic and triggers
# pandas' deprecation warning about concatenating empty frames.
correlation_records = []
for df_index, sensors in sensors_in_dfs.items():
    # DataFrame containing sensor data
    df_sens = df_spans_flattened[int(df_index)]
    for sensor in sensors:
        # Compute correlations against both temperature columns
        (correlation_1, p_value_1), (correlation_2, p_value_2) = analyze_correlation(
            df_temp, df_sens, sensor, average_set="6h")

        correlation_records.append({
            "sensor": sensor,
            "pearson_31.pv1": correlation_1,
            "p_value_31.pv1": p_value_1,
            "pearson_31.pv2": correlation_2,
            "p_value_31.pv2": p_value_2,
        })

# Explicit column list keeps the column order (and the columns themselves,
# even if no sensor was processed) stable.
results_df = pd.DataFrame(correlation_records, columns=[
    "sensor", "pearson_31.pv1", "p_value_31.pv1", "pearson_31.pv2", "p_value_31.pv2"])

# Display the results
results_df


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Unnamed: 0,sensor,pearson_31.pv1,p_value_31.pv1,pearson_31.pv2,p_value_31.pv2
0,10.pv1,-0.493269,1.372076e-61,-0.424361,2.242219e-44
1,6.pv1,-0.814271,1.161239e-234,-0.782478,1.16627e-204
2,19.pv0,-0.429518,1.565875e-45,-0.384179,4.970779e-36
3,17.pv1,-0.673375,3.13876e-131,-0.633274,1.213864e-111
4,11.pv0,-0.652225,1.581639e-120,-0.631047,1.2240929999999999e-110
5,20.pv0,-0.884753,0.0,-0.880406,4.23e-321
6,27.pv1,-0.687827,4.518531e-139,-0.62297,4.56762e-107
7,25.pv1,-0.943006,0.0,-0.943117,0.0
8,15.pv0,-0.659164,6.075368e-124,-0.605995,6.843456e-100
9,3.pv0,-0.711656,4.671484e-153,-0.638806,3.592036e-114


In [57]:
# Plain-text dump of the correlation table (one row per sensor).
print(results_df)

    sensor  pearson_31.pv1  p_value_31.pv1  pearson_31.pv2  p_value_31.pv2
0   10.pv1       -0.493269    1.372076e-61       -0.424361    2.242219e-44
1    6.pv1       -0.814271   1.161239e-234       -0.782478   1.166270e-204
2   19.pv0       -0.429518    1.565875e-45       -0.384179    4.970779e-36
3   17.pv1       -0.673375   3.138760e-131       -0.633274   1.213864e-111
4   11.pv0       -0.652225   1.581639e-120       -0.631047   1.224093e-110
5   20.pv0       -0.884753    0.000000e+00       -0.880406   4.229202e-321
6   27.pv1       -0.687827   4.518531e-139       -0.622970   4.567620e-107
7   25.pv1       -0.943006    0.000000e+00       -0.943117    0.000000e+00
8   15.pv0       -0.659164   6.075368e-124       -0.605995   6.843456e-100
9    3.pv0       -0.711656   4.671484e-153       -0.638806   3.592036e-114
10   2.pv1       -0.669909   2.048215e-129       -0.609742   1.943941e-101
11  28.pv1       -0.763043   1.049751e-188       -0.727766   2.384749e-163
12  12.pv0       -0.78460

In [56]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# correlation and p-value columns in the results table.
results_df.describe()

Unnamed: 0,pearson_31.pv1,p_value_31.pv1,pearson_31.pv2,p_value_31.pv2
count,30.0,30.0,30.0,30.0
mean,-0.677239,1.046682e-29,-0.639134,1.467551e-21
std,0.139717,5.732914e-29,0.153881,8.038106e-21
min,-0.94392,0.0,-0.943117,0.0
25%,-0.745025,1.264134e-156,-0.69795,4.207623e-136
50%,-0.670873,1.124958e-129,-0.632161,6.727398e-111
75%,-0.608605,1.1886559999999998e-100,-0.562199,9.102506e-82
max,-0.340753,3.140046e-28,-0.286494,4.402652e-20
