In [38]:
import pymongo
import pandas as pd
from scipy.stats import pearsonr
import plotly.express as px



# Connection settings for the MongoDB server running on localhost.
mongo_uri = "mongodb://localhost:27017/"
client = pymongo.MongoClient(mongo_uri)

# Alternative data source, currently disabled:
# db = client["Monitoring"]
# collection = db["PRJ-16"]

# Active data source: collection "Init" inside database "SteelArena".
database_name, collection_name = "SteelArena", "Init"
db = client[database_name]
collection = db[collection_name]

In [5]:
def _upper_median_expr(sensor_name):
    """
    Builds the expression that picks the median of one sensor's grouped values.

    The expression sorts the `<sensor_name>_values` array in ascending order and
    takes the element at index floor(size / 2). For an even number of values this
    is the upper of the two middle elements; the two are not averaged.

    Parameters:
    sensor_name (str): Sensor name used to build the array field and variable names.

    Returns:
    dict: A `$let` expression usable inside a `$project` stage.
    """
    sorted_var = f"sorted_{sensor_name}"
    middle_index = {
        "$floor": {
            "$divide": [{"$size": f"$${sorted_var}"}, 2]
        }
    }
    return {
        "$let": {
            "vars": {
                sorted_var: {
                    "$sortArray": {
                        "input": f"${sensor_name}_values",
                        "sortBy": 1
                    }
                }
            },
            "in": {
                "$arrayElemAt": [f"$${sorted_var}", middle_index]
            }
        }
    }


def generate_pipeline(sensor1_name, sensor2_name, bin_interval, multiplier):
    """
    Builds a MongoDB aggregation pipeline producing per-time-bin medians of two sensors.

    Stages, in order:
    1. `$match`     - keep documents where both `<sensor2_name>.t` and
                      `<sensor1_name>.s` exist.
    2. `$addFields` - add `time_bin`: `time.datetime` converted to a date and
                      truncated to bins of `bin_interval` minutes.
    3. `$group`     - collect the `.t` / `.s` readings of each bin into arrays.
    4. `$project`   - emit `time_bin`, `median_<sensor2_name>` and
                      `median_<sensor1_name>` (the latter scaled by `multiplier`).
    5. `$sort`      - order the bins by ascending `time_bin`.

    Parameters:
    sensor1_name (str): Sensor whose `.s` field is read (e.g., 'S27').
    sensor2_name (str): Sensor whose `.t` field is read (e.g., 'T4').
    bin_interval (int): Bin size in minutes, passed to `$dateTrunc` as `binSize`.
    multiplier (float): Factor applied to the median of `sensor1_name` only.

    Returns:
    list: The aggregation pipeline as a list of stage dictionaries.
    """
    return [
        {
            "$match": {
                f"{sensor2_name}.t": {"$exists": True},
                f"{sensor1_name}.s": {"$exists": True}
            }
        },
        {
            "$addFields": {
                "time_bin": {
                    "$dateTrunc": {
                        "date": {"$toDate": "$time.datetime"},
                        "unit": "minute",
                        "binSize": bin_interval
                    }
                }
            }
        },
        {
            "$group": {
                "_id": "$time_bin",
                f"{sensor2_name}_values": {"$push": f"${sensor2_name}.t"},
                f"{sensor1_name}_values": {"$push": f"${sensor1_name}.s"}
            }
        },
        {
            # Key order is kept as time_bin, _id, sensor2 median, sensor1 median.
            "$project": {
                "time_bin": "$_id",
                "_id": 0,
                f"median_{sensor2_name}": _upper_median_expr(sensor2_name),
                f"median_{sensor1_name}": {
                    "$multiply": [_upper_median_expr(sensor1_name), multiplier]
                }
            }
        },
        {
            "$sort": {"time_bin": 1}
        }
    ]


# Example usage: sensors "S27" and "T4", 18-minute bins,
# with the "S27" medians scaled by 3.11e-9.
pipeline = generate_pipeline(
    sensor1_name="S27",
    sensor2_name="T4",
    bin_interval=18,
    multiplier=3.11e-9,
)

In [53]:
def calculate_pearson_correlation(df, TLeaf, SLeaf):
    """
    Calculates the Pearson correlation coefficient and p-value between two columns in a DataFrame.

    Rows in which either median is missing (NaN/None) are ignored, because a
    single missing value would otherwise make the whole result NaN.

    Parameters:
    df (DataFrame): The DataFrame containing the columns 'median_<TLeaf>' and 'median_<SLeaf>'.
    TLeaf (str): The name of the temperature column (e.g., 'T4').
    SLeaf (str): The name of the strain column (e.g., 'S27').

    Returns:
    tuple: A tuple containing the Pearson correlation coefficient and the p-value.

    Raises:
    KeyError: If either median column is absent from df.
    ValueError: Propagated from pearsonr when too few complete rows remain
        (it requires at least two observations).
    """
    temperature_col = f'median_{TLeaf}'
    strain_col = f'median_{SLeaf}'

    # Keep only the time bins where both medians are present. A boolean mask is
    # used (rather than selecting both columns at once) so the function still
    # works when both names resolve to the same column.
    complete = df[temperature_col].notna() & df[strain_col].notna()

    # Calculate Pearson correlation and p-value on the complete pairs
    correlation, p_value = pearsonr(
        df.loc[complete, temperature_col], df.loc[complete, strain_col])

    # Display the results
    print(f"Pearson correlation coefficient: {correlation}")
    print(f"P-value: {p_value}")

    return correlation, p_value

In [39]:
def create_scatter_plot(df, TLeaf, SLeaf):
    """
    Draws a scatter plot of the median strain column (x) against the median temperature column (y).

    Parameters:
    df (DataFrame): The DataFrame holding the 'median_<SLeaf>' and 'median_<TLeaf>' columns.
    TLeaf (str): The name of the temperature column (e.g., 'T4'), plotted on the y-axis.
    SLeaf (str): The name of the strain column (e.g., 'S27'), plotted on the x-axis.

    Returns:
    None
    """
    x_column = f'median_{SLeaf}'
    y_column = f'median_{TLeaf}'

    # Human-readable axis labels keyed by the DataFrame column names
    axis_labels = {
        x_column: f'Median {SLeaf}',
        y_column: f'Median {TLeaf}',
    }

    figure = px.scatter(
        df,
        x=x_column,
        y=y_column,
        labels=axis_labels,
        title=f'Scatter Plot of {SLeaf} vs {TLeaf}',
    )

    # Render the figure
    figure.show()

# Example usage
# create_scatter_plot(df, 'T4', 'S27')

In [6]:
# Example usage: build the pipeline for sensors "S27" and "T4"
# with 18-minute bins and a 3.11e-9 multiplier on the "S27" medians.
pipeline = generate_pipeline(
    sensor1_name="S27",
    sensor2_name="T4",
    bin_interval=18,
    multiplier=3.11e-9,
)

In [51]:
# Lists of "S*" sensor names keyed by "T*" sensor name.
# NOTE(review): presumably the strain sensors to correlate against each
# temperature sensor - confirm before relying on it.
# Bound to a name so other cells can use the mapping; the bare literal was
# evaluated and then discarded.
SENSOR_GROUPS = {
    "T1": ["S7", "S8", "S9", "S10", "S11", "S12", "S13", "S14"],
    "T2": ["S15", "S16", "S17", "S18", "S11", "S12", "S13", "S14", "S22", "S23", "S25", "S26"],
    "T3": ["S22", "S23", "S25", "S26", "S19", "S20", "S21", "S24"],
}

# Last expression of the cell, so the mapping is still displayed as output.
SENSOR_GROUPS

{'T1': ['S7', 'S8', 'S9', 'S10', 'S11', 'S12', 'S13', 'S14'],
 'T2': ['S15',
  'S16',
  'S17',
  'S18',
  'S11',
  'S12',
  'S13',
  'S14',
  'S22',
  'S23',
  'S25',
  'S26'],
 'T3': ['S22', 'S23', 'S25', 'S26', 'S19', 'S20', 'S21', 'S24']}

In [52]:
import pandas as pd

SLeaf = "S7"
TLeaf = "T1"


# Run the aggregation (60-minute bins) and convert the results to a pandas DataFrame
df = pd.DataFrame(list(collection.aggregate(
    generate_pipeline(SLeaf, TLeaf, bin_interval=60, multiplier=3.11e-9))))

# An empty result has no median columns at all; fail with a clear message
# instead of a KeyError further down.
if df.empty:
    raise ValueError(
        f"Aggregation returned no time bins for {SLeaf!r} / {TLeaf!r}")

# df.head(3)

# Each call is its own statement; a trailing comma after the first call would
# wrap its result in a throwaway one-element tuple.
calculate_pearson_correlation(df, TLeaf, SLeaf)
create_scatter_plot(df, TLeaf, SLeaf)

Pearson correlation coefficient: 0.590803341900452
P-value: 2.9888053554650467e-44


In [45]:
# Report the correlation and draw the scatter plot for the current df.
# Each call is its own statement; a trailing comma after the first call would
# wrap its result in a throwaway one-element tuple.
calculate_pearson_correlation(df, TLeaf, SLeaf)
create_scatter_plot(df, TLeaf, SLeaf)

Pearson correlation coefficient: 0.590803341900452
P-value: 2.9888053554650467e-44


In [10]:
import plotly.graph_objects as go

# df must contain 'time_bin' plus the median columns of the sensors currently
# selected in TLeaf / SLeaf (e.g. 'median_T1' and 'median_S7'). Deriving the
# column names from those variables keeps this cell in step with the df built
# earlier, instead of hard-coding one particular sensor pair.
temperature_col = f'median_{TLeaf}'
strain_col = f'median_{SLeaf}'

fig = go.Figure()

# Add trace for temperatures (left axis)
fig.add_trace(go.Scatter(
    x=df['time_bin'],
    y=df[temperature_col],
    mode='lines',
    name=f'Median {TLeaf}',
    yaxis='y1'
))

# Add trace for strains (right axis)
fig.add_trace(go.Scatter(
    x=df['time_bin'],
    y=df[strain_col],
    mode='lines',
    name=f'Median {SLeaf}',
    yaxis='y2'
))

# Update layout with two y-axes; the left-axis title is set once, in `yaxis`
fig.update_layout(
    title='Median Temperature and Strain Over Time',
    xaxis_title='Time',
    yaxis=dict(
        title=f'Temperature ({TLeaf})',
        side='left'
    ),
    yaxis2=dict(
        title=f'Strain ({SLeaf})',
        side='right',
        overlaying='y',
        showgrid=False
    ),
    legend_title='Series',
    hovermode="x unified"
)

# Show the plot
fig.show()