### Interactive Plot Implementing the Sinclair (1974) Method, as Reported in Chiodini et al. (1998), in Open-Source Python

The Sinclair method is a statistical procedure by which it is possible to partition a dataset of non-Gaussian, polymodal values into two or more log-normal sub-populations, starting from a cumulative probability plot of the data. It can be useful for separating the background population from the anomalous populations. In our procedure the data can vary in complexity, and the code can handle from one to five populations. For each population it is possible to choose the mean, the standard deviation, and the fraction with respect to the total of the data, in order to find the combination of log-normal populations that best fits the distribution of the raw data.

In [5]:
import pyco2stats as PyCO2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

# Read the spreadsheet and keep only the rows that have a value in column 'A'
my_dataset = pd.read_excel('PYTHONSS.xlsx').dropna(subset=['A'])

# Upper bound on the number of populations the interface exposes
max_populations = 5

# Starting state: how many populations are active, plus the default slider
# values (Med, Std, Fd) for every one of the `max_populations` slots
initial_population_count = 3
initial_meds = [2, 1.3, 1, 1.5, 2.2]
initial_stds = [0.7, 0.9, 0.45, 0.5, 0.8]
initial_fds = [0.3, 0.2, 0.5, 0.4, 0.3]

# Slider that selects the number of active populations
population_count_slider = widgets.IntSlider(
    description='Populations:',
    value=initial_population_count,
    min=1,
    max=max_populations,
    step=1,
)

# One slider per population slot for each of the three parameters
meds_sliders = [
    widgets.FloatSlider(
        description=f'Med {i+1}:',
        value=initial_meds[i],
        min=0.1,
        max=10.0,
        step=0.01,
    )
    for i in range(max_populations)
]
stds_sliders = [
    widgets.FloatSlider(
        description=f'Std {i+1}:',
        value=initial_stds[i],
        min=0.1,
        max=2.0,
        step=0.01,
    )
    for i in range(max_populations)
]
fds_sliders = [
    widgets.FloatSlider(
        description=f'Fd {i+1}:',
        value=initial_fds[i],
        min=0.0,
        max=1.0,
        step=0.01,
    )
    for i in range(max_populations)
]

# Give every parameter slider the same look; the vertical margin keeps
# neighbouring rows from overlapping
for slider in [*meds_sliders, *stds_sliders, *fds_sliders]:
    slider_style, slider_layout = slider.style, slider.layout
    slider_style.handle_color = 'lightblue'
    slider_layout.width = '300px'
    slider_layout.margin = '35px 0'

# Output widget that will hold the rendered plot
output = widgets.Output()

# Show the parameter sliders of the active populations and hide the rest
def update_slider_visibility(population_count):
    """Toggle the CSS display of each population's Med/Std/Fd sliders.

    Slots with an index below ``population_count`` get ``display = 'block'``;
    every remaining slot up to ``max_populations`` gets ``display = 'none'``.
    """
    for slot in range(max_populations):
        display_mode = 'block' if slot < population_count else 'none'
        for slider_group in (meds_sliders, stds_sliders, fds_sliders):
            slider_group[slot].layout.display = display_mode
# Redraw the probability plot from the current widget values
def update_plot(*args):
    """Render a fresh figure inside the ``output`` widget.

    Any positional arguments are accepted and ignored, so the function can be
    registered as a widget observer callback or called directly with no
    arguments.
    """
    with output:
        # Discard the previous figure before drawing the new one
        clear_output(wait=True)

        # Read the parameters of the active populations only
        n_active = population_count_slider.value
        meds, stds, fds = (
            [s.value for s in slider_group[:n_active]]
            for slider_group in (meds_sliders, stds_sliders, fds_sliders)
        )

        # Rescale the fractions so they sum to 1 (left untouched when the sum is zero)
        fraction_total = sum(fds)
        if fraction_total:
            fds = [fraction / fraction_total for fraction in fds]

        # Larger-than-default figure for readability
        _, ax = plt.subplots(figsize=(10, 6))

        viz = PyCO2.Visualize
        y_limits = {'mminy': -1, 'mmaxy': 4}  # shared by both population curves

        # Raw data points
        viz.pp_raw_data(
            my_dataset.A, marker='o', ax=ax, s=20, c='#FF5733', alpha=0.7,
            label='Raw Data',
        )

        # Curve of the combined (mixture) population
        viz.pp_combined_population(
            meds, stds, fds, **y_limits, ax=ax, linestyle='-', linewidth=3,
            color='#3498DB', label='Combined Population',
        )

        # Curves of the individual populations
        viz.pp_single_populations(
            meds, stds, **y_limits, ax=ax, linestyle='--', linewidth=1,
        )

        # Percentile reference lines
        viz.pp_add_percentiles(
            ax=ax, percentiles='full', linestyle='-.', linewidth=1,
            color='#2ECC71',
        )

        # Title and axis labels
        ax.set_title('Interactive Population Plot', fontsize=16)
        ax.set_xlabel('Data Points', fontsize=14)
        ax.set_ylabel('Values', fontsize=14)

        # Legend and grid
        ax.legend(loc='best', fontsize=12)
        ax.grid(True, linestyle='--', alpha=0.6)

        # Emit the figure into the output widget
        plt.show()
# Hook the callbacks up to the widgets. The population-count slider gets two
# observers, registered in this order: slider visibility, then the plot.
def _on_population_count_change(change):
    """Forward the slider's new value to update_slider_visibility."""
    update_slider_visibility(change['new'])


population_count_slider.observe(_on_population_count_change, names='value')
population_count_slider.observe(update_plot, names='value')

# Any change to a parameter slider redraws the plot
for slider in [*meds_sliders, *stds_sliders, *fds_sliders]:
    slider.observe(update_plot, names='value')

# One horizontal row (Med, Std, Fd) per population slot
parameter_widgets = [
    widgets.HBox([meds_sliders[i], stds_sliders[i], fds_sliders[i]])
    for i in range(max_populations)
]

# Stack the population-count slider on top of the parameter rows
widgets_box = widgets.VBox(
    [population_count_slider, *parameter_widgets],
    layout=widgets.Layout(margin='10px 0'),
)

# Show the controls, followed by the output area that holds the plot
display(widgets_box)
display(output)

# Bring the interface into its initial state and draw the first plot
update_slider_visibility(initial_population_count)
update_plot()


Collecting statsmodels
  Using cached statsmodels-0.14.4-cp39-cp39-win_amd64.whl.metadata (9.5 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Using cached patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Using cached statsmodels-0.14.4-cp39-cp39-win_amd64.whl (9.9 MB)
Using cached patsy-1.0.1-py2.py3-none-any.whl (232 kB)
Installing collected packages: patsy, statsmodels
Successfully installed patsy-1.0.1 statsmodels-0.14.4


VBox(children=(IntSlider(value=3, description='Populations:', max=5, min=1), HBox(children=(FloatSlider(value=…

Output()