## Описание данных

В качестве примера рассмотрим потребительские расходы на здравоохранение по регионам России за третий квартал 2022 и 2023 годов. Они представлены в руб./мес. в среднем на члена 
домашнего хозяйства.

Источник данных: Росстат, Приложение 1. "Доходы, расходы и потребление домашних хозяйств по субъектам 
Российской Федерации", лист 1.4
https://rosstat.gov.ru/storage/mediabank/Dohod_rashod_potreblen_3k-2023.htm

### Гистограммы

In [51]:
import pandas as pd

# Read the "Data" sheet, use the first spreadsheet column as the index
# and label that index "region".
data = pd.read_excel(
    "data.xlsx",
    sheet_name="Data",
    index_col=0,
).rename_axis("region")

# Bare expression at the end of the cell so the notebook renders the frame.
data

Unnamed: 0_level_0,expense_3q2022,expense_3q2023,healthP_3q2022,healthP_3q2023,health_3q2022,health_3q2023
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Белгородская область,21914.398,25241.122,2.568850,2.994974,562.948,755.965
Брянская область,18265.001,19635.899,2.933966,3.079054,535.889,604.600
Владимирская область,19813.828,23137.913,3.588756,3.879473,711.070,897.629
Воронежская область,17621.889,19502.166,4.558280,4.991471,803.255,973.445
Ивановская область,21681.452,23090.981,4.420234,5.235269,958.371,1208.875
...,...,...,...,...,...,...
Амурская область,19586.108,21720.673,5.966755,3.962138,1168.655,860.603
Магаданская область,36153.698,64866.434,3.411275,1.548055,1233.302,1004.168
Сахалинская область,32652.890,32761.952,3.462753,4.333130,1130.689,1419.618
Еврейская автономная область,21690.199,25686.048,4.416723,3.795769,957.996,974.983


In [52]:
# Summary statistics (count, mean, std, min, quartiles, max) of the
# health_3q2023 column only.
# NOTE(review): the output recorded below lists every column, as
# `data.describe()` would - re-run the cell to refresh it.
data["health_3q2023"].describe()

Unnamed: 0,expense_3q2022,expense_3q2023,healthP_3q2022,healthP_3q2023,health_3q2022,health_3q2023
count,82.0,82.0,82.0,82.0,82.0,82.0
mean,21252.736085,23967.866829,4.115228,3.833511,862.43372,896.120622
std,5595.837863,7545.25409,1.24956,1.433418,276.716227,348.509391
min,11811.853,12740.119,1.336844,1.548055,157.906,205.33
25%,17519.2605,19409.43,3.273287,2.903333,720.3235,712.9865
50%,20394.3505,23282.251,3.990225,3.787105,867.3515,891.469
75%,23346.558,26285.46975,4.703184,4.498421,1013.98575,997.34425
max,40514.427,64866.434,8.702164,9.337385,1651.409,2249.667


In [53]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Side-by-side histograms of health expenses per household member:
# one panel per data column, keyed by column name -> panel title.
panels = {
    'health_3q2022': 'Health expenses in 3Q2022 per h/h member',
    'health_3q2023': 'Health expenses in 3Q2023 per h/h member',
}

# Single-row grid with one column per panel.
fig = make_subplots(
    rows=1,
    cols=len(panels),
    subplot_titles=list(panels.values()),
)

# Subplot grid positions are 1-based.
for position, column in enumerate(panels, start=1):
    fig.add_trace(
        go.Histogram(x=data[column], autobinx=True),
        row=1,
        col=position,
    )

# Fixed canvas size; the legend is hidden.
fig.update_layout(height=500, width=1000, showlegend=False)

fig.show()

In [54]:
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

def plot_hist(
    data: pd.Series,
    title: str = 'Health expenses in 3Q2023 per h/h member',
    xaxis_title: str = 'Expenses',
    start_bins: int = 10,
    max_bins: int = 82,
) -> None:
    """Show an interactive histogram of *data* with a bin-count slider.

    The slider offers one step per bin count from 1 to ``max_bins``; each
    step sets the histogram trace's ``nbinsx`` to that count.

    Args:
        data: Values to plot on the x axis.
        title: Figure title. Defaults to the 3Q2023 caption; pass a
            different one when plotting another or a transformed series.
        xaxis_title: Label of the x axis.
        start_bins: Bin count the histogram and the slider start with.
        max_bins: Largest bin count offered by the slider.

    Raises:
        ValueError: If ``max_bins`` is below 1 or ``start_bins`` is not
            within ``1..max_bins``.

    Returns:
        None. The figure is displayed with ``fig.show()`` and is not
        returned, so a call at the end of a notebook cell renders it once.
    """
    if max_bins < 1:
        raise ValueError(f"max_bins must be at least 1, got {max_bins}")
    if not 1 <= start_bins <= max_bins:
        raise ValueError(
            f"start_bins must be between 1 and {max_bins}, got {start_bins}"
        )

    # Histogram drawn with the starting number of bins.
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=data, nbinsx=start_bins))

    # One slider step per candidate bin count; each step updates the
    # trace's nbinsx attribute.
    slider_steps = [
        dict(
            method="update",
            args=[{"nbinsx": [bins]}],
            label=f"{bins} bins",
        )
        for bins in range(1, max_bins + 1)
    ]

    fig.update_layout(
        title_text=title,
        xaxis_title_text=xaxis_title,
        yaxis_title_text='Count',
        sliders=[{
            # Steps start at 1 bin, so the step for N bins sits at index N - 1.
            "active": start_bins - 1,
            "currentvalue": {"prefix": "Number of bins: "},
            "pad": {"t": 50},
            "steps": slider_steps,
        }],
        height=600,
        width=1000,
    )

    # Display the plot
    fig.show()


# Interactive histogram of the health_3q2023 column.
plot_hist(data["health_3q2023"])

In [55]:
# Same interactive histogram, applied to the natural logarithm of the
# health_3q2023 column.
plot_hist(np.log(data["health_3q2023"]))

In [62]:
import plotly.graph_objects as go
import numpy as np
from scipy.stats import norm, gaussian_kde

# Series under study and the grid on which the fitted curves are evaluated
# (1000 evenly spaced points between the observed minimum and maximum).
cur_data = data.health_3q2023
cur_min, cur_max = cur_data.min(), cur_data.max()
x_vals = np.linspace(cur_min, cur_max, 1000)

# Bandwidth rules offered in the dropdown.
bw_methods = ["scott", "silverman"]

# Density-normalised histogram, so it shares the y scale of the curves.
hist = go.Histogram(
    x=cur_data,
    histnorm='probability density',
    name='Histogram',
    nbinsx=30,
)

# Normal density with the sample mean and standard deviation.
# NOTE(review): np.std defaults to ddof=0, unlike Series.std()/describe()
# which use ddof=1 - confirm this is the intended estimator.
normal_trace = go.Scatter(
    x=x_vals,
    y=norm.pdf(x_vals, loc=np.mean(cur_data), scale=np.std(cur_data)),
    mode='lines',
    name='Normal Distribution',
    line=dict(dash='dash'),
)

# One Gaussian KDE curve per bandwidth rule, hidden until picked in the
# dropdown.
kde_traces = []
for method in bw_methods:
    kde = gaussian_kde(cur_data, bw_method=method)
    kde_trace = go.Scatter(
        x=x_vals,
        y=kde.evaluate(x_vals),
        mode='lines',
        name=f'KDE ({method})',
        visible=False,
    )
    kde_traces.append(kde_trace)

# Trace order matters: the visibility masks below are positional.
fig = go.Figure(data=[hist, normal_trace] + kde_traces)


def _visibility(selected):
    """Return the mask: histogram and normal curve on, only *selected* KDE on."""
    return [True, True] + [name == selected for name in bw_methods]


# "None" hides every KDE curve; each remaining button shows exactly one.
buttons = [
    dict(label="None",
         method="update",
         args=[{"visible": _visibility(None)},
               {"title": "None"}]),
]
buttons += [
    dict(label=name,
         method="update",
         args=[{"visible": _visibility(name)},
               {"title": f"Gaussian KDE ({name})"}])
    for name in bw_methods
]

# Dropdown selector for the KDE overlay.
fig.update_layout(
    updatemenus=[
        dict(
            type="dropdown",
            direction="down",
            x=0.7,
            y=1.15,
            showactive=True,
            buttons=buttons,
        )
    ]
)

fig.show()
