In [45]:
import pandas as pd
import scipy.stats as stats
import plotly.graph_objects as go
import plotly.express as px

In [47]:
df = pd.read_csv('tarros.csv')

In [48]:
display(df)

Unnamed: 0,Tarro,Lote 1,Lote 2,Lote 3,Lote 4,Lote 5
0,Tarro 1,29.39,30.63,27.16,31.03,29.67
1,Tarro 2,31.51,32.1,26.63,30.98,29.32
2,Tarro 3,30.88,30.11,25.31,28.95,26.87
3,Tarro 4,27.63,29.63,27.66,31.45,31.59
4,Tarro 5,28.85,29.68,27.1,29.7,29.41
5,Media,29.65,30.43,26.77,30.42,29.37


In [4]:
print("\nEstadísticas:")
print(df.describe())


Estadísticas:
          Lote 1     Lote 2     Lote 3     Lote 4     Lote 5
count   6.000000   6.000000   6.000000   6.000000   6.000000
mean   29.651667  30.430000  26.771667  30.421667  29.371667
std     1.397175   0.909263   0.800535   0.940838   1.501498
min    27.630000  29.630000  25.310000  28.950000  26.870000
25%    28.985000  29.787500  26.665000  29.880000  29.332500
50%    29.520000  30.270000  26.935000  30.700000  29.390000
75%    30.572500  30.580000  27.145000  31.017500  29.605000
max    31.510000  32.100000  27.660000  31.450000  31.590000


In [5]:
media = df[df['Tarro'] == 'Media']

In [6]:
display(media)

Unnamed: 0,Tarro,Lote 1,Lote 2,Lote 3,Lote 4,Lote 5
5,Media,29.65,30.43,26.77,30.42,29.37


In [51]:
data = {
    "Tarro": ["Tarro 1", "Tarro 2", "Tarro 3", "Tarro 4", "Tarro 5"],
    "Lote 1": [29.39, 31.51, 30.88, 27.63, 28.85],
    "Lote 2": [30.63, 32.10, 30.11, 29.63, 29.68],
    "Lote 3": [27.16, 26.63, 25.31, 27.66, 27.10],
    "Lote 4": [31.03, 30.98, 28.95, 31.45, 29.70],
    "Lote 5": [29.67, 29.32, 26.87, 31.59, 29.41],
}

df = pd.DataFrame(data)

In [31]:
# Realizar ANOVA
f_value, p_value = stats.f_oneway(
    df["Lote 1"], df["Lote 2"], df["Lote 3"], df["Lote 4"], df["Lote 5"]
)


In [None]:
# Calcular la suma de cuadrados, grados de libertad y razón F manualmente
grand_mean = pd.concat([df[col] for col in df.columns[1:]]).mean()
# Suma de cuadrados total
ss_total = sum(
    (df[col] - grand_mean).pow(2).sum() for col in df.columns[1:])

In [33]:
print(grand_mean)
print(ss_total)

29.329599999999996
78.05329600000002


In [None]:
# Suma de cuadrados total
ss_between = sum(
    len(df) * (df[col].mean() - grand_mean) ** 2 for col in df.columns[1:])

In [35]:
print(ss_between)

45.256376000000074


In [36]:
# Suma de cuadrados entre grupos
ss_within = ss_total - ss_between  # Suma de cuadrados dentro de grupos

df_between = len(df.columns[1:]) - 1  # Grados de libertad entre grupos
df_within = len(df) * len(df.columns[1:]) - len(df.columns[1:])  # Grados de libertad dentro de grupos
df_total = df_between + df_within  # Grados de libertad total

ms_between = ss_between / df_between  # Media de los cuadrados entre
ms_within = ss_within / df_within  # Media de los cuadrados dentro

razon_f = ms_between / ms_within  # Razón F

In [37]:
print(ss_within)
print(df_between)
print(df_within)
print(df_total)
print(ms_between)
print(ms_within)
print(razon_f)


32.79691999999994
4
20
24
11.314094000000019
1.6398459999999973
6.899485683411758


In [38]:
anova_table = pd.DataFrame(
    {
        "Fuente": ["Lote", "Error", "Total"],
        "Grados de libertad": [df_between, df_within, df_total],
        "Suma de cuadrados": [ss_between, ss_within, ss_total],
        "Media de los cuadrados": [ms_between, ms_within, ""],
        "Razón F": [razon_f, "", ""],
        "Prob > F": [p_value, "", ""],
    }
)

In [39]:
display(anova_table)

Unnamed: 0,Fuente,Grados de libertad,Suma de cuadrados,Media de los cuadrados,Razón F,Prob > F
0,Lote,4,45.256376,11.314094,6.899486,0.001166
1,Error,20,32.79692,1.639846,,
2,Total,24,78.053296,,,


Degrees of Freedom: It is the number of independent values that can vary in each calculation.

Sum of Squares (SS): Measures the variability between all data points.

Mean Square (MS): It is obtained by dividing the sum of squares by the degrees of freedom.

F Ratio: It is the result of dividing the mean square between groups by the mean square within groups.

P > F: Represents the probability of obtaining a difference between the groups as large as the observed one, assuming there are no real differences between them.

In [52]:
df_melted = df.melt(id_vars=["Tarro"], var_name="Lote", value_name="Resistencia")

# Crear el box plot
fig_box = px.box(df_melted, x="Lote", y="Resistencia", title="Box Plot: Torque Resistance vs. Lot")
fig_box.update_layout(
    xaxis_title="Lot",
    yaxis_title="Torque Resistance",
    template="plotly_white"
)
fig_box.show()

In [53]:
# Crear el scatter plot
fig_scatter = px.scatter(df_melted, x="Lote", y="Resistencia", title="Scatter Plot: Torque Resistance vs. Lot")
fig_scatter.update_layout(
    xaxis_title="Lot",
    yaxis_title="Torque Resistance",
    template="plotly_white"
)
fig_scatter.show()

In [58]:

# Combinar ambas gráficas en una sola figura
fig = go.Figure(data=fig_box.data + fig_scatter.data)
fig.update_layout(
    xaxis_title="Lot",
    yaxis_title="Torque Resistance",
    template="plotly_white",
    title="Combined Box and Scatter Plot: Torque Resistance vs. Lot"
)

fig.show()