# Variância, desvio padrão e coeficiente de variação

In [25]:
import numpy as np

In [26]:
dados_impar = np.array([150,151,152,152,153,154,155,155,155])

## Variância

In [27]:
media = dados_impar.sum() / len(dados_impar)
media

153.0

In [28]:
desvio = abs(dados_impar - media)
desvio

array([3., 2., 1., 1., 0., 1., 2., 2., 2.])

In [29]:
# Recompute the squared deviations from the raw data instead of squaring
# `desvio` in place, so re-running this cell cannot square the values twice.
desvio = abs(dados_impar - media) ** 2
desvio

array([9., 4., 1., 1., 0., 1., 4., 4., 4.])

In [30]:
desvio_somatorio = desvio.sum()
desvio_somatorio

28.0

In [31]:
# Population variance: the accumulated squared deviations divided by n
# (the number of observations), not by n - 1.
variancia = desvio_somatorio / len(dados_impar)
variancia

3.111111111111111

In [32]:
def get_variancia(dados, ddof=0):
    """Return the variance of ``dados``, computed step by step.

    Parameters
    ----------
    dados : array-like
        Numeric observations. Anything ``np.asarray`` accepts works (NumPy
        array, list, tuple); every element is used.
    ddof : int, optional
        Delta degrees of freedom: the sum of squared deviations is divided
        by ``n - ddof``. The default ``0`` gives the population variance,
        ``1`` gives the sample variance.

    Returns
    -------
    float
        Sum of squared deviations from the mean divided by ``n - ddof``
        (a NumPy floating scalar).
    """
    valores = np.asarray(dados)
    # ``size`` counts every element; ``len`` would only count rows for input
    # with more than one dimension, while ``sum`` adds all elements.
    n = valores.size
    media = valores.sum() / n
    # ``abs`` keeps the squared deviation real even for complex input.
    desvio = np.abs(valores - media) ** 2
    return desvio.sum() / (n - ddof)

In [33]:
get_variancia(dados_impar)

3.111111111111111

#### Bibliotecas

In [34]:
import numpy as np
import statistics
from scipy import ndimage

In [35]:
np.var(dados_impar)

3.111111111111111

In [36]:
# statistics.variance is the *sample* variance (divides by n - 1). Given NumPy
# integers it converts the result back to that integer type, truncating 3.5
# to 3, so pass plain Python ints instead.
statistics.variance(dados_impar.tolist())

3.5

In [37]:
ndimage.variance(dados_impar)

3.111111111111111

## Desvio Padrão

In [45]:
variancia = np.var(dados_impar)

In [46]:
desvio_padrao = variancia ** (1/2)
desvio_padrao

1.7638342073763937

In [83]:
def get_variancia_desvio_padrao(dados, ddof=0):
    """Return the variance and standard deviation of ``dados``.

    Parameters
    ----------
    dados : array-like
        Numeric observations. Anything ``np.asarray`` accepts works (NumPy
        array, list, tuple); every element is used.
    ddof : int, optional
        Delta degrees of freedom: the sum of squared deviations is divided
        by ``n - ddof``. The default ``0`` gives population statistics,
        ``1`` gives sample statistics.

    Returns
    -------
    tuple
        ``(variancia, desvio_padrao)`` where ``desvio_padrao`` is the square
        root of ``variancia``.
    """
    valores = np.asarray(dados)
    # ``size`` counts every element; ``len`` would only count rows for input
    # with more than one dimension, while ``sum`` adds all elements.
    n = valores.size
    media = valores.sum() / n
    # ``abs`` keeps the squared deviation real even for complex input.
    desvio_somatorio = (np.abs(valores - media) ** 2).sum()
    variancia = desvio_somatorio / (n - ddof)
    desvio_padrao = variancia ** (1/2)
    return variancia, desvio_padrao

In [84]:
get_variancia_desvio_padrao(dados_impar)

(3.111111111111111, 1.7638342073763937)

#### Bibliotecas

In [60]:
import numpy as np
import statistics
from scipy import stats
import math

In [55]:
math.sqrt(np.var(dados_impar))

1.7638342073763937

In [51]:
np.std(dados_impar)

1.7638342073763937

In [54]:
# statistics.stdev is the *sample* standard deviation (n - 1 denominator).
# Given NumPy integers the intermediate variance is converted back to that
# integer type (3.5 becomes 3), which yields sqrt(3) instead of sqrt(3.5),
# so pass plain Python ints instead.
statistics.stdev(dados_impar.tolist())

1.8708286933869707

In [62]:
stats.tstd(dados_impar, ddof=0)

1.7638342073763937

## Coeficiente de variação

In [71]:
media = dados_impar.sum() / len(dados_impar)
media

153.0

In [72]:
desvio = abs(dados_impar - media) ** 2
desvio_somatorio = desvio.sum()
variancia = desvio_somatorio / len(dados_impar)
variancia

3.111111111111111

In [73]:
desvio_padrao = variancia ** (1/2)
desvio_padrao

1.7638342073763937

In [75]:
# Coefficient of variation: the standard deviation relative to the mean,
# expressed as a percentage.
coeficiente_variacao = (desvio_padrao / media) * 100
coeficiente_variacao

1.1528328152786886

In [85]:
def get_medidas_dispercao(dados, ddof=0):
    """Return variance, standard deviation and coefficient of variation.

    The function name keeps its original spelling so existing calls keep
    working.

    Parameters
    ----------
    dados : array-like
        Numeric observations. Anything ``np.asarray`` accepts works (NumPy
        array, list, tuple); every element is used.
    ddof : int, optional
        Delta degrees of freedom: the sum of squared deviations is divided
        by ``n - ddof``. The default ``0`` gives population statistics,
        ``1`` gives sample statistics.

    Returns
    -------
    tuple
        ``(variancia, desvio_padrao, coeficiente_variacao)``. The
        coefficient of variation is ``desvio_padrao / media`` as a
        percentage, so it is not meaningful when the mean is zero.
    """
    valores = np.asarray(dados)
    # ``size`` counts every element; ``len`` would only count rows for input
    # with more than one dimension, while ``sum`` adds all elements.
    n = valores.size
    media = valores.sum() / n
    # ``abs`` keeps the squared deviation real even for complex input.
    desvio_somatorio = (np.abs(valores - media) ** 2).sum()
    variancia = desvio_somatorio / (n - ddof)
    desvio_padrao = variancia ** (1/2)
    coeficiente_variacao = (desvio_padrao / media) * 100
    return variancia, desvio_padrao, coeficiente_variacao

In [87]:
get_medidas_dispercao(dados_impar)

(3.111111111111111, 1.7638342073763937, 1.1528328152786886)

#### Bibliotecas

In [88]:
from scipy import stats

In [89]:
stats.variation(dados_impar) * 100

1.1528328152786886

## Variância (dados agrupados)

In [91]:
import pandas as pd

In [102]:
# Grouped data: lower and upper class limits plus the frequency (fi) of each
# class.
dados = {
    'inferior': [150,154,158,162,166,170],
    # NOTE(review): the fourth upper limit is 165 while the next class starts
    # at 166 and every other class has width 4 - possibly a typo for 166.
    # Confirm before relying on the results computed from this table.
    'superior': [154,158,162,165,170,174],
    'fi': [5,9,11,7,5,3]
}

In [103]:
dataset = pd.DataFrame(dados)
dataset

Unnamed: 0,inferior,superior,fi
0,150,154,5
1,154,158,9
2,158,162,11
3,162,165,7
4,166,170,5
5,170,174,3


In [104]:
# Class midpoint (xi): the row-wise average of the two class limits.
dataset['xi'] = dataset[['inferior', 'superior']].mean(axis=1)
dataset

Unnamed: 0,inferior,superior,fi,xi
0,150,154,5,152.0
1,154,158,9,156.0
2,158,162,11,160.0
3,162,165,7,163.5
4,166,170,5,168.0
5,170,174,3,172.0


In [105]:
dataset['fi_xi'] = dataset['fi'] * dataset['xi']
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi
0,150,154,5,152.0,760.0
1,154,158,9,156.0,1404.0
2,158,162,11,160.0,1760.0
3,162,165,7,163.5,1144.5
4,166,170,5,168.0,840.0
5,170,174,3,172.0,516.0


In [106]:
dataset['Fi'] = 0
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi,Fi
0,150,154,5,152.0,760.0,0
1,154,158,9,156.0,1404.0,0
2,158,162,11,160.0,1760.0,0
3,162,165,7,163.5,1144.5,0
4,166,170,5,168.0,840.0,0
5,170,174,3,172.0,516.0,0


In [107]:
# Cumulative frequency (Fi): running total of the `fi` column. The column is
# selected by name instead of by position in `dataset.values`, so the result
# does not depend on the column order. The cast to float keeps the same
# values the positional lookup produced (`.values` upcasts the mixed-dtype
# frame to float).
frequencia_acumulada = dataset['fi'].astype(float).cumsum().tolist()

frequencia_acumulada

[5.0, 14.0, 25.0, 32.0, 37.0, 40.0]

In [108]:
dataset['Fi'] = frequencia_acumulada
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi,Fi
0,150,154,5,152.0,760.0,5.0
1,154,158,9,156.0,1404.0,14.0
2,158,162,11,160.0,1760.0,25.0
3,162,165,7,163.5,1144.5,32.0
4,166,170,5,168.0,840.0,37.0
5,170,174,3,172.0,516.0,40.0


In [109]:
dataset['xi^2'] = dataset['xi'] * dataset['xi']
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi,Fi,xi^2
0,150,154,5,152.0,760.0,5.0,23104.0
1,154,158,9,156.0,1404.0,14.0,24336.0
2,158,162,11,160.0,1760.0,25.0,25600.0
3,162,165,7,163.5,1144.5,32.0,26732.25
4,166,170,5,168.0,840.0,37.0,28224.0
5,170,174,3,172.0,516.0,40.0,29584.0


In [110]:
dataset['xi^2_fi'] = dataset['xi^2'] * dataset['fi']
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi,Fi,xi^2,xi^2_fi
0,150,154,5,152.0,760.0,5.0,23104.0,115520.0
1,154,158,9,156.0,1404.0,14.0,24336.0,219024.0
2,158,162,11,160.0,1760.0,25.0,25600.0,281600.0
3,162,165,7,163.5,1144.5,32.0,26732.25,187125.75
4,166,170,5,168.0,840.0,37.0,28224.0,141120.0
5,170,174,3,172.0,516.0,40.0,29584.0,88752.0


In [114]:
colunas_ordenadas = ['inferior', 'superior', 'fi', 'xi', 'fi_xi', 'xi^2', 'xi^2_fi', 'Fi']
dataset = dataset[colunas_ordenadas]
dataset

Unnamed: 0,inferior,superior,fi,xi,fi_xi,xi^2,xi^2_fi,Fi
0,150,154,5,152.0,760.0,23104.0,115520.0,5.0
1,154,158,9,156.0,1404.0,24336.0,219024.0,14.0
2,158,162,11,160.0,1760.0,25600.0,281600.0,25.0
3,162,165,7,163.5,1144.5,26732.25,187125.75,32.0
4,166,170,5,168.0,840.0,28224.0,141120.0,37.0
5,170,174,3,172.0,516.0,29584.0,88752.0,40.0


In [123]:
# Population variance of the grouped data via the shortcut formula
#   sum(fi * xi^2) / sum(fi) - (sum(fi * xi) / sum(fi)) ** 2
# i.e. the frequency-weighted mean of the squared midpoints minus the square
# of the frequency-weighted mean.
# NOTE(review): subtracting two large, nearly equal numbers loses precision;
# the exact value for this table is 32.16859375.
variancia_agrupados = (dataset['xi^2_fi'].sum() / dataset['fi'].sum()) - math.pow(dataset['fi_xi'].sum() / dataset['fi'].sum(), 2)
variancia_agrupados

32.168593749996944

In [124]:
# Standard deviation of the grouped data: square root of `variancia_agrupados`.
# NOTE(review): `desvio_padrado` looks like a misspelling of `desvio_padrao`;
# the name is left unchanged so any existing reference keeps working.
desvio_padrado = math.sqrt(variancia_agrupados)
desvio_padrado

5.671736396377828