# Descriptive Statistics

In [2]:
import math 
import statistics
import numpy as np
import scipy.stats
import pandas as pd

In [3]:
# Sample data, plus a copy with a NaN inserted after the third value so the
# later cells can show how each library treats missing data.
x = [8.0, 1, 2.5, 4, 28.0]
x_with_nan = [*x[:3], math.nan, *x[3:]]
print(x, x_with_nan, sep="\n")

[8.0, 1, 2.5, 4, 28.0]
[8.0, 1, 2.5, nan, 4, 28.0]


In [4]:
# NumPy arrays and pandas Series built from the same two lists.
y = np.array(x)
y_with_nan = np.array(x_with_nan)
z = pd.Series(x)
z_with_nan = pd.Series(x_with_nan)
print(y, y_with_nan, z, z_with_nan, sep="\n")

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


# Measures of central tendency

## Mean

In [6]:
# Mean computed by hand, without using any library.
mean_ = sum(x) / len(x)
mean_

8.7

In [7]:
# Same mean using a library: the standard-library statistics module.
mean_ = statistics.mean(x)
print(mean_)

8.7


In [9]:
# With a NaN in the data, both statistics.mean() and the built-in sum()
# return nan.
mean_ = statistics.mean(x_with_nan)
print(mean_)
sum(x_with_nan)

nan


nan

In [10]:
# NumPy, function form: np.mean().
mean_ = np.mean(y)
mean_

8.7

In [12]:
# NumPy, method form: ndarray.mean().
mean_ = y.mean()
mean_

8.7

In [9]:
# Both NumPy forms return nan when the array contains a NaN.
print(np.mean(y_with_nan))
print(y_with_nan.mean())

nan
nan


In [14]:
# np.nanmean() ignores the NaN entry, so the mean is still computed.
np.nanmean(y_with_nan)

8.7

In [12]:
# pandas: Series.mean().
mean_ = z.mean()
mean_

8.7

In [15]:
# pandas skips NaN by default (skipna=True), so the Series containing a NaN
# still yields the mean of the remaining values.
z_with_nan.mean()

8.7

## Weighted mean

In [16]:
# Weighted mean worked by hand for the values 2, 4, 8 with weights
# 0.2, 0.5, 0.3; the weights sum to 1, so no division is needed.
0.2 * 2 + 0.5 * 4 + 0.3 * 8

4.8

In [16]:
# Weighted mean implemented directly in Python, in two equivalent ways.
x = [8.0, 1, 2.5, 4, 28.0]
w = [0.1, 0.2, 0.3, 0.25, 0.15]

# 1) Pair each weight with its value by position.
wmean = sum(weight * x[i] for i, weight in enumerate(w)) / sum(w)
print(wmean)

# 2) Pair values and weights with zip().
wmean = sum(value * weight for value, weight in zip(x, w)) / sum(w)
print(wmean)

6.95
6.95


In [18]:
# Weighted mean with NumPy: np.average() is called once on the array and
# once on the Series, using the same weights.
y = np.array(x)
z = pd.Series(x)
w = np.array(w)

wmean = np.average(y, weights=w)
print(wmean)

wmean = np.average(z, weights=w)
print(wmean)

6.95
6.95


In [19]:
# Same weighted mean via the element-wise product of weights and values.
np.sum(w * y) / np.sum(w)

6.95

## Harmonic mean

In [20]:
# Harmonic mean by hand: the number of values divided by the sum of their
# reciprocals.
hmean = len(x) / sum(1 / item for item in x)
hmean

2.7613412228796843

In [21]:
# Harmonic mean using the standard-library statistics module.
hmean = statistics.harmonic_mean(x)
hmean

2.7613412228796843

In [22]:
# Harmonic mean with SciPy on the NumPy array.
scipy.stats.hmean(y)

2.7613412228796843

In [23]:
# Harmonic mean with SciPy on the pandas Series.
scipy.stats.hmean(z)

2.7613412228796843

## Geometric mean

In [24]:
# Geometric mean by hand: the n-th root of the product of all n values.
gmean = math.prod(x) ** (1 / len(x))
gmean

4.677885674856041

In [25]:
# Geometric mean with SciPy on the NumPy array.
scipy.stats.gmean(y)

4.67788567485604

In [26]:
# Geometric mean with SciPy on the pandas Series.
scipy.stats.gmean(z)

4.67788567485604

## Median

In [27]:
# Median by hand: the middle value of the sorted data when the count is odd,
# otherwise the average of the two middle values.
n = len(x)
ordered = sorted(x)
mid = n // 2
median_ = ordered[mid] if n % 2 else 0.5 * (ordered[mid - 1] + ordered[mid])

median_

4

In [28]:
# Show the data again for reference.
x

[8.0, 1, 2.5, 4, 28.0]

In [21]:
# Drop the last value to get an even number of points; median_low() then
# returns the smaller of the two middle values.
print(sorted(x[:-1]))
statistics.median_low(x[:-1])

[1, 2.5, 4, 8.0]


2.5

In [31]:
# median_high() returns the larger of the two middle values instead.
statistics.median_high(x[:-1])

4

In [22]:
# Pitfall: the statistics median functions rely on sorting, and NaN does not
# order consistently against numbers. The sorted() result below is not in
# ascending order, so these "medians" should not be trusted; strip NaN
# values before calling them.
print(statistics.median(x_with_nan))
print(statistics.median_low(x_with_nan))
print(statistics.median_high(x_with_nan))
sorted(x_with_nan)

6.0
4
8.0


[1, 2.5, 4, 8.0, nan, 28.0]

In [23]:
# Median with NumPy (function) and with pandas (Series method).
median_ = np.median(y)
print(median_)
z.median()

4.0


4.0

In [35]:
# With an even number of values, np.median() averages the two middle values.
median_ = np.median(y[:-1])
print(median_)

3.25


## Mode

In [25]:
u = [2, 3, 2, 8, 12]

v = [12, 15, 12, 15, 21, 15, 12]

# Mode by hand: among the distinct values of u, pick the one with the highest
# occurrence count (ties go to the larger value).
mode_ = max(set(u), key=lambda value: (u.count(value), value))
mode_

2

In [26]:
# The distinct values of u that the hand-rolled mode iterates over.
set(u)

{2, 3, 8, 12}

In [37]:
# Mode using the standard-library statistics module.
mode_ = statistics.mode(u)
mode_

2

In [38]:
# Convert both samples to NumPy arrays, then take the mode of u with SciPy;
# the result carries both the mode and its count.
u = np.array(u)
v = np.array(v)

mode_ = scipy.stats.mode(u)
mode_

ModeResult(mode=2, count=2)

In [39]:
# In v both 12 and 15 occur three times; scipy.stats.mode() reports a single
# value (12 here).
mode_ = scipy.stats.mode(v)
mode_

ModeResult(mode=12, count=3)

In [41]:
# The result exposes the modal value and its frequency as attributes.
print(mode_.mode)
print(mode_.count)

12
3


In [42]:
# pandas: Series.mode() returns every modal value (both 12 and 15 for v) and
# leaves the NaN in w out of the result.
u = pd.Series(u)
v = pd.Series(v)
w = pd.Series([2, 2, math.nan])

print(u.mode(), v.mode(), w.mode(), sep="\n")

0    2
dtype: int32
0    12
1    15
dtype: int32
0    2.0
dtype: float64


# Measures of variability

In [43]:
# Sample variance by hand: the sum of squared deviations from the mean,
# divided by n - 1.
n = len(x)
mean_ = sum(x) / n
squared_deviations = [(item - mean_) ** 2 for item in x]
var_ = sum(squared_deviations) / (n - 1)
var_

123.19999999999999

In [44]:
# Sample variance using the standard-library statistics module.
var_ = statistics.variance(x)
var_

123.2

In [45]:
# NumPy, function form; ddof=1 divides by n - 1 to give the sample variance.
var_ = np.var(y, ddof=1)
var_

123.19999999999999

In [46]:
# NumPy, method form, again with ddof=1 for the sample variance.
var_ = y.var(ddof=1)
var_

123.19999999999999

In [47]:
# pandas: Series.var() with ddof=1 (sample variance).
z.var(ddof=1)

123.19999999999999

In [48]:
# Standard deviation by hand: the square root of the variance computed above.
std_ = var_** 0.5
std_

11.099549540409285

In [49]:
# Sample standard deviation using the standard-library statistics module.
std_ = statistics.stdev(x)
std_

11.099549540409287

In [50]:
# NumPy, function form; ddof=1 gives the sample standard deviation.
np.std(y, ddof=1)

11.099549540409285

In [51]:
# NumPy, method form, with ddof=1.
y.std(ddof=1)

11.099549540409285

In [52]:
# pandas: Series.std() with ddof=1 (sample standard deviation).
z.std(ddof=1)

11.099549540409285

## Skewness