# This notebook describes the process needed to compare and visualize the data in a data dashboard

Here I will follow a specific guide, available at this link: https://realpython.com/python-statistics/

In [5]:
#Now we need to import important modules
import math
import statistics
import numpy as np
import scipy.stats
import pandas as pd

Now it is time to create some data to work with.

In [6]:
# Sample data: a small plain-Python list mixing ints and floats.
x = [8.0, 1, 2.5, 4, 28.0]
# Same values with a NaN spliced in at index 3, used to show how each
# statistic treats a missing value.
x_with_nan = x[:3] + [math.nan] + x[3:]
print(x)
x_with_nan

[8.0, 1, 2.5, 4, 28.0]


[8.0, 1, 2.5, nan, 4, 28.0]

In [7]:
# Build NumPy-array and pandas-Series counterparts of x and x_with_nan so the
# same data can be handed to each library.
y = np.array(x)
y_with_nan = np.array(x_with_nan)
z = pd.Series(x)
z_with_nan = pd.Series(x_with_nan)
# One newline-separated print; the output matches four separate print calls.
print(y, y_with_nan, z, z_with_nan, sep="\n")

[ 8.   1.   2.5  4.  28. ]
[ 8.   1.   2.5  nan  4.  28. ]
0     8.0
1     1.0
2     2.5
3     4.0
4    28.0
dtype: float64
0     8.0
1     1.0
2     2.5
3     NaN
4     4.0
5    28.0
dtype: float64


### Now we are going to analyze the given data. Statistical analysis will let us know the details.

##### To analyze the data we can do a number of operations on the dataset.
##### Mean, Weighted Mean, Geometric Mean, Harmonic Mean, Median, Mode

In [8]:
# Arithmetic mean of the NaN-free list, computed two ways with the statistics
# module: mean() and fmean().
mean_ = statistics.mean(x)
mean_2 = statistics.fmean(x)
print(mean_, mean_2, sep="\n")

8.7
8.7


In [9]:
# The same two calls on the list that contains NaN: both results come out
# as nan, as the printed output shows.
mean_ = statistics.mean(x_with_nan)
mean_2 = statistics.fmean(x_with_nan)
print(mean_, mean_2, sep="\n")

nan
nan


In [10]:
# A single NaN in the data turns the plain mean into nan. When that is not
# wanted, np.nanmean computes the mean while ignoring the NaN entries; it is
# called here on both the Python list and the ndarray.
mean_ = np.nanmean(x_with_nan)
mean_2 = np.nanmean(y_with_nan)
print(mean_, mean_2, sep="\n")

8.7
8.7


In [11]:
# Weighted mean: each value contributes in proportion to its weight.
# It is computed on the NaN-free data only; the NaN variant is not handled
# in this cell.
# x is re-declared with the same values as before, and y / z are rebuilt
# from it.
x = [8.0, 1, 2.5, 4, 28.0]
w = np.array([0.1, 0.2, 0.3, 0.25, 0.15])  # one weight per element of x
y = np.array(x)
z = pd.Series(x)
weighted_mean = np.average(y, weights=w)
print(weighted_mean)

6.95


In [13]:
# Harmonic mean.
# The harmonic mean is the reciprocal of the arithmetic mean of the
# reciprocals of all items in the dataset: n / (1/x_1 + 1/x_2 + ... + 1/x_n).

# NaN-free data: gives a finite value.
hmean_ = statistics.harmonic_mean(x)
print(hmean_)

# Data containing NaN: the result is nan (second value in the output).
hmean_2 = statistics.harmonic_mean(x_with_nan)
print(hmean_2)

2.7613412228796843
nan


In [14]:
# The geometric mean is the n-th root of the product of all n elements x_i
# in a dataset x: (x_1 * x_2 * ... * x_n) ** (1 / n), where i = 1, 2, ..., n.
# It is computed here on the NaN-free dataset only.

gmean = statistics.geometric_mean(x)
print(gmean)

4.67788567485604


##### The code above demonstrates several types of mean. Computing a mean is an essential part of almost any data analysis.

##### Now we will discuss the median

In [18]:
# The sample median is the middle element of a sorted dataset. The main
# difference between the behavior of the mean and the median is how they
# respond to outliers or extremes in the dataset.

# Median calculation with NumPy.

# x has five elements, so the median is the single middle value after sorting.
median_ = np.median(x)
print(median_)

# x[:-1] drops the last element (28.0), leaving four values. With an even
# count, np.median returns the average of the two middle values.
median_2 = np.median(x[:-1])
print(median_2)

4.0
