In [1]:
import pandas as pd
import numpy as np
from scipy import stats 
import matplotlib.pyplot as plt

In [2]:
# Load the housing dataset from the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='housing.csv')

In [3]:
# Keep every column whose dtype is not 'object'; these are treated as the numeric features
data_num = data.select_dtypes(exclude=['object'])
# Feature names, in the same order as the columns of data_num
data_col = data_num.columns

In [4]:
# Independent NumPy copy of the numeric frame (rows = observations, columns = features)
data_array = data_num.to_numpy(copy=True)

### Calculating the Mean

In [7]:
# Column-wise (axis=0) arithmetic mean that skips NaN entries
n_mean = np.nanmean(data_array, axis=0) 

In [8]:
# Tabulate each feature name next to its mean
mean = pd.DataFrame({'Variable': data_col, 'Mean': n_mean})
mean

Unnamed: 0,Variable,Mean
0,longitude,-119.569704
1,latitude,35.631861
2,housing_median_age,28.639486
3,total_rooms,2635.763081
4,total_bedrooms,537.870553
5,population,1425.476744
6,households,499.53968
7,median_income,3.870671
8,median_house_value,206855.816909


### Calculating the 'Trimmed Mean'

To compute the trimmed mean, we use the `stats.trim_mean` function from the SciPy library.
The value 0.1 corresponds to 10%: it is the fraction of the most extreme values that is trimmed (removed) from
each end of the sorted data before the mean is taken.

In [None]:
# Trim 10% of the observations from each tail of every column, then average the rest.
# NaNs are removed per column first (mirroring the NaN-skipping mean/median used in
# this notebook): trim_mean works on the sorted data, so missing values would
# otherwise be counted as observations and distort the trim or yield NaN.
tmean = np.array([
    stats.trim_mean(column[~np.isnan(column)], 0.1)
    for column in data_array.T  # iterate feature by feature
])

In [None]:
# Materialise both sequences as plain Python lists before building the table
tmean = list(tmean)
data_col = list(data_col)

# Tabulate each feature name next to its trimmed mean
trimed_mean = pd.DataFrame({'Variable': data_col, 'Trimmed mean': tmean})
trimed_mean

### Weighted mean

Since we are computing a weighted mean, we use the population as the weight so that each observation
contributes in proportion to the number of people it represents in the sample.

In [None]:
# Population-weighted mean of every numeric feature.
population_weights = data['population']

wt_avg = []
for item in data_col:
    # Mask NaN entries so they are excluded from the weighted average
    masked_values = np.ma.MaskedArray(data[item], mask=np.isnan(data[item]))
    # The same formula is applied to every feature, 'population' included: the
    # previous ad-hoc halving of that single entry had no statistical basis and
    # made the reported number something other than a weighted mean.
    wt_avg.append(np.ma.average(masked_values, axis=0, weights=population_weights))

In [None]:
# Tabulate each feature name next to its population-weighted mean
weighted_mean = pd.DataFrame({'Variable': data_col, 'Weighted mean': wt_avg})
weighted_mean

### Median

In [None]:
n_median = np.nanmedian(data_array, axis=0) # plain (unweighted) column-wise median; NaN entries are skipped

In [None]:
# Tabulate each feature name next to its median.
# The value column is labelled 'Median' (it was previously mislabelled 'Mean').
median = pd.DataFrame({'Variable': data_col, 'Median': n_median})
median

### Weighted median

To calculate this, we will use the `wquantiles` library (imported as `weighted`). It is built on NumPy, which is its only dependency.

In [None]:
# %pip install wquantiles  # uncomment to install; provides the `weighted` module imported below

In [None]:
import weighted

In [None]:
# Population-weighted median of every numeric feature.
weights = data['population']

wt_median = []
for item in data_col:
    values = data[item]
    # Keep only rows where both the feature and its weight are present, so a
    # missing value cannot take part in the sort or carry weight in the result
    # (consistent with the NaN masking used for the weighted mean).
    valid = values.notna() & weights.notna()
    # Every feature is treated the same way, 'population' included: the previous
    # ad-hoc halving of that entry made it something other than a weighted median.
    wt_median.append(weighted.median(values[valid], weights[valid]))

In [None]:
# Tabulate each feature name next to its population-weighted median
weighted_median = pd.DataFrame({'Variable': data_col, 'Weighted median': wt_median})
weighted_median

### Mode

We will analyse the mode using the categorical data, since the mode is the natural measure of central tendency for non-numeric values.

In [None]:
# Select the categorical (object-dtype) columns. This is the exact complement of
# the numeric selection made with exclude=['object'], so no column can end up in
# both groups regardless of its numeric dtype.
data_cat = data.select_dtypes(include=['object'])

In [None]:
# scipy.stats.mode only supports numeric arrays (non-numeric input is rejected by
# recent SciPy releases), so the per-column mode is computed with pandas instead.
# Series.mode() returns the modes sorted, so ties resolve to the smallest value.
modal_values = [data_cat[col].mode().iloc[0] for col in data_cat.columns]
modal_counts = [
    int((data_cat[col] == value).sum())
    for col, value in zip(data_cat.columns, modal_values)
]
# Keep a (values, counts) pair of shape (1, n_columns) so that n_mode[0][0][k]
# is the mode of column k and n_mode[1][0][k] is how often it occurs.
n_mode = (
    np.array([modal_values], dtype=object),
    np.array([modal_counts]),
)
n_mode

In [None]:
# Summarise the first categorical column: its most frequent value and how many
# times that value occurs. The labels now describe what is stored (the first was
# 'Variable' although it holds the modal value; the second was misspelled).
mode = {
    'Mode': n_mode[0][0][0],
    'highest occurrence': n_mode[1][0][0],
}

In [None]:
# Display the summary dict built in the previous cell
mode

In [None]:
# Turn the scalar-valued summary into a one-row table; an explicit index is
# required because every value in the mapping is a scalar.
mode = pd.DataFrame(data=mode, index=[0])
mode