In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("train.csv", index_col=[0])

In [3]:
df.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


In [4]:
df["Weight"] = np.random.rand(len(df), 1)

In [5]:
def weighted_avg(x, wts):
    return np.average(x, weights=wts, axis=0)

def weighted_std(x, wts):
    average = np.average(x, weights=wts, axis=0)
    xiance = np.average((x - average) ** 2, weights=wts, axis=0)
    return np.sqrt(xiance)

def weighted_skew(x, wts):
    """Calculates the weighted skewness"""
    return (np.average((x - weighted_avg(x, wts))**3, weights=wts, axis=0) /
            weighted_avg(x, wts)**(1.5))

def weighted_kurtosis(x, wts):
    """Calculates the weighted skewness"""
    return (np.average((x - weighted_avg(x, wts))**4, weights=wts, axis=0) /
            weighted_avg(x, wts)**(2))

In [6]:
funcs = [weighted_avg, weighted_std, weighted_skew, weighted_kurtosis]
metrics = ["LotArea", "LotFrontage", "SalePrice"]

In [7]:
def weighted_functions(df, funcs, metrics):
    names = [func.__name__ for func in funcs]
    groups = []
    
    for func in funcs:
        gb = df.groupby(["MSZoning", "SaleCondition"]).apply(lambda x: pd.Series(func(x[metrics], x["Weight"]), metrics))
        groups.append(gb)
        
    gb = df.groupby(["MSZoning", "SaleCondition"]).apply(lambda x: pd.Series(np.median(x[metrics]), metrics))
    groups.append(gb)
    names.append("median")
    
    return pd.concat(groups, axis=1, keys=names)

In [8]:
weighted_functions(df, funcs, metrics=metrics)

Unnamed: 0_level_0,Unnamed: 1_level_0,weighted_avg,weighted_avg,weighted_avg,weighted_std,weighted_std,weighted_std,weighted_skew,weighted_skew,weighted_skew,weighted_kurtosis,weighted_kurtosis,weighted_kurtosis,median,median,median
Unnamed: 0_level_1,Unnamed: 1_level_1,LotArea,LotFrontage,SalePrice,LotArea,LotFrontage,SalePrice,LotArea,LotFrontage,SalePrice,LotArea,LotFrontage,SalePrice,LotArea,LotFrontage,SalePrice
MSZoning,SaleCondition,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
C (all),Abnorml,8128.428483,69.92416,65834.311754,516.557832,18.865436,25122.75,30.69761,14.022568,-39151.57,1703.68,71.061346,148111100.0,8470.0,8470.0,8470.0
C (all),Alloca,8712.0,66.0,55993.0,0.0,0.0,7.275958e-12,0.0,0.0,-2.907166e-41,0.0,0.0,8.939087e-55,8712.0,8712.0,8712.0
C (all),Normal,8941.890003,57.167528,87354.13752,1980.979832,10.56975,41383.4,19894.39,12.168483,-397323.0,1929900.0,104.150542,470695900.0,9770.0,9770.0,9770.0
FV,Abnorml,3838.098921,36.55924,217295.808201,1008.667556,10.57875,83072.18,1084.195,1.346318,3202539.0,102224.9,13.687417,1397667000.0,3392.0,3392.0,3392.0
FV,Normal,6512.552016,,214364.060441,3122.052075,,48776.03,-6040.983,,1012556.0,3379951.0,,384421000.0,,,
FV,Partial,7246.054983,,225009.446136,2449.222178,,54522.46,-6874.1,,1979672.0,1343992.0,,714271900.0,,,
RH,Abnorml,8198.085644,53.45702,161972.753392,2481.736885,9.676051,33495.76,-11801.71,-3.226473,-32477.16,1182003.0,9.841783,57795290.0,7800.0,7800.0,7800.0
RH,Normal,7083.844495,,114313.564473,2428.400126,,26411.08,11409.75,,-49587.42,1798477.0,,55788070.0,,,
RL,Abnorml,11228.1522,,167013.610408,4579.393186,,95092.5,113492.4,,48467030.0,18822950.0,,68289090000.0,,,
RL,AdjLand,7766.068887,56.378831,98877.376004,1277.628728,10.660446,21988.25,-1149.365,-3.811208,163693.5,71580.57,12.926081,29466640.0,8285.0,8285.0,8285.0
