In [8]:
import numpy as np
import pandas as pd
from helwig import *

## Hellwig's Method

Hellwig's method is a procedure for selecting the explanatory variables of a linear model. It is widely used in Poland — and probably only there, since it is very hard to find in scientific papers written in English.

$m_{k}$ - the set of variables in the $k$-th combination (there are $2^{p}-1$ non-empty combinations, where $p$ is the number of candidate variables)<br>
$r_{j}$ - correlation between $Y$ and $X_{j}$<br>
$r_{ij}$ - correlation between $X_{i}$ and $X_{j}$<br>
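$H_{k}=\sum\limits_{j \in m_{k}}\frac{r_{j}^2}{\sum\limits_{i \in m_{k}}|r_{ij}|}$<br>
Note that the inner sum includes the term $i=j$, for which $r_{jj}=1$, so for a single-variable combination $H_{k}=r_{j}^2$.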

Choose the combination of variables with the highest $H_{k}$

### Variable initialization

In [9]:
# First data set: ten observations of the response `y` and of five candidate
# explanatory variables `x1`..`x5` (one tuple per variable, same order of
# observations in every tuple).
y = (
    728530.0, 809045.0, 29586.0, 551013.0, 904632.0,
    168420.0, 296005.0, 477694.0, 853678.0, 309096.0,
)
x1 = (3.0, 7.0, 3.0, 4.0, 5.0, 6.0, 6.0, 5.0, 4.0, 4.0)
x2 = (63.0, 50.0, 43.0, 44.0, 43.0, 67.0, 54.0, 76.0, 31.0, 44.0)
x3 = (
    46867.0, 49536.0, 49054.0, 44196.0, 38832.0,
    40065.0, 37046.0, 35847.0, 34970.0, 33070.0,
)
x4 = (46.0, 24.0, 31.0, 75.0, 81.0, 43.0, 49.0, 40.0, 90.0, 75.0)
x5 = (
    919324.0, 926305.0, 2043301.0, 462866.0, 1640315.0,
    217086.0, 341262.0, 1486963.0, 346056.0, 372790.0,
)



In [10]:
# Response first, then the candidate explanatory variables; the result below
# refers to them by position as 'x1', 'x2', ...
h2 = Helwig(y, x1, x2, x3, x4, x5)
# The cell output is the selected combination of variables.
h2.compute_helwig()

('x4',)

### Sample

In [11]:
# Second data set: 20 observations of the response `Y` and of three candidate
# explanatory variables. The values are the same as in the R reference example
# quoted further down in this notebook.
Y = (1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5)
X1 = (2, 3, 2, 3, 4, 3, 4, 3, 4, 5, 4, 5, 4, 5, 5, 6, 5, 6, 7, 8)
X2 = (5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2)
X3 = (6, 5, 4, 8, 3, 9, 8, 7, 1, 1, 2, 2, 0, 0, 9, 9, 8, 6, 5, 4)

In [12]:
# Response first, then the three candidate explanatory variables.
h1 = Helwig(Y, X1, X2, X3)
# The cell output is the selected combination; it can be compared with the R
# reference result quoted below.
h1.compute_helwig()

('x1', 'x2')

Confirmation (the same example computed with an R implementation of Hellwig's method gives the same selection): https://www.goldenline.pl/grupy/Komputery_Internet/r/implementacja-metody-hellwiga-w-r,446734/<br>
"""
Przykład:<br>
-------------------<br>
Y<-c(1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5) <br>
X1<-c(2,3,2,3,4,3,4,3,4,5,4,5,4,5,5,6,5,6,7,8) <br>
X2<-c(5,5,5,5,5,5,5,4,4,4,4,4,4,3,3,3,3,2,2,2) <br>
X3<-c(6,5,4,8,3,9,8,7,1,1,2,2,0,0,9,9,8,6,5,4)<br>
Dane<-data.frame(Y,X1,X2,X3)<br>

ao.hellwig(4, Dane, "pearson")<br>
[1] 2 3<br>
<br>
(czyli Hellwig stawia na zmienne X1 i X2)<br>
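"""<br>
(Translation of the Polish text: "Przykład" means "Example"; the last line says "so Hellwig picks variables X1 and X2". The R output `[1] 2 3` gives column indices of `Dane`, where column 1 is `Y`, so columns 2 and 3 are `X1` and `X2`.)<br>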


In [13]:
# Per-combination scores: the output below is keyed by tuples of variable
# names; the largest value belongs to ('x1', 'x2'), the combination reported
# by compute_helwig() above (presumably these are the H_k values — confirm
# against the Helwig class).
h1.results

{('x1',): 0.7713675213675218,
 ('x2',): 0.8691796008869175,
 ('x3',): 0.010854998659876727,
 ('x1', 'x2'): 0.8696082545422866,
 ('x1', 'x3'): 0.7291573434763855,
 ('x2', 'x3'): 0.8594944373527115,
 ('x1', 'x2', 'x3'): 0.8326915859037232}

In [14]:
# Correlation table for the explanatory variables only (x1..x3, no response
# column in the output below) — presumably the r_ij used in the denominator
# of H_k; confirm against the Helwig class.
h1.get_corr_table()

Unnamed: 0,x1,x2,x3
x1,1.0,-0.886536,-0.072776
x2,-0.886536,1.0,0.023898
x3,-0.072776,0.023898,1.0
