-
Notifications
You must be signed in to change notification settings - Fork 0
/
feat_trans.py
63 lines (57 loc) · 1.78 KB
/
feat_trans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
import numpy as np
# Default number of bins; used as the default for ContinuationTrans(bin=...).
BIN = 20
# Strategy tags: FeatTransfer.cst_trans stores one of these and
# FeatTransfer.transfer dispatches on it.
TYPE_PROJECT = 1  # column is binned via ContinuationTrans._project_transfer
TYPE_MAP = 2  # column is value-mapped via DiscreteTrans._map_transfer
class DiscreteTrans():
    """Encode discrete values as consecutive integer codes.

    Codes are handed out in registration order starting at 0 and are kept
    in a private value -> code dictionary that ``_map_transfer`` applies to
    a pandas Series.
    """

    def __init__(self):
        self.__value = 0  # next code to hand out
        self.__dic = {}  # value -> integer code

    # protected method
    def _setup_dic(self, name):
        """Register ``name`` under the next free code and advance the counter.

        The caller is expected to check ``_find`` first: registering a name
        that is already present overwrites its code with a new one.
        """
        code = self.__value
        self.__dic[name] = code
        self.__value = code + 1

    def _find(self, name):
        """Return True if ``name`` already has a code."""
        return name in self.__dic

    def _map_transfer(self, df):
        """Return ``df`` with each value replaced by its code.

        Delegates to ``Series.map`` with the internal dictionary, so values
        that were never registered come back as NaN.
        """
        return df.map(self.__dic)
class ContinuationTrans():
    """Discretise a continuous column into integer-labelled bins.

    The column is cut with ``pandas.cut`` into ``bin`` intervals and each
    interval is labelled with its index ``0 .. bin - 1``.
    """

    def __init__(self, bin = BIN):
        """Store the bin count.

        Args:
            bin: Number of bins passed to ``pandas.cut``. The name shadows
                the builtin but is kept because callers may pass it by
                keyword.
        """
        self._bin = bin
        self.__label = []

    # protected method
    def _setup_bin(self):
        """Build the bin labels ``[0, 1, ..., bin - 1]``.

        The list is rebuilt from scratch rather than appended to, so calling
        this more than once (e.g. re-fitting the same object) cannot leave
        more labels than bins, which ``pandas.cut`` would reject.
        """
        self.__label = list(range(self._bin))

    def _project_transfer(self, df):
        """Return ``df`` cut into ``bin`` intervals labelled by bin index.

        ``_setup_bin`` must have been called first so that the labels exist.
        Bin edges are not stored on the instance; ``pandas.cut`` derives
        them from the data passed in on each call.
        """
        return pd.cut(df, self._bin, labels=self.__label)
class FeatTransfer(ContinuationTrans, DiscreteTrans):
    """Fit-then-apply encoder for a single feature column.

    ``cst_trans`` inspects a column and selects one of two strategies:
    binning (``TYPE_PROJECT``) for numeric columns with many distinct
    values, or value-to-code mapping (``TYPE_MAP``) for everything else.
    ``transfer`` then applies the selected strategy to a column.
    """

    def __init__(self):
        # Both bases are initialised explicitly: neither base __init__
        # chains to super(), so a single super().__init__() call would
        # leave DiscreteTrans uninitialised.
        ContinuationTrans.__init__(self)
        DiscreteTrans.__init__(self)
        self.__type = 0  # 0 means no strategy has been selected yet

    # private method
    def __cst_continuation_trans(self, df):
        """Prepare the bin labels; ``df`` itself is not inspected."""
        self._setup_bin()

    def __cst_discrete_trans(self, df):
        """Register every not-yet-seen value of ``df`` in the code mapping."""
        for value in df.values:
            if not self._find(value):
                self._setup_dic(value)

    # public method
    def cst_trans(self, df):
        """Select and prepare the encoding strategy for column ``df``.

        The column is treated as continuous when its first value is numeric
        and it has at least ``self._bin`` distinct values; otherwise it is
        treated as discrete.

        Args:
            df: A pandas Series (anything exposing ``nunique()`` and
                ``values``).

        Raises:
            IndexError: If ``df`` is empty, because the first value is
                probed to decide the column type.
        """
        count = df.nunique()
        value = df.values[0]
        # Values pulled out of a pandas/NumPy array are NumPy scalars. On
        # Python 3 np.int64 is not an ``int`` and np.float32 is not a
        # ``float``, so the NumPy abstract types must be accepted as well or
        # integer columns would never be binned.
        is_numeric = isinstance(value, (int, float, np.integer, np.floating))
        if is_numeric and count >= self._bin:
            self.__type = TYPE_PROJECT
            self.__cst_continuation_trans(df)
        else:
            self.__type = TYPE_MAP
            self.__cst_discrete_trans(df)

    def transfer(self, df):
        """Apply the strategy selected by ``cst_trans`` to ``df``.

        Returns:
            The encoded column, or ``None`` if ``cst_trans`` has not been
            called yet.
        """
        if self.__type == TYPE_MAP:
            return self._map_transfer(df)
        if self.__type == TYPE_PROJECT:
            return self._project_transfer(df)
        return None