In [1]:
from sklearn.metrics import  accuracy_score
from sklearn.datasets import load_iris
from itertools import combinations
import numpy as np
import pandas as pd
import ast

In [2]:
data = load_iris()

In [3]:
data['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
class logisticregression():
    def __init__(self,train_data,train_labels,lr=0.01,batch_size=None,epoch=10,print_every = 10):
        dummy_once = np.ones((len(train_data),1))
        self.train_data = np.hstack((dummy_once,train_data))
        self.train_labels = train_labels
        
        self.params = np.zeros((len(self.train_data[0]),1))
        
        self.lr = lr
        self.epoch = epoch
        self.batch_size = batch_size
        self.print_every = print_every
        
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def cost(self,y,y_pred):
        return -np.mean(y*np.log(y_pred)+(1-y)*np.log(1-y_pred))
    
    def gradient(self,y,y_pred,x):
        return np.dot(x.T,(y_pred-y))
    
    def train(self):
        for i in range(self.epoch):
            y_pred = self.sigmoid(np.dot(self.train_data,self.params))
            loss = self.cost(self.train_labels,y_pred)
            
            gra = self.gradient(self.train_labels,y_pred,self.train_data)
            
            self.params -= self.lr*gra
            
            if self.print_every:
                if i%self.print_every == 0 or i == self.epoch-1:
                    print('Epoch : {}  Loss: {}'.format(i,loss))
    def predict(self,test_data):
        result = self.sigmoid(np.dot(test_data,self.params[1:])+self.params[0])
        result[result >= 0.5 ] = 1
        result[result < 0.5 ] = 0
        return result
    
    def evaluate(self,test_data,labels):
        accuracy = accuracy_score(self.predict(test_data),labels)
        return accuracy

In [5]:
def get_combinations(items,number):
    comb_list = []
    for c in combinations(items, number):
        c = list(c)
        c.sort()
        comb_list.append(c)
        
    return comb_list

In [6]:
def get_combinations_with(items,item,number):
    comb_list = []
    for i in get_combinations(items,number):
        if item in i:
            comb_list.append(i)
        
    return comb_list
    

Given a data and a set of features this method prepares the data 



In [7]:
def wrapper_helper(data,feature_names,target_name,train_size = 0.6,test_size = 0.4):
    dataset = data['data'].copy()
    target = data['target'].reshape(-1,1)
    
    final_data = np.hstack((dataset,target))
    np.random.shuffle(final_data)
    
    pd_data = pd.DataFrame(final_data)
    pd_data.columns= ['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)','target']
    pd_data = pd_data[(pd_data['target']==0) | (pd_data['target']==1)]
    pd_data.tail(2)
    

    col_names = np.concatenate((feature_names,target_name),axis=None)
    final_data = pd_data[col_names].values

    train_data = final_data[:int(len(final_data)*train_size)]
    test_data = final_data[int(len(final_data)*train_size):]
    
    

    train_data.shape,test_data.shape 
    
    X_train = train_data[:,:-1]
    y_train = train_data[:,-1:]


    X_test = test_data[:,:-1]
    y_test = test_data[:,-1:]


    return X_train,y_train,X_test,y_test

In [8]:
def wrapper_method(data,names,target_class='target',k=2,visualize=False):
    number = 1
    classes = get_combinations(names,number)
    
    result = []
    while True:
        wrapper_output = {}

        for i in range(len(classes)):
            X_train,y_train,X_test,y_test = wrapper_helper(data,classes[i],[target_class])

            logistic = logisticregression(X_train,y_train,epoch=10,print_every=None)
            logistic.train()

            result = logistic.evaluate(X_test,y_test)
            wrapper_output[i] = round(result,3)

        
        
        best = sorted(wrapper_output,key=wrapper_output.get)
        best = best[::-1]
        
        if visualize:
            print('================================\n')
            print('INPUT ==>',classes,'\n')
            print('SELECTED ==>',classes[best[0]],'\n')
            print('ACCURACY ==>',wrapper_output)
        
        
        if number == k:
            new_wrapper_output = {}
            for k in wrapper_output.keys():
                new_wrapper_output[str(classes[k])] = wrapper_output[k]
                
            result = new_wrapper_output
            break
        
        lis = get_combinations_with(names,classes[best[0]][0],number+1)
        if visualize:
            print('\n NEW LIST ==>',lis)
        
        number +=1
        classes = lis
    
    return result,classes[best[0]]

In [9]:

wrapper_method(data,data['feature_names'],4,visualize=True)

KeyError: "['4'] not in index"

In [10]:
data = pd.read_csv('../../dataset_playground/data/Electricity-problem/electricity-normalized.csv')

In [11]:
data['target_class']=data['class'].apply(lambda x: 1 if x.lower()=='up' else 0)
col_names = data.columns
col_names

Index(['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice',
       'vicdemand', 'transfer', 'class', 'target_class'],
      dtype='object')

In [12]:
elec_data = data[['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice',
       'vicdemand', 'transfer', 'target_class']]

In [13]:
wrapper_method(elec_data,['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice',
       'vicdemand', 'transfer'],[ 'target_class'],3,8)

KeyError: 'data'