In [1]:
from sklearn.metrics import  accuracy_score
from sklearn.datasets import load_iris
from itertools import combinations
import numpy as np
import pandas as pd
import seaborn as sn

In [2]:
data = pd.read_csv('data/Electricity-problem/electricity-normalized.csv')

In [47]:
class logisticregression():
    def __init__(self,train_data,train_labels,lr=0.1,batch_size=None,epoch=10,print_every = 10):
        dummy_once = np.ones((len(train_data),1))
        self.train_data = np.hstack((dummy_once,train_data))
        self.train_labels = train_labels
        
        self.params = np.zeros((len(self.train_data[0]),1))
        
        self.lr = lr
        self.epoch = epoch
        self.batch_size = batch_size
        self.print_every = print_every
        
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def cost(self,y,y_pred):
        return -np.mean(y*np.log(y_pred)+(1-y)*np.log(1-y_pred))
    
    def gradient(self,y,y_pred,x):
        return np.dot(np.linalg.inv(np.dot(x.T,x)),np.dot(x.T,(y_pred-y)))
    
    def train(self):
        for i in range(self.epoch):
            y_pred = self.sigmoid(np.dot(self.train_data,self.params))
            loss = self.cost(self.train_labels,y_pred)
            
            gra = self.gradient(self.train_labels,y_pred,self.train_data)
            
            self.params -= gra
            
            if self.print_every:
                if i%self.print_every == 0 or i == self.epoch-1:
                    print('Epoch : {}  Loss: {}'.format(i,loss))
    def predict(self,test_data):
        result = self.sigmoid(np.dot(test_data,self.params[1:])+self.params[0])
        result[result >= 0.5 ] = 1
        result[result < 0.5 ] = 0
        return result
    
    def evaluate(self,test_data,labels):
        accuracy = accuracy_score(self.predict(test_data),labels)
        return accuracy

In [48]:
train_size = 0.6
test_size = 0.4

In [49]:
data.head(2)

Unnamed: 0,date,day,period,nswprice,nswdemand,vicprice,vicdemand,transfer,class,target_class
0,0.0,2,0.0,0.056443,0.439155,0.003467,0.422915,0.414912,UP,1
1,0.0,2,0.021277,0.051699,0.415055,0.003467,0.422915,0.414912,UP,1


In [50]:
data['target_class']=data['class'].apply(lambda x: 1 if x.lower()=='up' else 0)

In [51]:
data.columns

Index(['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice',
       'vicdemand', 'transfer', 'class', 'target_class'],
      dtype='object')

In [52]:
data.columns
dataset = data[['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice',
       'vicdemand', 'transfer']].values
target = data['target_class'].values.reshape(-1,1)

In [53]:
final_data = np.hstack((dataset,target))
np.random.shuffle(final_data)

In [54]:
pd_data = pd.DataFrame(final_data)
pd_data.columns= ['date', 'day', 'period', 'nswprice', 'nswdemand', 'vicprice','vicdemand', 'transfer', 'target_class']
pd_data.tail(2)

Unnamed: 0,date,day,period,nswprice,nswdemand,vicprice,vicdemand,transfer,target_class
45310,0.907526,1.0,0.723404,0.040981,0.403154,0.002686,0.276282,0.611842,0.0
45311,0.867439,2.0,1.0,0.025039,0.372508,0.00161,0.399275,0.84386,0.0


In [55]:
final_data = pd_data.values

train_data = final_data[:int(len(final_data)*train_size)]
test_data = final_data[:int(len(final_data)*test_size)]

train_data.shape,test_data.shape

((27187, 9), (18124, 9))

In [56]:
X_train = train_data[:,:-1]
y_train = train_data[:,-1:]


X_test = test_data[:,:-1]
y_test = test_data[:,-1:]


X_train.shape,y_train.shape,X_test.shape,y_test.shape

((27187, 8), (27187, 1), (18124, 8), (18124, 1))

In [57]:
logistic = logisticregression(X_train,y_train,epoch=50)

In [58]:
logistic.train()

Epoch : 0  Loss: 0.6931471805599453
Epoch : 10  Loss: 0.5379902668495464
Epoch : 20  Loss: 0.5199251943573839
Epoch : 30  Loss: nan
Epoch : 40  Loss: nan
Epoch : 50  Loss: nan
Epoch : 60  Loss: nan
Epoch : 70  Loss: nan
Epoch : 80  Loss: nan
Epoch : 90  Loss: nan
Epoch : 100  Loss: nan
Epoch : 110  Loss: nan
Epoch : 120  Loss: nan
Epoch : 130  Loss: nan
Epoch : 140  Loss: nan
Epoch : 150  Loss: nan
Epoch : 160  Loss: nan
Epoch : 170  Loss: nan
Epoch : 180  Loss: nan
Epoch : 190  Loss: nan
Epoch : 199  Loss: nan




In [46]:
logistic.evaluate(X_test,y_test)

0.7549106157581108