In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

class Logistic_sklearn() :
  '''
  Logistic-regression pipeline built on scikit-learn.

  Typical use: readData -> (split) -> normalize -> trainAndPredict ->
  savePredictions.
  '''

  def readData(self, file) :
    '''
    Load the dataset from a CSV file and separate features from the label.

    The 'fire' column is converted to binary form ('yes' -> 1, 'no' -> 0)
    after stripping surrounding whitespace. The first column of the file is
    returned as the label and every remaining column as a feature, so 'fire'
    is expected to be the first column.

    Parameters:
      file: path (or file-like object) accepted by pandas.read_csv.

    Returns:
      (X, y): X holds every column except the first; y holds the first
      column as a 2-D array with a single column.

    Raises:
      ValueError: if 'fire' contains anything other than 'yes' or 'no'.
    '''
    data = pd.read_csv(file)
    encoded = data['fire'].str.strip().map({'yes':1 ,'no':0})
    # Unmapped labels become NaN; fail here with a clear message instead of
    # letting NaN leak into training.
    unknown = encoded.isna()
    if unknown.any() :
      bad_values = sorted({repr(value) for value in data.loc[unknown, 'fire']})
      raise ValueError("Column 'fire' must contain only 'yes' or 'no'; found: " + ', '.join(bad_values))
    data['fire'] = encoded
    X = data.iloc[:,1:].values
    y = data.iloc[:,:1].values
    return X, y

  def normalize(self, X_train, X_test) :
    '''
    Standardize the features with StandardScaler.

    The scaler is fitted on X_train only and the same transformation is then
    applied to X_test.

    Returns:
      (X_train, X_test): the scaled arrays.
    '''
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test

  def trainAndPredict(self, X_train, X_test, y_train, y_test) :
    '''
    Fit a LogisticRegression on the training data and evaluate it on the
    test data.

    Prints the confusion matrix and the accuracy percentage for the test
    set.

    Returns:
      y_pred: the labels predicted for X_test.
    '''
    classifier = LogisticRegression(random_state = 0)
    # y_train is flattened with ravel() before being handed to fit().
    classifier.fit(X_train, y_train.ravel())
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print('Confusion Matrix: ')
    print(cm)
    print('Accuracy Percentage: ',accuracy_score(y_test, y_pred) * 100)
    return y_pred

  def savePredictions(self, file, y_test, y_pred) :
    '''
    Write actual and predicted labels side by side to a CSV file.

    Parameters:
      file: path of the CSV file to create (overwritten if it exists).
      y_test: actual labels; a 2-D array (first column is used) or a 1-D
        sequence.
      y_pred: predicted labels, one per row to write.
    '''
    actual = np.asarray(y_test)
    if actual.ndim > 1 :
      actual = actual[:, 0]
    # newline='' lets the csv module control line endings; without it an
    # extra '\r' is written per row on platforms that translate '\n'.
    with open(file, 'w', newline='') as csvfile:
      fieldnames = ['Actual Label', 'Predicted Label']
      writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
      writer.writeheader()
      for index in range(len(y_pred)):
        writer.writerow({'Actual Label': actual[index], 'Predicted Label': y_pred[index]})

def main() :
  '''
  Run the scikit-learn pipeline end to end: read 'data.csv', hold out one
  third of the rows for testing, scale the features, train and evaluate the
  classifier, and write the test predictions to 'Output_Sklearn.csv'.
  '''
  pipeline = Logistic_sklearn()
  features, labels = pipeline.readData('data.csv')
  # Fixed random_state keeps the train/test partition the same on every run.
  partition = train_test_split(features, labels, test_size = 1/3, random_state = 0)
  train_features, test_features, train_labels, test_labels = partition
  train_features, test_features = pipeline.normalize(train_features, test_features)
  predicted = pipeline.trainAndPredict(train_features, test_features, train_labels, test_labels)
  pipeline.savePredictions('Output_Sklearn.csv', test_labels, predicted)

# Only run the pipeline when this code is executed directly.
if __name__ == "__main__" :
    main()

Confusion Matrix: 
[[27  3]
 [ 3 35]]
Accuracy Percentage:  91.17647058823529


In [2]:
class Logistic() :
  '''
  Binary logistic regression trained with batch gradient descent, written
  with NumPy only.

  Typical use: readData -> normalize -> splitData -> fit -> predict ->
  accuracy / savePredictions. Coefficients are handled as a row vector of
  shape (1, n_features + 1) whose first entry is the intercept.
  '''

  def readData(self, file) :
    '''
    Load the dataset from a CSV file and separate features from the label.

    The 'fire' column is converted to binary form ('yes' -> 1, 'no' -> 0)
    after stripping surrounding whitespace. The first column of the file is
    returned as the label and every remaining column as a feature, so 'fire'
    is expected to be the first column.

    Returns:
      (X, y): X holds every column except the first; y holds the first
      column as a 2-D array with a single column.

    Raises:
      ValueError: if 'fire' contains anything other than 'yes' or 'no'.
    '''
    data = pd.read_csv(file)
    encoded = data['fire'].str.strip().map({'yes':1 ,'no':0})
    # Unmapped labels become NaN; fail here with a clear message instead of
    # letting NaN leak into the gradient computation.
    unknown = encoded.isna()
    if unknown.any() :
      bad_values = sorted({repr(value) for value in data.loc[unknown, 'fire']})
      raise ValueError("Column 'fire' must contain only 'yes' or 'no'; found: " + ', '.join(bad_values))
    data['fire'] = encoded
    X = data.iloc[:,1:].values
    y = data.iloc[:,:1].values
    return X, y

  def normalize(self, X) :
    '''
    Min-max scale every column of X to the range [0, 1].

    Columns whose values are all equal (zero range) are mapped to 0 rather
    than dividing by zero.
    '''
    X = np.asarray(X, dtype=float)
    mins = np.min(X, axis = 0)
    maxs = np.max(X, axis = 0)
    rng = maxs - mins
    constant = rng == 0
    # Substitute a harmless divisor for zero-range columns; their output is
    # overwritten with 0 below.
    safe_rng = np.where(constant, 1.0, rng)
    return np.where(constant, 0.0, 1 - ((maxs - X) / safe_rng))

  def splitData(self, X, y, test_fraction = 1/3) :
    '''
    Split X and y into a leading training part and a trailing test part.

    No shuffling is done: the last int(test_fraction * len(X)) rows form the
    test set. When that count is 0 the test set is empty and all rows are
    kept for training.

    Returns:
      (X_train, X_test, y_train, y_test)
    '''
    test_size = int(test_fraction * len(X))
    # Slice by a positive index: a negative index of -0 would select
    # everything for the test set and nothing for training.
    split_at = len(X) - test_size
    return X[:split_at], X[split_at:], y[:split_at], y[split_at:]

  def _add_intercept(self, X) :
    '''Return X as a float array with a leading column of ones.'''
    X = np.asarray(X, dtype=float)
    return np.hstack((np.ones((X.shape[0], 1)), X))

  def sigmoid(self, b, X):
    '''
    Logistic function applied to the linear scores X . b^T.

    Parameters:
      b: coefficients, shape (1, d) or (d,).
      X: feature matrix of shape (n, d), intercept column included.

    Returns:
      Array of shape (n, 1) with values in [0, 1].
    '''
    b = np.atleast_2d(np.asarray(b, dtype=float))
    X = np.asarray(X, dtype=float)
    # exp() overflowing to inf still yields the correct limit 1/(1+inf) = 0,
    # so the overflow warning is suppressed.
    with np.errstate(over='ignore'):
      return 1.0/(1 + np.exp(-np.dot(X, b.T)))

  def logistic_gradient(self, b, X, y):
    '''
    Gradient of the summed log-loss with respect to b.

    Returns:
      Array of shape (1, d): (sigmoid(X . b^T) - y)^T . X
    '''
    X = np.asarray(X, dtype=float)
    residual = self.sigmoid(b, X) - np.asarray(y, dtype=float).reshape(-1, 1)
    return np.dot(residual.T, X)

  def cost(self, b, X, y):
    '''
    Log-loss (negative log-likelihood) of coefficients b on (X, y).

    The value is summed over the samples, not averaged, so the `converge`
    threshold used by gradient_descent is expressed in units of total loss.

    Returns:
      The loss as a Python float.
    '''
    y_hat = np.asarray(self.sigmoid(b, X), dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    # Keep probabilities strictly inside (0, 1) so log() never produces
    # -inf / NaN when the model saturates.
    eps = 1e-15
    y_hat = np.clip(y_hat, eps, 1 - eps)
    log_likelihood = np.dot(y, np.log(y_hat)) + np.dot(1 - y, np.log(1 - y_hat))
    return float(-log_likelihood)

  def gradient_descent(self, X, y, b, learning_rate, converge) :
    '''
    Minimise the log-loss with batch gradient descent.

    Iterates while the decrease in cost between consecutive steps is larger
    than `converge`; the loop therefore also stops if the cost goes up.

    Returns:
      The final coefficients, shape (1, d).
    '''
    b = np.atleast_2d(np.asarray(b, dtype=float))
    cost = self.cost(b, X, y)
    change_in_cost = 1
    while change_in_cost > converge :
      prev_cost = cost
      b = b - (learning_rate * self.logistic_gradient(b, X, y))
      cost = self.cost(b, X, y)
      change_in_cost = prev_cost - cost
    return b

  def fit(self, X, y, learning_rate = 0.01, converge = 0.001) :
    '''
    Train the model on features X and binary labels y.

    A column of ones is prepended to X for the intercept and the
    coefficients start at zero.

    Returns:
      Coefficients of shape (1, n_features + 1), intercept first.
    '''
    X = self._add_intercept(X)
    b = np.zeros((1, X.shape[1]))
    return self.gradient_descent(X, y, b, learning_rate, converge)

  def predict(self, b, X):
    '''
    Predict binary labels for X using coefficients b (as returned by fit).

    A sample is labelled 1 when its predicted probability is >= 0.5.

    Returns:
      1-D array of 0/1 labels, one per row of X (also for a single row).
    '''
    y_hat = self.sigmoid(b, self._add_intercept(X))
    return np.where(y_hat >= 0.5, 1, 0).ravel()

  def accuracy(self, y_test, y_pred):
    '''
    Percentage of predictions that match the actual labels.

    Raises:
      ValueError: if y_test and y_pred hold a different number of labels.
      ZeroDivisionError: if y_test is empty.
    '''
    actual = np.asarray(y_test).ravel()
    predicted = np.asarray(y_pred).ravel()
    if actual.shape != predicted.shape :
      raise ValueError('y_test and y_pred must contain the same number of labels')
    correctly_classified = int(np.count_nonzero(actual == predicted))
    return (correctly_classified / len(y_test) ) * 100

  def savePredictions(self, file, y_test, y_pred) :
    '''
    Write actual and predicted labels side by side to a CSV file.

    Parameters:
      file: path of the CSV file to create (overwritten if it exists).
      y_test: actual labels; a 2-D array (first column is used) or a 1-D
        sequence.
      y_pred: predicted labels, one per row to write.
    '''
    actual = np.asarray(y_test)
    if actual.ndim > 1 :
      actual = actual[:, 0]
    # newline='' lets the csv module control line endings; without it an
    # extra '\r' is written per row on platforms that translate '\n'.
    with open(file, 'w', newline='') as csvfile:
      fieldnames = ['Actual Label', 'Predicted Label']
      writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
      writer.writeheader()
      for index in range(len(y_pred)):
        writer.writerow({'Actual Label': actual[index], 'Predicted Label': y_pred[index]})

def main() :
  '''
  Run the hand-written logistic regression end to end: read 'data.csv',
  min-max scale the features, split into train/test parts, fit the
  coefficients, print the test accuracy and write the test predictions to
  'Output.csv'.
  '''
  regressor = Logistic()
  features, labels = regressor.readData('data.csv')
  # NOTE(review): scaling happens before the split, so the min/max used for
  # the training rows are computed over the test rows as well.
  features = regressor.normalize(features)
  train_features, test_features, train_labels, test_labels = regressor.splitData(features, labels)
  coefficients = regressor.fit(train_features, train_labels)
  predicted = regressor.predict(coefficients, test_features)
  print('Accuracy Percentage:', regressor.accuracy(test_labels, predicted))
  regressor.savePredictions('Output.csv', test_labels, predicted)

# Only run the pipeline when this code is executed directly.
if __name__ == "__main__" :
    main()

Accuracy Percentage: 85.29411764705883
