# Half Sapce Classifier using Linear Programming

## Step 1: Import All required Libraries

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.optimize import linprog

## Step 2: Read CSV file (For this i have uploded the file on google colab) 

In [46]:
#df = pd.read_csv('file.csv')
df = pd.read_csv('file.csv')
print(df.shape)
print(df)

(2665, 6)
      Temperature   Humidity       Light          CO2  HumidityRatio  Occupancy
0       23.700000  26.272000  585.200000   749.200000       0.004764          1
1       23.718000  26.290000  578.400000   760.400000       0.004773          1
2       23.730000  26.230000  572.666667   769.666667       0.004765          1
3       23.722500  26.125000  493.750000   774.750000       0.004744          1
4       23.754000  26.200000  488.600000   779.000000       0.004767          1
...           ...        ...         ...          ...            ...        ...
2660    24.290000  25.700000  808.000000  1150.250000       0.004829          1
2661    24.330000  25.736000  809.800000  1129.200000       0.004848          1
2662    24.330000  25.700000  817.000000  1125.800000       0.004841          1
2663    24.356667  25.700000  813.000000  1123.000000       0.004849          1
2664    24.408333  25.681667  798.000000  1124.000000       0.004860          1

[2665 rows x 6 columns]


## Step 3: Store the features and target in X , Y seprately and since target has value 0,1 changing the value 0 to -1

In [52]:
# X = df.iloc[:,:-1].values
# Y = df.iloc[:,-1:].values
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1:].values
Y[Y==0]=-1
print(X.shape,X)
print(Y.shape,Y)

(2665, 5) [[2.37000000e+01 2.62720000e+01 5.85200000e+02 7.49200000e+02
  4.76416302e-03]
 [2.37180000e+01 2.62900000e+01 5.78400000e+02 7.60400000e+02
  4.77266099e-03]
 [2.37300000e+01 2.62300000e+01 5.72666667e+02 7.69666667e+02
  4.76515255e-03]
 ...
 [2.43300000e+01 2.57000000e+01 8.17000000e+02 1.12580000e+03
  4.84075873e-03]
 [2.43566667e+01 2.57000000e+01 8.13000000e+02 1.12300000e+03
  4.84855928e-03]
 [2.44083333e+01 2.56816667e+01 7.98000000e+02 1.12400000e+03
  4.86020770e-03]]
(2665, 1) [[1]
 [1]
 [1]
 ...
 [1]
 [1]
 [1]]


## Step 4: Data Preprocessing and Splitting of data
 

In [62]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_scale

array([[0.83168317, 0.44513204, 0.34479305, 0.33003334, 0.70421197],
       [0.8359604 , 0.44705255, 0.34078657, 0.34152347, 0.70830847],
       [0.83881188, 0.44065084, 0.33740855, 0.35103018, 0.70468898],
       ...,
       [0.98138614, 0.38410243, 0.48136692, 0.71638882, 0.74113545],
       [0.98772277, 0.38410243, 0.47901016, 0.71351629, 0.74489576],
       [1.        , 0.38214635, 0.47017234, 0.71454219, 0.75051096]])

A. Split the data in 70,30 for training and testing the model

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print(X_train, X_test, y_train, y_test)

(1865, 5) (800, 5) (1865, 1) (800, 1)
[[0.42772277 0.42322397 0.34271125 0.52235616 0.46154661]
 [0.22574257 0.50040011 0.         0.26160554 0.41120971]
 [0.05168317 0.02560683 0.         0.00615542 0.00255733]
 ...
 [0.14019802 0.28700987 0.25511857 0.25151748 0.21989708]
 [0.48237624 0.28736552 0.         0.34444729 0.38830971]
 [0.07128713 0.22192585 0.24286346 0.11644011 0.14354726]] [[0.65787836 0.87535724 0.27127917 0.88953211 0.944271  ]
 [0.33267327 0.22646039 0.         0.2321108  0.2689721 ]
 [0.0950495  0.01307015 0.         0.02205694 0.01218616]
 ...
 [0.1639604  0.10669512 0.         0.06976148 0.10613338]
 [0.1639604  0.09602561 0.         0.05898948 0.09873902]
 [0.8370297  0.42944785 0.29091177 0.35624519 0.69438299]] [[ 1]
 [-1]
 [-1]
 ...
 [ 1]
 [-1]
 [ 1]] [[ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [

B. Split the data in 80,20 for training and testing the model 

In [64]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(2132, 5) (533, 5) (2132, 1) (533, 1)


C. Split the data in 90,10 for training and testing the model 

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(2398, 5) (267, 5) (2398, 1) (267, 1)


## Step 5: Creating the constraints for LP solver that is w (optimization variable), c=-1 and X_train = inner product of Xi*Yi.   

In [67]:
c = np.full((y_train.shape[0],1),1)
print(c.shape,c)
l = X_train.shape[1]
w = np.full((1,l),1)
print(w.shape,w)
for i in range(X_train.shape[0]):
  for j in range(X_train.shape[1]):
    X_train[i,j]=X_train[i,j]*y_train[i]       
print(X_train)

(2398, 1) [[1]
 [1]
 [1]
 ...
 [1]
 [1]
 [1]]
(1, 5) [[1 1 1 1 1]]
[[-2.06000000e+01 -2.43900000e+01 -0.00000000e+00 -4.68250000e+02
  -3.65516051e-03]
 [ 2.28900000e+01  2.76000000e+01  4.32750000e+02  1.04360000e+03
   4.76604528e-03]
 [-2.03900000e+01 -2.25000000e+01 -0.00000000e+00 -4.35000000e+02
  -3.32675847e-03]
 ...
 [ 2.07900000e+01  2.47900000e+01  4.33000000e+02  6.72666667e+02
   3.75947872e-03]
 [-2.22300000e+01 -2.47933333e+01 -0.00000000e+00 -7.63250000e+02
  -4.10884135e-03]
 [ 2.05000000e+01  2.41800000e+01  4.12200000e+02  5.41000000e+02
   3.60109528e-03]]


## Step 6: Using Linprog for optimization of w. 

In [68]:
res = linprog(c=w, A_ub=X_train, b_ub=c)
print(res)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


     con: array([], dtype=float64)
     fun: 6.250000015343017e-13
 message: 'Optimization terminated successfully.'
     nit: 3
   slack: array([1., 1., 1., ..., 1., 1., 1.])
  status: 0
 success: True
       x: array([1.25e-13, 1.25e-13, 1.25e-13, 1.25e-13, 1.25e-13])


## Step 7: Checking optimized value of w. 

In [69]:
print(res.x)

[1.25e-13 1.25e-13 1.25e-13 1.25e-13 1.25e-13]


## Step 8: Calculation target as inner product of wi*xi. 

In [70]:
target = np.full(y_test.shape,0)
res.x.transpose()
for i in range(X_test.shape[0]):
  for j in range(X_test.shape[1]):
    target[i] = target[i] + (res.x[j]*X_test[i,j])
  if target[i]>=0 :
    target[i] = 1
  else :
    target[i] = -1

## Step 9: Checking for Accuracy. 

In [71]:
correctly_classified = 0
for c in range( np.size( y_test ) ) :  
  if y_test[c] == target[c] :            
    correctly_classified = correctly_classified + 1
  c = c + 1 
print("Accuracy = ",(correctly_classified/c) * 100)

Accuracy =  35.95505617977528
