# Half Sapce Classifier using Perceptron

## Step 1: Import All required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Step 2: Read CSV file (For this i have uploded the file on google colab) 

In [36]:
df = pd.read_csv('file.csv')
print(df.shape,df)

(2665, 6)       Temperature   Humidity       Light          CO2  HumidityRatio  Occupancy
0       23.700000  26.272000  585.200000   749.200000       0.004764          1
1       23.718000  26.290000  578.400000   760.400000       0.004773          1
2       23.730000  26.230000  572.666667   769.666667       0.004765          1
3       23.722500  26.125000  493.750000   774.750000       0.004744          1
4       23.754000  26.200000  488.600000   779.000000       0.004767          1
...           ...        ...         ...          ...            ...        ...
2660    24.290000  25.700000  808.000000  1150.250000       0.004829          1
2661    24.330000  25.736000  809.800000  1129.200000       0.004848          1
2662    24.330000  25.700000  817.000000  1125.800000       0.004841          1
2663    24.356667  25.700000  813.000000  1123.000000       0.004849          1
2664    24.408333  25.681667  798.000000  1124.000000       0.004860          1

[2665 rows x 6 columns]


## Step 3: Store the features and target in X , Y seperately and since target has value 0,1 change label 0 to -1

In [45]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1:].values
print(X.shape,X)
Y[Y==0]=-1
print(Y.shape,Y)

(2665, 5) [[2.37000000e+01 2.62720000e+01 5.85200000e+02 7.49200000e+02
  4.76416302e-03]
 [2.37180000e+01 2.62900000e+01 5.78400000e+02 7.60400000e+02
  4.77266099e-03]
 [2.37300000e+01 2.62300000e+01 5.72666667e+02 7.69666667e+02
  4.76515255e-03]
 ...
 [2.43300000e+01 2.57000000e+01 8.17000000e+02 1.12580000e+03
  4.84075873e-03]
 [2.43566667e+01 2.57000000e+01 8.13000000e+02 1.12300000e+03
  4.84855928e-03]
 [2.44083333e+01 2.56816667e+01 7.98000000e+02 1.12400000e+03
  4.86020770e-03]]
(2665, 1) [[1]
 [1]
 [1]
 ...
 [1]
 [1]
 [1]]


## Step 4: Processing the Data and Splitting


In [73]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_scale

array([[0.83168317, 0.44513204, 0.34479305, 0.33003334, 0.70421197],
       [0.8359604 , 0.44705255, 0.34078657, 0.34152347, 0.70830847],
       [0.83881188, 0.44065084, 0.33740855, 0.35103018, 0.70468898],
       ...,
       [0.98138614, 0.38410243, 0.48136692, 0.71638882, 0.74113545],
       [0.98772277, 0.38410243, 0.47901016, 0.71351629, 0.74489576],
       [1.        , 0.38214635, 0.47017234, 0.71454219, 0.75051096]])

A. Split the data in 70,30 for training and testing the model 

In [74]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1865, 5) (800, 5) (1865, 1) (800, 1)


B. Split the data in 80,20 for training and testing the model 

In [75]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2132, 5) (533, 5) (2132, 1) (533, 1)


C. Split the data in 90,10 for training and testing the model 

In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2398, 5) (267, 5) (2398, 1) (267, 1)


## Step 5: Defining W,W1 for calculating the value of w

In [78]:
w1 = np.full((1,X_train.shape[1]),0)
w = []
w.append(w1)
len(w)

1

## Step 6: Calculating optimized value of w using perceptron rule that is w[i+1] = w[i] + yi*xi

In [81]:
t=0
s=0
for k in range(len(w)) :
    for i in range(X_train.shape[0]):
      for j in range(X_train.shape[1]):
        t += (w[k][0,j]*X_train[i,j])
        s += (y_train[i]*X_train[i,j])
    if (y_train[i]*t<=0):
      w2 = w[k] + s
      w.append(w2)
    else :
      break;
print(w[len(w)-1])

[[443443.65768144 443443.65768144 443443.65768144 443443.65768144
  443443.65768144]]


## Step 7: Creating target Array.

In [82]:
target = np.full((y_test.shape[0],1),0)
print(target.shape,X_test.shape)

(267, 1) (267, 5)


## Step 8: Calculation target as inner product of wi*xi. 

In [83]:
for i in range(X_test.shape[0]):
  for j in range(X_test.shape[1]):
    t += (w[len(w)-1][0,j]*X_test[i,j])
    if t>=0 :
      target[i,0] = 1
    else :
      target[i,0] = -1

## Step 9: Checking for Accuracy. 

In [84]:
correctly_classified = 0
for c in range( np.size( y_test ) ) :  
  if y_test[c] == target[c] :            
    correctly_classified = correctly_classified + 1
  c = c + 1 
print("Accuracy = ",(correctly_classified/c) * 100)

Accuracy =  35.95505617977528
