In [1]:
import csv
import numpy as np

In [2]:
import matplotlib.pyplot as plt

In [3]:
def loadCSV(filename):
    '''
    Read a CSV file whose first row is a header and whose other rows are numeric.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        Array holding the header row followed by the data rows. Because the
        header is text, NumPy stores every cell as a string; callers are
        expected to slice off row 0 and cast the remainder to float.

    Raises
    ------
    ValueError
        If a cell in a data row cannot be parsed as a float.
    '''
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields are preserved (as the csv docs require).
    with open(filename, 'r', newline='') as csvfile:
        rows = list(csv.reader(csvfile))
    # Row 0 is the header and stays as text; every later row must be numeric,
    # so a malformed cell fails loudly here rather than later in the analysis.
    rows[1:] = [[float(cell) for cell in row] for row in rows[1:]]
    return np.array(rows)

In [4]:
# Load the raw CSV (header row followed by data rows). The path is relative
# to the notebook's working directory.
dataset = loadCSV('Admission_Predict_Ver1.1.csv')

In [5]:
# Row 0 of the loaded array holds the column names.
title = dataset[0]

In [6]:
# Display the column names.
title

array(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating',
       'SOP', 'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='<U17')

In [7]:
# Drop the header row and cast the remaining rows to float. The builtin
# `float` replaces the `np.float` alias, which was removed in NumPy 1.24
# (it was only ever an alias for the builtin).
# NOTE: this rebinds `dataset`, so re-running the cell drops one more row.
dataset=dataset[1:].astype(float)

In [8]:
# Display the numeric data matrix (header row already removed).
dataset

array([[  1.  , 337.  , 118.  , ...,   9.65,   1.  ,   0.92],
       [  2.  , 324.  , 107.  , ...,   8.87,   1.  ,   0.76],
       [  3.  , 316.  , 104.  , ...,   8.  ,   1.  ,   0.72],
       ...,
       [498.  , 330.  , 120.  , ...,   9.56,   1.  ,   0.93],
       [499.  , 312.  , 103.  , ...,   8.43,   0.  ,   0.73],
       [500.  , 327.  , 113.  , ...,   9.04,   0.  ,   0.84]])

In [9]:
# Target: the last column ('Chance of Admit ' in the header).
y = dataset[:,-1]

In [10]:
# Features: every column except the first ('Serial No.') and the last (target).
X = dataset[:,1:-1]

In [11]:
# Display the raw feature matrix.
X

array([[337.  , 118.  ,   4.  , ...,   4.5 ,   9.65,   1.  ],
       [324.  , 107.  ,   4.  , ...,   4.5 ,   8.87,   1.  ],
       [316.  , 104.  ,   3.  , ...,   3.5 ,   8.  ,   1.  ],
       ...,
       [330.  , 120.  ,   5.  , ...,   5.  ,   9.56,   1.  ],
       [312.  , 103.  ,   4.  , ...,   5.  ,   8.43,   0.  ],
       [327.  , 113.  ,   4.  , ...,   4.5 ,   9.04,   0.  ]])

In [12]:
def normalize(X, verbose=True):
    '''
    Min-max scale each column of the feature matrix X into [0, 1].

    Parameters
    ----------
    X : array-like
        Feature matrix; minima and maxima are taken along axis 0.
    verbose : bool, optional
        When True (the default, matching the original behaviour) the
        per-column minimum, maximum and range, and the scaled matrix,
        are printed.

    Returns
    -------
    numpy.ndarray
        Array shaped like X in which each column's minimum maps to 0 and its
        maximum to 1. Columns with zero range (constant features) map to 0
        instead of producing NaN from a 0/0 division.
    '''
    mins = np.min(X, axis=0)
    maxs = np.max(X, axis=0)
    rng = maxs - mins

    # A constant column has rng == 0; divide by 1 there to avoid 0/0 and
    # then force the result to 0 for that column.
    constant = rng == 0
    safe_rng = np.where(constant, 1, rng)
    norm_X = np.where(constant, 0.0, 1 - ((maxs - X) / safe_rng))

    if verbose:
        print(mins)
        print(maxs)
        print(rng)
        print('normalized X')
        print(norm_X)
    return norm_X

In [13]:
# Min-max scale the features; normalize() also prints its intermediate values.
normalized_X=normalize(X)

[290.   92.    1.    1.    1.    6.8   0. ]
[340.   120.     5.     5.     5.     9.92   1.  ]
[50.   28.    4.    4.    4.    3.12  1.  ]
normalized X
[[0.94       0.92857143 0.75       ... 0.875      0.91346154 1.        ]
 [0.68       0.53571429 0.75       ... 0.875      0.66346154 1.        ]
 [0.52       0.42857143 0.5        ... 0.625      0.38461538 1.        ]
 ...
 [0.8        1.         1.         ... 1.         0.88461538 1.        ]
 [0.44       0.39285714 0.75       ... 1.         0.5224359  0.        ]
 [0.74       0.75       0.75       ... 0.875      0.71794872 0.        ]]


In [59]:
def nomalize_to_binary(y):
    '''
    Binarize a numeric target: 0 for values below the mean of y, 1 otherwise.

    Parameters
    ----------
    y : array-like
        1-D numeric values.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels, one per element of y. Values equal to
        the mean are labelled 1.
    '''
    y = np.asarray(y)
    # NOTE(review): the original local was named `median` although it held
    # np.mean(y). The mean is kept here to preserve results; confirm which
    # threshold was actually intended.
    threshold = np.mean(y)
    return np.where(y < threshold, 0, 1)

In [60]:
# Turn the continuous admission chance into 0/1 labels around its mean.
normalized_y = nomalize_to_binary(y)

In [120]:
def grad_desc(X, y, beta, lr=.001, converge_change=.0001):
    '''
    Fit beta by batch gradient descent on cost_func.

    Each pass moves beta against log_gradient scaled by lr and repeats while
    the drop in cost between consecutive passes is larger than
    converge_change. The loop therefore also stops as soon as the cost
    rises or becomes NaN.

    Returns the final beta and num_iter; the counter starts at 1, so it is
    one more than the number of updates performed.
    '''
    num_iter = 1
    current_cost = cost_func(beta, X, y)
    improvement = 1
    while improvement > converge_change:
        step = lr * log_gradient(beta, X, y)
        beta = beta - step
        previous_cost, current_cost = current_cost, cost_func(beta, X, y)
        improvement = previous_cost - current_cost
        num_iter += 1

    return beta, num_iter

In [121]:
def log_gradient(beta, X, y):
    '''
    Gradient term used by grad_desc.

    Takes the difference between logistic_func(beta, X) and y (reshaped to
    one row per sample of X) and multiplies its transpose by X, i.e. the
    residuals are summed over samples, not averaged.
    '''
    n_samples = X.shape[0]
    residual = logistic_func(beta, X) - y.reshape(n_samples, -1)
    return np.dot(residual.T, X)
  


In [122]:
def cost_func(beta, X, y): 
    ''' 
    Log-loss style cost J for the predictions logistic_func(beta, X).

    Builds -y*log(p) - (1 - y)*log(1 - p) from the predictions p and the
    squeezed labels y, then reduces the result with np.mean.

    NOTE(review): `*` below is elementwise only when p is a plain ndarray
    with the same shape as the squeezed y. This notebook creates beta with
    np.matrix, so p is presumably a matrix as well; for matrices `*` means
    matrix multiplication, in which case step1/step2 would collapse to 1x1
    sums and np.mean would return the summed loss rather than the
    per-sample average. With a 2-D ndarray beta the same expression would
    instead broadcast to an (n, n) array. Confirm the intended scale before
    changing this, because grad_desc compares differences of this value
    against its converge_change threshold.

    NOTE(review): np.log returns -inf when a prediction is exactly 0 or 1,
    which would make the result inf or NaN.
    '''
    log_func_v = logistic_func(beta, X) 
    # Drop singleton dimensions from the labels.
    y = np.squeeze(y) 
    # Contribution of the positive-label term.
    step1 = y * np.log(log_func_v) 
    # Contribution of the negative-label term.
    step2 = (1 - y) * np.log(1 - log_func_v) 
    final = -step1 - step2 
    return np.mean(final) 

In [132]:
def logistic_func(beta, X):
    '''
    Sigmoid of the linear score: 1 / (1 + exp(-X . beta^T)).

    The result has whatever shape np.dot(X, beta.T) produces.
    '''
    linear_score = np.dot(X, beta.T)
    return 1.0 / (1 + np.exp(-linear_score))

In [133]:
# Start from an all-zero coefficient row, one entry per feature column.
# NOTE(review): np.matrix is no longer recommended by NumPy, but cost_func
# appears to rely on its `*` (matrix-product) semantics - confirm before
# switching to a plain ndarray.
beta = np.matrix(np.zeros(normalized_X.shape[1])) 

In [134]:
# Display the initial coefficients.
beta

matrix([[0., 0., 0., 0., 0., 0., 0.]])

In [135]:
# Fit the coefficients with the default learning rate and tolerance.
# NOTE: `beta` is both input and output, so re-running this cell resumes
# from the previously fitted values instead of from zeros.
beta, num_iter = grad_desc(normalized_X, normalized_y, beta) 

In [136]:
# Display the fitted coefficients.
beta

matrix([[ 2.24230058, -0.69599048,  2.4076364 , -0.01573474, -2.65866357,
         -1.22760353,  1.25074255]])

In [137]:
# Display the iteration counter returned by grad_desc (it starts at 1).
num_iter

2543

In [138]:
def pred_values(beta, X, threshold=.5):
    '''
    Predict 0/1 labels from the logistic model.

    Parameters
    ----------
    beta : numpy.ndarray or numpy.matrix
        Model coefficients, passed straight to logistic_func.
    X : numpy.ndarray
        Feature matrix, passed straight to logistic_func.
    threshold : float, optional
        Probability at or above which a sample is labelled 1. Defaults to
        .5, the cut-off that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Labels (1 where the predicted probability >= threshold, else 0)
        after np.squeeze has removed singleton dimensions.
    '''
    pred_prob = logistic_func(beta, X)
    pred_value = np.where(pred_prob >= threshold, 1, 0)
    return np.squeeze(pred_value)

In [139]:
# Predict labels for the same rows the coefficients were fitted on.
y_pred = pred_values(beta, normalized_X) 

In [140]:
# Count rows whose predicted label equals the binarized target. This is
# measured on the data used for fitting, not on a held-out set.
print("Correctly predicted labels:", np.sum(normalized_y == y_pred)) 

Correctly predicted labels: 354
