## Project 3


#### Linear Regression Algorithm

In [1]:
import numpy as np
import pandas as pd

def linear_regression(x, y):
    X = np.column_stack((np.ones(len(x)), x))

    # Calculate A
    A = np.dot(np.transpose(X), X)

    # Calculate b
    b = np.dot(np.transpose(X), y)

    # Calculate w
    w = np.dot(np.linalg.inv(A), b)
    
    return w

#### Using Linear Regression Algorithm on Training data

In [2]:
training_data = pd.read_csv('Project3_Training.csv')
x = training_data.iloc[:,:-1]
y = training_data.iloc[:,-1]
w = linear_regression(x, y)
np.set_printoptions(precision=4, suppress=True)

print("w:", w)

w: [5.8087 0.3945 0.1141 0.4222]


#### Use weights to estimate course cores in predict data

In [3]:
predict_data = pd.read_csv('Project3_Predict.csv')
predictions = np.column_stack((predict_data, np.zeros(len(predict_data))))

for data in predictions:
    grade = w[0]
    grade += data[0] * w[1]
    grade += data[1] * w[2]
    grade += data[2] * w[3]
    data[3] = grade

print(predictions)

[[ 49.      79.      71.      64.1236]
 [ 56.      21.      46.      49.7134]
 [ 58.     100.      79.      73.4469]
 [ 61.      82.      58.      63.7112]
 [ 62.      90.      62.      66.707 ]
 [ 66.      99.      75.      74.7998]
 [ 71.      73.      65.      69.584 ]
 [ 73.      87.      80.      78.3026]
 [ 73.      97.      73.      76.4884]
 [ 74.      73.      77.      75.8333]
 [ 74.      85.      66.      72.5586]
 [ 74.     100.      87.      83.1354]
 [ 78.      79.      56.      69.2303]
 [ 80.      72.      71.      75.553 ]
 [ 80.      82.      90.      84.7149]
 [ 81.      56.      60.      69.4782]
 [ 81.      80.      78.      79.8153]
 [ 82.     100.      79.      82.9137]
 [ 83.      80.      56.      71.3166]
 [ 83.      96.      64.      76.5194]
 [ 84.      98.      85.      86.0074]
 [ 84.     100.      77.      82.8583]
 [ 85.      93.      75.      81.6098]
 [ 85.      99.      69.      79.7614]
 [ 86.      95.      77.      83.0767]
 [ 86.     100.      85. 

#### Modify output of Training set to denote Pass or Fail and normalize data

In [4]:
x = training_data[['Midterm', 'Homework', 'Quiz']].values

# Modify course grade to Pass or Fail
y = (training_data['Course Grade'] >= 70).astype(int).values
y[y == 0] = -1

# Normalize data
training_mean = np.mean(x, axis=0)
training_std = np.std(x, axis=0)
x_normalized = (x - training_mean) / training_std

#### Logistic Regression Algorithm (with Gradient Descent)

In [5]:
def logistic_regression(x, y, w):
    n = len(y)
    sum = 0
    for i in range(n):
        yx = y[i] * x[i];
        ywx = y[i] * np.transpose(w) * x[i]
        sum += yx / (1 + np.exp(ywx))
    gradient = (-1 / n) * sum
    return gradient

def gradient_descent(x, y, w, n=0.01, max_iter=20000):
    for i in range(max_iter):
        # Run w through function
        grad = logistic_regression(x, y, w)
        
        w = w - n * grad
        
    return w

#### Run Algorithm with initial weights of 0

In [6]:
w = np.zeros(x_normalized.shape[1])

final_w = gradient_descent(x_normalized, y, w)

print(final_w)

[0.8762 0.7446 1.3895]


#### Use final weights to predict probablity of passing for the predict data

In [7]:
from scipy import stats

x_predict = predict_data.values
x_predict_normalized = (x_predict - training_mean) / training_std
n = len(predict_data)

# calculate predictions
prob_passing = np.zeros(n)
for i in range(n):
    p = x_predict_normalized[i]
    prob_passing[i] = final_w[0] * p[0] + final_w[1] * p[1] + final_w[2] * p[2]

# convert z-score to probablility
predictions = stats.norm.cdf(prob_passing)

result = np.column_stack((predict_data, predictions))
print(result)

[[ 49.      79.      71.       0.0011]
 [ 56.      21.      46.       0.    ]
 [ 58.     100.      79.       0.173 ]
 [ 61.      82.      58.       0.0006]
 [ 62.      90.      62.       0.0057]
 [ 66.      99.      75.       0.2248]
 [ 71.      73.      65.       0.0131]
 [ 73.      87.      80.       0.4062]
 [ 73.      97.      73.       0.3085]
 [ 74.      73.      77.       0.1744]
 [ 74.      85.      66.       0.0689]
 [ 74.     100.      87.       0.823 ]
 [ 78.      79.      56.       0.0107]
 [ 80.      72.      71.       0.1415]
 [ 80.      82.      90.       0.8362]
 [ 81.      56.      60.       0.0053]
 [ 81.      80.      78.       0.4644]
 [ 82.     100.      79.       0.7862]
 [ 83.      80.      56.       0.0285]
 [ 83.      96.      64.       0.2695]
 [ 84.      98.      85.       0.9192]
 [ 84.     100.      77.       0.7763]
 [ 85.      93.      75.       0.656 ]
 [ 85.      99.      69.       0.5334]
 [ 86.      95.      77.       0.7655]
 [ 86.     100.      85. 