### In this workbook I will take a dummy objective function, along with some constraints, and try to optimize it using gradient descent

#### The data is available for download at https://archive.ics.uci.edu/ml/datasets/Air+Quality

In [None]:
## First, lets import libs

import numpy as np
import scipy 
import random
import math

# Matplotlib is the python plotting library and folks generally import it as "plt"
import matplotlib.pyplot as plt 

# Seaborn is a wrapper for Matplotlib and makes some things easier, generally imported as "sns"
import seaborn as sns 
import pandas as pd

In [None]:
### load the Air Quality CSV from disk (the file is read as ';'-separated)
# NOTE(review): this is an absolute, machine-specific path -- point it at your own copy of the file
csv_path = 'C:/Users/rajneesh.tiwari/Downloads/AirQualityUCI/AirQualityUCI.csv'
data = pd.read_csv(csv_path, sep=";")

# preview the first five rows (five is the default for head())
data.head()

### Let's consider just the `PT08.S1(CO)` and `NOx(GT)` columns

In [None]:
## for demo purposes keep only two columns of the raw frame

selected_columns = ['PT08.S1(CO)', 'NOx(GT)']
data_refined = data.loc[:, selected_columns]

In [None]:
## scatter the first column of data_refined (x-axis) against the second (y-axis)
fig, ax = plt.subplots()
ax.plot(data_refined.iloc[:, 0], data_refined.iloc[:, 1], "o")
plt.show()

In [None]:
# keep only the rows where both columns are strictly positive
# NOTE(review): the author flagged the non-positive readings as "some error";
# presumably they are missing-value markers -- confirm against the dataset description
sensor_positive = data_refined['PT08.S1(CO)'] > 0
nox_positive = data_refined['NOx(GT)'] > 0
data_refined = data_refined.loc[sensor_positive & nox_positive]

In [None]:
## same scatter again, now on the filtered rows: first column (x-axis) vs second column (y-axis)
fig, ax = plt.subplots()
ax.plot(data_refined.iloc[:, 0], data_refined.iloc[:, 1], "o")
plt.show()

In [None]:
### the scatter above looks roughly linear, so we will fit a linear regression

In [None]:
## append a constant column of ones so the model can learn an intercept term
## (assign returns a new frame carrying the extra column; the name is rebound to it)
data_refined = data_refined.assign(intercept=1)

In [None]:
def gradientDescent(x, y, theta, lr, m, numIterations, verbose=True):
    """Fit linear-model weights with batch gradient descent.

    Every iteration computes the prediction ``np.dot(x, theta)``, the cost
    ``sum((prediction - y) ** 2) / (2 * m)`` and the gradient
    ``np.dot(x.T, prediction - y) / m``, then moves ``theta`` one step of
    size ``lr`` against that gradient. The cost stored for iteration ``i`` is
    evaluated with the weights *before* that iteration's update.

    Parameters
    ----------
    x : array-like
        Feature matrix; converted with ``np.asarray(..., dtype=float)``, so
        nested lists work as well as arrays. Shapes are not validated here;
        the notebook passes a 2-D array whose rows are samples.
    y : array-like
        Target values, converted the same way as ``x``.
    theta : array-like
        Starting weights. The caller's object is not modified.
    lr : float
        Learning rate (step size).
    m : int
        Divisor used for both the cost and the gradient. The notebook passes
        the number of rows of ``x``.
    numIterations : int
        Number of update steps to run.
    verbose : bool, default True
        When True (the historical behaviour) print the cost of every
        iteration. Pass False to keep long runs from flooding the output.

    Returns
    -------
    theta : numpy.ndarray
        Weights after the final update.
    cost_array : list
        One cost value per iteration, in order.
    """
    # Accept lists / DataFrames as well as ndarrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    theta = np.asarray(theta, dtype=float)

    xTrans = x.transpose()  # loop-invariant, so computed once
    cost_array = []

    for i in range(numIterations):
        hypothesis = np.dot(x, theta)
        loss = hypothesis - y
        cost = np.sum(loss ** 2) / (2 * m)
        gradient = np.dot(xTrans, loss) / m
        theta = theta - lr * gradient
        if verbose:
            print("Iteration %d | Cost: %f"  % (i, cost))
        cost_array.append(cost)

    return theta, cost_array

In [None]:
### build the numpy inputs: features (intercept column first, then the sensor column) and target
feature_frame = data_refined.loc[:, ['intercept', 'PT08.S1(CO)']]
# the target is taken by position: column 1 of data_refined
# (that is 'NOx(GT)' given the order in which the two columns were selected)
target_series = data_refined.iloc[:, 1]
x = np.array(feature_frame)
y = np.array(target_series)

In [None]:
### set up the parameters and run gradient descent

m, n = x.shape                 # m rows, n columns of the feature matrix
numIterations = 1000
lr = 1e-9
theta = np.zeros(n)            #### simple all-zeros start; there are better ways of initializing theta
theta, cost_array = gradientDescent(x, y, theta, lr, m, numIterations)

In [None]:
### cost per iteration; the curve is expected to fall quickly

fig, ax = plt.subplots()
ax.plot(np.arange(numIterations), cost_array)
ax.set_ylabel('cost')
ax.set_xlabel('iterations')
plt.show()