In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fmin_ncg

In [2]:
pd.options.display.float_format = '{:20,.3f}'.format
data = pd.read_csv('data/Car details.csv')
sample_input = np.c_[data['name'], data['year'], data['selling_price'], data['km_driven'], 
                     data['fuel'], data['seller_type'], data['transmission'], data['owner'], data['mileage'],
                     data['engine'], data['max_power'], data['torque'], data['seats']]
sample_output = np.c_[data['selling_price']]
data.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price,Address
0,79545.459,5.683,7.009,4.09,23086.801,1059033.558,"208 Michael Ferry Apt. 674\nLaurabury, NE 3701..."
1,79248.642,6.003,6.731,3.09,40173.072,1505890.915,"188 Johnson Views Suite 079\nLake Kathleen, CA..."
2,61287.067,5.866,8.513,5.13,36882.159,1058987.988,"9127 Elizabeth Stravenue\nDanieltown, WI 06482..."
3,63345.24,7.188,5.587,3.26,34310.243,1260616.807,USS Barnett\nFPO AP 44820
4,59982.197,5.041,7.839,4.23,26354.109,630943.489,USNS Raymond\nFPO AE 09386


In [3]:
X = np.c_[np.ones(sample_input.shape[0]), sample_input]
y = np.c_[sample_output]

In [4]:
def computeCost(theta, X, y):
    m = y.size
    h = X.dot(theta)

    J = 1/(2*m)*np.sum(np.square(h-y))  
    
    return(J)

In [5]:
initial_theta = np.zeros(X.shape[1])
computeCost(initial_theta.reshape(-1,1), X, y)


821335072437.9965

In [6]:
def gradient(theta, X, y, alpha=0.01):
    m = y.size
    h = X.dot(theta)
    
    grad = alpha*(1/m)*(X.T.dot(h-y))

    return(grad.flatten())

In [7]:
optimized = fmin_ncg(f=computeCost, x0=initial_theta, fprime=gradient, args=(X, y.flatten()))
optimized

Optimization terminated successfully.
         Current function value: 5109867366.187583
         Iterations: 7
         Function evaluations: 10
         Gradient evaluations: 47
         Hessian evaluations: 0


array([-2.63733012e+06,  2.15770252e+01,  1.65640269e+05,  1.20668176e+05,
        1.66025762e+03,  1.52004624e+01])

In [8]:
computeCost(optimized.reshape(-1,1), X, y)

5109867366.187583

In [9]:
print(X[0].dot(optimized.reshape(-1,1)))

[1223841.36533062]
