# Import Libraries

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
%matplotlib inline

# Load data

In [2]:
# Explicit per-column dtypes handed to pd.read_csv, grouped by target type.
dtype_dict = {
    # parsed as text
    'id': str,
    'date': str,
    'zipcode': str,
    'floors': str,
    # parsed as integers
    'waterfront': int,
    'view': int,
    'condition': int,
    'grade': int,
    'sqft_above': int,
    'sqft_basement': int,
    'sqft_lot': int,
    'yr_built': int,
    'yr_renovated': int,
    # parsed as floats
    'price': float,
    'bedrooms': float,
    'bathrooms': float,
    'sqft_living': float,
    'sqft_living15': float,
    'sqft_lot15': float,
    'lat': float,
    'long': float,
}

In [3]:
# Read the full dataset and the train/test splits with the shared column dtypes.
sales = pd.read_csv(filepath_or_buffer='kc_house_data.csv', dtype=dtype_dict)
train_data = pd.read_csv(filepath_or_buffer='kc_house_train_data.csv', dtype=dtype_dict)
test_data = pd.read_csv(filepath_or_buffer='kc_house_test_data.csv', dtype=dtype_dict)

# Train model

## Get slope and intercept

In [4]:
def simple_linear_regression(input_feature, output):
    """Fit a scikit-learn ``LinearRegression`` and return its intercept and slope.

    Parameters
    ----------
    input_feature : array-like accepted by ``LinearRegression.fit``
        Feature data; this notebook passes a single-column DataFrame.
    output : array-like accepted by ``LinearRegression.fit``
        Target values.

    Returns
    -------
    tuple
        ``(intercept, slope)``: the fitted model's ``intercept_`` and the first
        entry of its ``coef_``. When ``output`` is a single-column DataFrame,
        as in this notebook, both come back as length-1 arrays rather than
        plain floats (visible in the printed results of the calling cell).
    """
    # Function-local import: scikit-learn is loaded the first time this is called.
    from sklearn.linear_model import LinearRegression

    fitted = LinearRegression()
    fitted.fit(input_feature, output)
    return fitted.intercept_, fitted.coef_[0]

In [5]:
# Target and single feature from the training split, each kept as a one-column DataFrame.
output = train_data.loc[:, ['price']]
input_feature = train_data.loc[:, ['sqft_living']]

In [6]:
# Fit on the training split, then show intercept and slope on separate lines.
intercept, slope = simple_linear_regression(
    input_feature=input_feature,
    output=output,
)
print(intercept, slope, sep='\n')

[-47116.07907289]
[281.95883963]


In [7]:
# Element 0 of the returned (intercept, slope) tuple; this call fits the model again.
simple_linear_regression(
    input_feature=input_feature,
    output=output,
)[0]

array([-47116.07907289])

## Predict Output

In [8]:
def get_regression_predictions(input_feature, intercept, slope):
    """Evaluate the line ``intercept + slope * x`` for every input value.

    Parameters
    ----------
    input_feature : pandas Series/DataFrame, NumPy array, sequence, or scalar
        Feature value(s). Objects exposing ``.values`` (pandas containers) are
        unwrapped through that attribute; anything else is converted with
        ``np.asarray`` so plain arrays, lists and numbers are accepted too.
    intercept : scalar or array
        Intercept of the line; must broadcast against the feature values.
    slope : scalar or array
        Slope of the line; must broadcast against the feature values.

    Returns
    -------
    numpy.ndarray or NumPy scalar
        Predictions with the broadcast shape of the inputs (a single-column
        DataFrame yields an ``(n, 1)`` array, a Series an ``(n,)`` array).
    """
    if hasattr(input_feature, 'values'):
        # pandas path: use the underlying array directly.
        feature_values = input_feature.values
    else:
        # Plain arrays, lists and scalars have no ``.values`` attribute.
        feature_values = np.asarray(input_feature)
    return intercept + slope * feature_values

In [9]:
# Prediction of the fitted sqft_living model for one input value of 2650.
input1 = pd.Series([2650])
print(
    get_regression_predictions(
        input_feature=input1,
        intercept=intercept,
        slope=slope,
    )
)

[700074.84594751]


## Calculate Residual Sum of Squares

In [10]:
def get_residual_sum_of_squares(input_feature, output, intercept, slope):
    """Return the residual sum of squares of a fitted line on the given data.

    Predictions are obtained from ``get_regression_predictions`` and compared
    element by element with ``output``.

    Parameters
    ----------
    input_feature : pandas Series or single-column DataFrame
        Feature values, forwarded unchanged to ``get_regression_predictions``.
    output : pandas Series/DataFrame or array-like
        Observed target values, one per input row.
    intercept, slope : scalar or length-1 array
        Parameters of the fitted line.

    Returns
    -------
    NumPy scalar
        Sum of squared differences between observed and predicted values.

    Raises
    ------
    ValueError
        If the number of observed values differs from the number of
        predictions.
    """
    predicted_output = get_regression_predictions(input_feature, intercept, slope)

    # Flatten both sides before subtracting. A 1-D target against an (n, 1)
    # prediction would otherwise broadcast to an (n, n) matrix and silently
    # produce a wrong (much larger) result.
    actual = np.ravel(getattr(output, 'values', output))
    predicted = np.ravel(predicted_output)
    if actual.shape != predicted.shape:
        raise ValueError(
            "output has %d values but %d predictions were produced"
            % (actual.size, predicted.size)
        )

    residuals = actual - predicted
    return np.square(residuals).sum()

In [11]:
# RSS of the sqft_living model on the training data it was fitted to.
get_residual_sum_of_squares(
    input_feature=input_feature,
    output=output,
    intercept=intercept,
    slope=slope,
)

1201918354177283.2

## Estimate the input given the output

In [12]:
def inverse_regression_predictions(output, intercept, slope):
    """Solve ``output = intercept + slope * x`` for ``x``.

    Parameters
    ----------
    output : scalar, array, or pandas Series
        Target value(s) to invert.
    intercept, slope : scalar or array
        Parameters of the line.

    Returns
    -------
    Same kind of object as ``output - intercept``
        The input value(s) that the line maps to ``output``.
    """
    offset_from_intercept = output - intercept
    estimated_input = offset_from_intercept / slope
    return estimated_input

In [13]:
# Input value that the fitted sqft_living line maps to an output of 800000.
output1 = pd.Series([800000])
inverse_regression_predictions(
    output=output1,
    intercept=intercept,
    slope=slope,
)

0    3004.396245
dtype: float64

## Using "bedrooms" as input feature

In [14]:
# Alternative single feature from the training split, kept as a one-column DataFrame.
input_feature1 = train_data.loc[:, ['bedrooms']]

In [15]:
# Fit the same kind of model with 'bedrooms' as the input feature.
bedrooms_intercept, bedrooms_slope = simple_linear_regression(
    input_feature=input_feature1,
    output=output,
)

In [16]:
# RSS of the sqft_living model on the test split.
get_residual_sum_of_squares(
    input_feature=test_data[['sqft_living']],
    output=test_data[['price']],
    intercept=intercept,
    slope=slope,
)

275402933617812.12

In [17]:
# RSS of the bedrooms model on the test split.
get_residual_sum_of_squares(
    input_feature=test_data[['bedrooms']],
    output=test_data[['price']],
    intercept=bedrooms_intercept,
    slope=bedrooms_slope,
)

493364585960300.9