# California Housing Prices 
## Final Model
### by Anthony Medina

## Table of Contents
### 1. Imports and Data Prep
### 2. Modeling
### 3. Median Examples

## 1. Imports and Data Prep

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline 

In [2]:
# Load the cleaned housing dataset; the first CSV column becomes the row index.
cleaned_csv_path = '../cleaned_data/ready_for_EDA.csv'
house_data = pd.read_csv(cleaned_csv_path, index_col=0)

In [3]:
# One-hot encode ocean_proximity and remove the longitude and latitude columns.
# drop_first=True omits the first category level, which becomes the implicit
# baseline (all dummy columns equal to 0).
encoded = pd.get_dummies(house_data, columns=['ocean_proximity'], drop_first=True)
df = encoded.drop(['longitude', 'latitude'], axis=1)
df.head()

Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity_INLAND,ocean_proximity_ISLAND,ocean_proximity_NEAR BAY,ocean_proximity_NEAR OCEAN
0,41.0,880,129,322,126,83252.0,452600.0,0,0,1,0
1,21.0,7099,1106,2401,1138,83014.0,358500.0,0,0,1,0
2,52.0,1467,190,496,177,72574.0,352100.0,0,0,1,0
3,52.0,1274,235,558,219,56431.0,341300.0,0,0,1,0
4,52.0,1627,280,565,259,38462.0,342200.0,0,0,1,0


In [4]:
# Rescale the numeric feature columns with min-max scaling
# (MinMaxScaler's default feature range is [0, 1]).
from sklearn.preprocessing import MinMaxScaler

standard_col = ['total_rooms', 'total_bedrooms', 'population', 'households', 'housing_median_age','median_income']

scaler = MinMaxScaler()
# fit_transform learns each column's min/max and applies the rescaling in one call;
# the fitted scaler stays available for later use.
df[standard_col] = scaler.fit_transform(df[standard_col])

## 2. Modeling

In [5]:
from sklearn.linear_model import LinearRegression

# Separate the target vector from the feature matrix (every remaining column).
target_name = 'median_house_value'
y = df[target_name].values
X = df.drop(target_name, axis=1).values

# Linear regression with an intercept term, single job.
model = LinearRegression(fit_intercept=True, n_jobs=1)

# Fit on all rows of df (no train/test split is applied in this notebook).
model.fit(X, y)

## 3. Median Examples

Build one feature row per ocean-proximity category, holding every numeric feature at its median, so the predicted house value can be compared across locations.

In [6]:
new_X = [
[df.housing_median_age.median(), df.total_rooms.median(), df.total_bedrooms.median(), df.population.median(), df.households.median(), df.median_income.median(), df.median_house_value.median(),1,0,0,0],
[df.housing_median_age.median(), df.total_rooms.median(), df.total_bedrooms.median(), df.population.median(), df.households.median(), df.median_income.median(), df.median_house_value.median(),0,1,0,0],
[df.housing_median_age.median(), df.total_rooms.median(), df.total_bedrooms.median(), df.population.median(), df.households.median(), df.median_income.median(), df.median_house_value.median(),0,0,1,0],
[df.housing_median_age.median(), df.total_rooms.median(), df.total_bedrooms.median(), df.population.median(), df.households.median(), df.median_income.median(), df.median_house_value.median(),0,0,0,1],
[df.housing_median_age.median(), df.total_rooms.median(), df.total_bedrooms.median(), df.population.median(), df.households.median(), df.median_income.median(), df.median_house_value.median(),0,0,0,0]
]

locations= ["Inland", "Island" , "Near_Bay" , "Near_Ocean" , "Ocean_Front"]

In [7]:
predictions = model.predict(X)

In [8]:
# Show each location label next to its estimated house value.
# zip stops after the last location label, so exactly one line is printed per label.
for location, estimate in zip(locations, predictions):
    print(location, estimate)

Inland 417603.00627294707
Island 426431.0285314664
Near_Bay 385457.95778393495
Near_Ocean 326171.31935394875
Ocean_Front 257276.62687811998
