## Mean Absolute Error (MAE)

In [None]:
from sklearn.metrics import mean_absolute_error

# Predict the target for every row of X with the already-fitted model
prediction = model.predict(X)

### On average, our predictions are off by this much (same units as y)
# NOTE(review): if X is the data the model was trained on, this is an
# in-sample error and will look optimistic — confirm against the fitting cell.
mean_absolute_error(y_true=y, y_pred=prediction)

## Split Data
##### The split is based on a random number generator. Supplying a numeric value to the `random_state` argument guarantees we get the same split every time we run this script.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the rows for validation, for both features and target.
# The fixed random_state makes the split identical on every run.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=0,
)

## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler, scale
from sklearn.linear_model import SGDRegressor

scaler = StandardScaler()

### Z-score scale the training data (per feature: zero mean, unit variance)
# NOTE(review): X_train / y_train are not defined in the cells shown here —
# confirm they exist earlier in the notebook.
X_norm = scaler.fit_transform(X_train)

### Another way to scale: a one-off function call with explicit options.
### The result must be kept — with copy=True the input is not modified, so the
### scaled values exist only in the returned array.
X_orig_scaled = scale(X_orig, axis=0, with_mean=True, with_std=True, copy=True)

### Scikit-learn has a gradient descent regression model that performs best with normalized input.
### SGD shuffles the data, so a fixed random_state keeps the fitted weights reproducible.
sgdr = SGDRegressor(max_iter=1000, random_state=0)
sgdr.fit(X_norm, y_train)

# Make predictions with normalized data.
# New, unseen data must go through scaler.transform(...) (not fit_transform)
# so it is scaled with the statistics learned from the training set.
y_pred_sgd = sgdr.predict(X_norm)

## Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

linear_model = LinearRegression()

# X must be a 2-D matrix of shape (n_samples, n_features).
# reshape(-1, 1) turns X_train into a single column; do it once and reuse it.
X_train_2d = X_train.reshape(-1, 1)
linear_model.fit(X_train_2d, y_train)

# View parameters
b = linear_model.intercept_
w = linear_model.coef_

# Make predictions
y_pred = linear_model.predict(X_train_2d)

# Goodness of fit: for a regressor, score() returns the R^2 coefficient of
# determination, not a classification accuracy.
# Score against y_train — the target the model was fitted on and that lines up
# row-for-row with X_train.
linear_model.score(X_train_2d, y_train)

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Build the classifier with scikit-learn's default settings
lr_model = LogisticRegression()

# Learn the model parameters from features X and labels y
lr_model.fit(X=X, y=y)

## Decision Tree

In [None]:
# Code you have previously used to load data
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


# Location of the training CSV
iowa_file_path = '../input/home-data-for-ml-course/train.csv'

# Load the whole file into a DataFrame
home_data = pd.read_csv(iowa_file_path)

# Prediction target: the SalePrice column
y = home_data.SalePrice

# Columns used as model inputs
features = [
    'LotArea',
    'YearBuilt',
    '1stFlrSF',
    '2ndFlrSF',
    'FullBath',
    'BedroomAbvGr',
    'TotRmsAbvGrd',
]
X = home_data[features]

# Hold out validation rows; the fixed seed keeps the split repeatable
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=1,
)

# Regression tree capped at 20 leaves, seeded for repeatable fits
iowa_model = DecisionTreeRegressor(random_state=1, max_leaf_nodes=20)

# Train on the training portion only
iowa_model.fit(X=train_X, y=train_y)


In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

### Controlling Tree Depth
# The max_leaf_nodes argument provides a very sensible way to control overfitting vs underfitting 
# The more leaves we allow the model to make, the more we move from underfitting towards overfitting
# We can use a utility function to help compare MAE scores from different values for max_leaf_nodes

def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    """Return the validation MAE of a decision tree with a given leaf budget.

    A DecisionTreeRegressor limited to ``max_leaf_nodes`` leaves (seeded with
    ``random_state=0``) is fitted on ``train_X`` / ``train_y``; its predictions
    for ``val_X`` are then compared with ``val_y``.

    Args:
        max_leaf_nodes: Maximum number of leaves the tree may grow.
        train_X: Training features.
        val_X: Validation features.
        train_y: Training target.
        val_y: Validation target.

    Returns:
        The mean absolute error on the validation data.
    """
    tree = DecisionTreeRegressor(random_state=0, max_leaf_nodes=max_leaf_nodes)
    tree.fit(train_X, train_y)
    return mean_absolute_error(val_y, tree.predict(val_X))

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# A random forest builds many trees and predicts by averaging the predictions of its component trees.
# It generally predicts much better than a single decision tree.
# One of its best features is that the default parameters usually work reasonably well, even without tuning.

# Fixed seed so repeated runs build the same forest
forest_model = RandomForestRegressor(random_state=1)

# Train on the training portion of the data
forest_model.fit(X=train_X, y=train_y)