## Imports for modeling and reading the data

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from matplotlib import pyplot as plt
from sklearn.preprocessing import RobustScaler
from sklearn import preprocessing
from sklearn.linear_model import Ridge
%matplotlib inline

## Getting rid of missing values without scaling

In [None]:
# Load the house-sales CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='kc_house_data.csv')

In [None]:
# Keep only the rows that have no missing value in any column
clean_data = data.dropna(axis=0, how='any')

In [None]:
# Target: sale price. Features: the nine columns listed below (un-scaled).
y_drop = clean_data.loc[:, 'price']

X_drop = clean_data.loc[
    :,
    [
        'bedrooms',
        'floors',
        'bathrooms',
        'sqft_living',
        'sqft_lot',
        'sqft_lot15',
        'zipcode',
        'lat',
        'long',
    ],
]


In [None]:
# Plot one histogram per selected (un-scaled) feature column.
# The trailing semicolon keeps the notebook from echoing the call's return value.
X_drop.hist(figsize=(12,12));

In [None]:
# Partition the un-scaled features/target into train and test sets.
# No explicit test size is given; the seed is fixed so the split is repeatable.
(
    X_train_drop,
    X_test_drop,
    y_train_drop,
    y_test_drop,
) = train_test_split(X_drop, y_drop, random_state=1)

## Ridge modeling on non-scaled data

In [None]:
# Ridge regression (alpha=1) fitted on the un-scaled training data.
# The `normalize` keyword is intentionally not passed: it was removed from
# Ridge in newer scikit-learn releases, and False (no normalization) was the
# default, so omitting it keeps the same behaviour on every version.
model_ridge = Ridge(alpha=1)

model_ridge.fit(X_train_drop, y_train_drop)

In [None]:
# Predict prices for the held-out rows and report R^2 against the true values
predict = model_ridge.predict(X_test_drop)

print(
    "R^2 for raw data:",
    r2_score(y_true=y_test_drop, y_pred=predict),
)

## Preprocessing of data (dropped missing values) and scaling

In [None]:
# Drop rows with missing values, then remove the columns that are not used.
# The two steps are chained without inplace=True so that the frame returned by
# dropna() is never mutated in place.
droped_data = data.dropna().drop(
    ['date', 'id', 'sqft_above', 'sqft_basement', 'yr_renovated', 'view', 'waterfront'],
    axis=1,
)

# Robust-scale every remaining column.
# NOTE(review): the scaler is fit on the whole frame before the train/test
# split, and the frame presumably still contains the 'price' target (it is read
# back from the scaled frame afterwards) -- confirm this is intended.
robust = RobustScaler()

# Use the scaler instance created above; the previous code referenced an
# undefined name (`scaler`) here, which raises NameError.
robust_scaling = robust.fit_transform(droped_data)

# fit_transform returns a plain array, so re-attach the column names.
robust_scaled_df = pd.DataFrame(robust_scaling, columns=droped_data.columns)

In [None]:
# Target: scaled sale price. Features: the nine scaled columns listed below.
y = robust_scaled_df.loc[:, 'price']

X = robust_scaled_df.loc[
    :,
    [
        'bedrooms',
        'floors',
        'bathrooms',
        'sqft_living',
        'sqft_lot',
        'sqft_lot15',
        'zipcode',
        'lat',
        'long',
    ],
]


In [None]:
# Plot one histogram per selected feature column of the scaled frame.
# The trailing semicolon keeps the notebook from echoing the call's return value.
X.hist(figsize=(12,12));

In [None]:
# Partition the scaled features/target into train and test sets.
# No explicit test size is given; the seed is fixed so the split is repeatable.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(X, y, random_state=1)

## Ridge model with scaled data

In [None]:
# Ridge regression (alpha=1) fitted on the scaled training data.
# The `normalize` keyword is intentionally not passed: it was removed from
# Ridge in newer scikit-learn releases, and False (no normalization) was the
# default, so omitting it keeps the same behaviour on every version.
model = Ridge(alpha=1)

model.fit(X_train, y_train)

In [None]:
# Display the fitted coefficients of the ridge model trained on the scaled data
model.coef_

In [None]:
# Display the fitted intercept of the ridge model trained on the scaled data
model.intercept_

In [None]:
# Predict on the rows the model was trained on and report the in-sample R^2
pred = model.predict(X_train)

print(
    "Training R^2:",
    r2_score(y_true=y_train, y_pred=pred),
)

In [None]:
# Predict on the held-out rows and report the out-of-sample R^2
test_pred = model.predict(X_test)

print(
    "Test R^2:",
    r2_score(y_true=y_test, y_pred=test_pred),
)