In [2]:
library(dplyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(reshape2)

In [3]:
# Read the train and test CSVs as tibbles, keeping text columns as character.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
df.train <- as_tibble(read.csv("../data/train.csv", stringsAsFactors = FALSE))
df.test <- as_tibble(read.csv("../data/test.csv", stringsAsFactors = FALSE))

# Stack both splits into one table. Id is dropped from both, and SalePrice is
# dropped from the training split, so the two halves share the same columns.
df.combined <- rbind(
  select(df.train, -Id, -SalePrice),
  select(df.test, -Id)
)

# Second handle on the combined table; rbind() of a single table was only a copy.
df.combined2 <- df.combined

In [8]:
# Build the frame used to study LotFrontage: keep the candidate predictor
# columns, drop every row with a missing value, and add square-root versions
# of the two area columns.
df.lot.frontage <- df.combined %>%
  select(
    LotFrontage, MSSubClass, MSZoning, LotArea, LotShape, LandContour,
    LotConfig, Neighborhood, BldgType, HouseStyle, YearBuilt, X1stFlrSF
  ) %>%
  filter(!is.na(LotFrontage)) %>%
  na.omit() %>%
  mutate(
    LotAreaSqrt = sqrt(LotArea),
    X1stFlrSFSqrt = sqrt(X1stFlrSF)
  )
head(df.lot.frontage)

Unnamed: 0,LotFrontage,MSSubClass,MSZoning,LotArea,LotShape,LandContour,LotConfig,Neighborhood,BldgType,HouseStyle,YearBuilt,X1stFlrSF,LotAreaSqrt,X1stFlrSFSqrt
1,65,60,RL,8450,Reg,Lvl,Inside,CollgCr,1Fam,2Story,2003,856,91.92388,29.25748
2,80,20,RL,9600,Reg,Lvl,FR2,Veenker,1Fam,1Story,1976,1262,97.97959,35.52464
3,68,60,RL,11250,IR1,Lvl,Inside,CollgCr,1Fam,2Story,2001,920,106.066,30.3315
4,60,70,RL,9550,IR1,Lvl,Corner,Crawfor,1Fam,2Story,1915,961,97.7241,31.0
5,84,60,RL,14260,IR1,Lvl,FR2,NoRidge,1Fam,2Story,2000,1145,119.4152,33.83785
6,85,50,RL,14115,IR1,Lvl,Inside,Mitchel,1Fam,1.5Fin,1993,796,118.8066,28.21347


* LotFrontage - Linear feet of street connected to property
* LotArea - Lot size in square feet
* X1stFlrSF - First Floor square feet
* MSSubClass - Identifies the type of dwelling involved in the sale
* MSZoning - Identifies the general zoning classification of the sale
* LotShape - General shape of property 
* LandContour - Flatness of the property
* LotConfig - Lot configuration 
* Neighborhood - Physical locations within Ames city limits
* BldgType - Type of dwelling 
* HouseStyle - Style of dwelling 
* YearBuilt - Original construction date

In [9]:
# Single-predictor fit: LotFrontage on raw LotArea; display its R-squared.
model.lm <- lm(
  LotFrontage ~ LotArea,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [10]:
# Single-predictor fit: LotFrontage on sqrt(LotArea); display its R-squared.
model.lm <- lm(
  LotFrontage ~ LotAreaSqrt,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [11]:
# Single-predictor fit: LotFrontage on first-floor square feet; display its
# R-squared.
model.lm <- lm(
  LotFrontage ~ X1stFlrSF,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [12]:
# Single-predictor fit: LotFrontage on sqrt(first-floor square feet); display
# its R-squared.
model.lm <- lm(
  LotFrontage ~ X1stFlrSFSqrt,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [None]:
# Single-predictor fit: LotFrontage on MSSubClass; display its R-squared.
# MSSubClass is a numeric code for the dwelling type, not a quantity, so it is
# wrapped in factor() to get one level per code instead of a linear slope.
model.lm <- lm(LotFrontage ~ factor(MSSubClass), data = df.lot.frontage)
summary(model.lm)$r.squared

In [None]:
# Single-predictor fit: LotFrontage on the zoning classification; display its
# R-squared.
model.lm <- lm(
  LotFrontage ~ MSZoning,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [None]:
# Single-predictor fit: LotFrontage on the lot shape; display its R-squared.
model.lm <- lm(
  LotFrontage ~ LotShape,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [None]:
# Single-predictor fit: LotFrontage on the land contour; display its R-squared.
model.lm <- lm(
  LotFrontage ~ LandContour,
  data = df.lot.frontage
)
summary(model.lm)[["r.squared"]]

In [None]:
# Single-predictor fit: LotFrontage on the lot configuration; display its
# R-squared. This cell previously repeated the X1stFlrSFSqrt model; LotConfig
# is the next selected predictor that had not been examined.
model.lm <- lm(LotFrontage ~ LotConfig, data = df.lot.frontage)
summary(model.lm)$r.squared

In [None]:
# Single-predictor fit: LotFrontage on the neighborhood; display its
# R-squared. This cell previously repeated the X1stFlrSFSqrt model;
# Neighborhood is a selected predictor that had not been examined.
model.lm <- lm(LotFrontage ~ Neighborhood, data = df.lot.frontage)
summary(model.lm)$r.squared