In [1]:
import pandas as pd
import matplotlib
import numpy as np
import plotly.plotly as py
import plotly.offline as ply
import plotly
import plotly.graph_objs as go
import colorlover as cl
from IPython.display import HTML
from plotly.graph_objs import ColorBar
%matplotlib inline

In [2]:
# Initialise plotly's offline notebook mode (via the `ply` alias of
# plotly.offline) so figures render inline in this notebook.
ply.init_notebook_mode(connected=True)

In [3]:
# Load the training data from the CSV file in the working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')

In [4]:
# Keep only rows whose GrLivArea is below 4000 (presumably to drop a few
# very large outliers -- confirm the threshold against the data description).
# `.copy()` makes the result an independent frame, so the column assignment
# in a later cell writes to this frame unambiguously and cannot trigger
# pandas' SettingWithCopyWarning.
train = train[train['GrLivArea'] < 4000].copy()

In [5]:
# Columns examined throughout the rest of the notebook.
cols = [
    'LotFrontage',
    'LotArea',
    'OverallQual',
    'YearBuilt',
    'GarageArea',
    'SalePrice',
]

In [6]:
# Preview the first five rows of the selected columns.
train.loc[:, cols].head(n=5)

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,GarageArea,SalePrice
0,65.0,8450,7,2003,548,208500
1,80.0,9600,6,1976,460,181500
2,68.0,11250,7,2001,608,223500
3,60.0,9550,7,1915,642,140000
4,84.0,14260,8,2000,836,250000


In [7]:
# Print, for each selected column, the percentage of rows with a missing value.
n_rows = len(train)
for name in cols:
    n_missing = train[name].isnull().sum()
    pct_missing = n_missing * 100 / n_rows
    print('{}: {}%'.format(name, pct_missing))

LotFrontage: 17.78846153846154%
LotArea: 0.0%
OverallQual: 0.0%
YearBuilt: 0.0%
GarageArea: 0.0%
SalePrice: 0.0%


In [8]:
# Replace missing LotFrontage values with 0.
# NOTE(review): these zeros are included when the correlation matrix is
# computed further down -- confirm that 0 is the intended fill value.
lot_frontage_filled = train['LotFrontage'].fillna(value=0)
train['LotFrontage'] = lot_frontage_filled

In [9]:
# Scatter of GarageArea against LotArea with markers coloured by SalePrice.
#
# The colour array is the raw SalePrice combined with `reversescale=True`.
# This gives each point the same colour as the previous `max - SalePrice`
# encoding (high prices at the dark end of Viridis), but the colour bar now
# shows real prices and is displayed, so the figure can be read on its own.
trace = go.Scatter(
    x=train['LotArea'],
    y=train['GarageArea'],
    mode='markers',
    marker=dict(
        color=train['SalePrice'],
        colorscale='Viridis',
        reversescale=True,
        showscale=True,
        colorbar=dict(title='SalePrice (USD)'),
        size=10,
    ),
)
layout = go.Layout(
    title='GarageArea vs LotArea vs SalePrice',
    xaxis=dict(
        title='LotArea (square feet)',
        titlefont=dict(size=18),
    ),
    yaxis=dict(
        title='GarageArea (square feet)',
        titlefont=dict(size=18),
    ),
    showlegend=False,
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)

In [10]:
# Bubble chart of SalePrice against YearBuilt; OverallQual drives both the
# marker colour (Reds scale) and the marker size (twice the rating).
quality = train['OverallQual']
marker_style = dict(
    color=quality,
    size=quality * 2,
    colorscale='Reds',
    # Empty colour-bar spec kept as-is; presumably it is what makes the
    # colour scale visible -- confirm against the rendered figure.
    colorbar={},
    opacity=0.7,
)
trace = go.Scatter(
    x=train['YearBuilt'],
    y=train['SalePrice'],
    mode='markers',
    marker=marker_style,
)

x_axis = dict(title='YearBuilt', titlefont=dict(size=18))
y_axis = dict(title='SalePrice (USD)', titlefont=dict(size=18))
layout = go.Layout(
    title='SalePrice vs YearBuilt vs OverallQual',
    xaxis=x_axis,
    yaxis=y_axis,
    showlegend=False,
)

data = [trace]
fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)

In [11]:
# Bubble chart of SalePrice against GarageArea; OverallQual drives both the
# marker colour and the marker size (twice the rating).
#
# The colour array is the raw OverallQual combined with `reversescale=True`.
# Each point keeps the colour it had with the previous `max - OverallQual`
# encoding (high ratings at the dark end of Viridis), but the colour bar is
# now displayed and shows the actual ratings.
trace = go.Scatter(
    x=train['GarageArea'],
    y=train['SalePrice'],
    mode='markers',
    marker=dict(
        color=train['OverallQual'],
        colorscale='Viridis',
        reversescale=True,
        showscale=True,
        colorbar=dict(title='OverallQual'),
        size=train['OverallQual'] * 2,
        opacity=0.7,
    ),
)
layout = go.Layout(
    title='SalePrice vs GarageArea vs OverallQual',
    xaxis=dict(
        title='GarageArea (Square Feet)',
        titlefont=dict(size=18),
    ),
    yaxis=dict(
        title='SalePrice (USD)',
        titlefont=dict(size=18),
    ),
)
data = [trace]
fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)

# Filtering by overall quality (dropdown menu)

In [12]:
# Interactive scatter of SalePrice against YearBuilt: one trace per
# OverallQual rating present in the data, plus a dropdown that shows a single
# rating or all of them.
data = []
vals = sorted(train['OverallQual'].unique())
names = ['Very Poor',
         'Poor',
         'Fair',
         'Below Average',
         'Average',
         'Above Average',
         'Good',
         'Very Good',
         'Excellent',
         'Very Excellent']

# Look labels up by rating VALUE rather than by position in `vals`, so the
# labels stay correct even when some rating does not occur in the data.
# Assumes ratings are the integers 1..10 in the order of `names` -- confirm
# against the data description. Unknown ratings fall back to their number.
label_by_rating = {rating: label for rating, label in enumerate(names, start=1)}
labels = [label_by_rating.get(val, str(val)) for val in vals]

colors = cl.scales['3']['seq']['YlOrRd']
colors = cl.to_rgb(cl.interp(colors, len(vals)))
for i, val in enumerate(vals):
    subset = train[train['OverallQual'] == val]
    trace = go.Scatter(x=subset['YearBuilt'],
                       y=subset['SalePrice'],
                       mode='markers',
                       marker=dict(size=8, opacity=1.0, color=colors[i]),
                       name='Quality: {}'.format(labels[i]))
    data.append(trace)

# One button per trace actually plotted. With method='update', the first args
# entry holds trace changes and the second holds layout changes, so the title
# goes in the second dict.
buttons = []
for i, label in enumerate(labels):
    visible = [False] * len(vals)
    visible[i] = True
    buttons.append(dict(label=label,
                        method='update',
                        args=[dict(visible=visible),
                              dict(title=label)]))

buttons.append(dict(label='All',
                    method='update',
                    args=[dict(visible=[True] * len(vals)),
                          dict(title='All')]))
# active=-1: no button is pre-selected, so all traces are shown initially.
updatemenus = [dict(active=-1, buttons=buttons)]

layout = go.Layout(
    title='SalePrice vs YearBuilt vs OverallQual',
    xaxis=dict(
        title='YearBuilt',
        titlefont=dict(size=18),
    ),
    yaxis=dict(
        title='SalePrice (USD)',
        titlefont=dict(size=18),
    ),
    updatemenus=updatemenus,
    showlegend=True
)

fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)

In [13]:
# Interactive scatter of SalePrice against GarageArea: one trace per
# OverallQual rating present in the data, plus a dropdown that shows a single
# rating or all of them.
data = []
vals = sorted(train['OverallQual'].unique())
names = ['Very Poor',
         'Poor',
         'Fair',
         'Below Average',
         'Average',
         'Above Average',
         'Good',
         'Very Good',
         'Excellent',
         'Very Excellent']

# Look labels up by rating VALUE rather than by position in `vals`, so the
# labels stay correct even when some rating does not occur in the data.
# Assumes ratings are the integers 1..10 in the order of `names` -- confirm
# against the data description. Unknown ratings fall back to their number.
label_by_rating = {rating: label for rating, label in enumerate(names, start=1)}
labels = [label_by_rating.get(val, str(val)) for val in vals]

colors = cl.scales['3']['seq']['YlGnBu']
colors = cl.to_rgb(cl.interp(colors, len(vals)))
for i, val in enumerate(vals):
    subset = train[train['OverallQual'] == val]
    trace = go.Scatter(x=subset['GarageArea'],
                       y=subset['SalePrice'],
                       mode='markers',
                       marker=dict(opacity=1.0, color=colors[i], size=9),
                       name='Quality: {}'.format(labels[i]))
    data.append(trace)

# One button per trace actually plotted. With method='update', the first args
# entry holds trace changes and the second holds layout changes, so the title
# goes in the second dict.
buttons = []
for i, label in enumerate(labels):
    visible = [False] * len(vals)
    visible[i] = True
    buttons.append(dict(label=label,
                        method='update',
                        args=[dict(visible=visible),
                              dict(title=label)]))

buttons.append(dict(label='All',
                    method='update',
                    args=[dict(visible=[True] * len(vals)),
                          dict(title='All')]))
# active=-1: no button is pre-selected, so all traces are shown initially.
updatemenus = [dict(active=-1, buttons=buttons)]

layout = go.Layout(
    # The x axis is GarageArea, so the title names GarageArea (it previously
    # said YearBuilt).
    title='SalePrice vs GarageArea vs OverallQual',
    xaxis=dict(
        title='GarageArea (square feet)',
        titlefont=dict(size=18),
    ),
    yaxis=dict(
        title='SalePrice (USD)',
        titlefont=dict(size=18),
    ),
    updatemenus=updatemenus,
    showlegend=True
)
fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)

In [14]:
# Pairwise correlation matrix of the selected columns as a NumPy array.
# `.values` replaces `DataFrame.as_matrix()`, which is deprecated and has been
# removed from recent pandas releases.
train[cols].corr().values

array([[ 1.        ,  0.06998549,  0.15531316,  0.02734063,  0.17755868,
         0.20170592],
       [ 0.06998549,  1.        ,  0.08871877,  0.00659023,  0.16218279,
         0.26986648],
       [ 0.15531316,  0.08871877,  1.        ,  0.57171183,  0.55490469,
         0.80085836],
       [ 0.02734063,  0.00659023,  0.57171183,  1.        ,  0.47731136,
         0.53527943],
       [ 0.17755868,  0.16218279,  0.55490469,  0.47731136,  1.        ,
         0.63696359],
       [ 0.20170592,  0.26986648,  0.80085836,  0.53527943,  0.63696359,
         1.        ]])

In [16]:
# Heatmap of the pairwise correlations between the selected columns.
# The correlation frame is computed once; its own row/column labels are used
# for the axes so labels always line up with the matrix, and `.values`
# replaces the deprecated `DataFrame.as_matrix()`.
corr = train[cols].corr()
trace = go.Heatmap(z=corr.values,
                   x=list(corr.columns),
                   y=list(corr.index))
layout = go.Layout(
    title='Correlation Heatmap',
)
data = [trace]
fig = go.Figure(data=data, layout=layout)
ply.iplot(fig)