In [53]:
import seaborn as sns
import os, glob
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import palettable
from palettable.tableau import Tableau_20
%matplotlib inline
sns.set_style('darkgrid')

In [54]:
def draw_scatter(category, x_filename, y_filename, metacats):
    """Scatter-plot the shared columns of two CSV files against each other and save a PNG.

    Both files are read with pandas and outer-merged on ``metacats``. Every
    column that appears in both files (other than the ``metacats`` keys)
    becomes one scatter series: its values from ``x_filename`` go on the
    x axis and its values from ``y_filename`` on the y axis. Marker area is
    proportional to the merged frame's ``'Weight'`` column, scaled so the
    largest weight maps to an area of 500.

    Parameters
    ----------
    category : str
        Text used as the plot title.
    x_filename : str
        Path of the CSV whose values are plotted on the x axis.
    y_filename : str
        Path of the CSV whose values are plotted on the y axis.
    metacats : list of str
        Column names used as merge keys (and as the index of the merged frame).

    Raises
    ------
    KeyError
        If the merged frame has no plain ``'Weight'`` column. Because columns
        shared by both files are renamed with a filename suffix, ``'Weight'``
        has to come from only one of the two files.

    Side effects
    ------------
    Sets ``plt.rcParams['figure.figsize']`` to 10x10, writes
    ``saved images/<x> vs <y>.png`` (creating the directory if needed) and
    prints the path of the written file. Returns ``None``.
    """
    df_x = pd.read_csv(x_filename)
    df_y = pd.read_csv(y_filename)

    # Series to plot: columns present in both files, minus the merge keys.
    cols = df_x.columns.intersection(df_y.columns).difference(metacats)

    # Shared non-key columns get the source filename appended as a suffix,
    # which is how the x and y versions of a column are told apart below.
    df = pd.merge(df_x, df_y, on=metacats, how='outer', suffixes=(x_filename, y_filename))
    df = df.set_index(metacats)

    # Marker areas: largest weight -> 500 pt^2. Computed before the figure is
    # created so a missing 'Weight' column does not leave a figure behind.
    weights = df['Weight'] * 500 / df['Weight'].max()
    marker_colors = Tableau_20.mpl_colors

    plt.rcParams['figure.figsize'] = 10, 10  # that's default image size for this interactive session
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        for i, col in enumerate(cols):
            x = df[col + x_filename]
            y = df[col + y_filename]

            # Cycle through the palette when there are more series than colours.
            marker_color = marker_colors[i % len(marker_colors)]
            # A bare RGB tuple passed as `c` is ambiguous (it is treated as
            # colormap data when there are exactly 3 points); a single-row
            # 2-D sequence is always read as one colour for every marker.
            ax.scatter(x, y, s=weights, c=[marker_color], alpha=0.5, label=col)

        lims = [0, 1]

        # y = x line (perfect agreement between input and output)
        ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)

        # y = x + 0.2 line
        ax.plot(lims, [0.2, 1.2], 'k-', alpha=0.1, zorder=0)

        # y = x - 0.2 line
        ax.plot(lims, [-0.2, 0.8], 'k-', alpha=0.1, zorder=0)

        ax.set_aspect('equal')
        ax.set_xlim(lims)
        ax.set_ylim(lims)

        ax.legend()

        title_size = 20
        axis_label_size = 24
        tick_size = 16

        ax.set_xlabel('Input (RBSA Query)', fontsize=axis_label_size)
        ax.set_ylabel('Output (Models)', fontsize=axis_label_size)

        ax.tick_params(axis='both', which='major', labelsize=tick_size)
        ax.set_title(category, fontsize=title_size)

        # Make sure the output directory exists; savefig does not create it.
        out_dir = 'saved images'
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # Path separators inside the input paths are flattened to '_' so the
        # two paths can be embedded in a single file name.
        filename = os.path.join(out_dir, '{} vs {}.png'.format(x_filename, y_filename).replace(os.sep, '_'))
        fig.savefig(filename, bbox_inches='tight', dpi=200)
    finally:
        # Close (not just clear) the figure so repeated calls do not leave
        # empty figures open in the session.
        plt.close(fig)

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Plotted {}".format(filename))

In [55]:
# One entry per plot family:
#   (plot title, folder name that is also the CSV filename prefix, merge-key columns)
# Inputs live at  <folder>/<folder>Input.csv
# Outputs live at <folder>/<folder>Output*.csv  (one plot is drawn per output file)
plot_specs = [
    ('Walls', 'Walls', ['Location', 'Vintage']),
    ('Attic', 'Attic', ['Location', 'Vintage']),
    ('Ducts', 'Ducts', ['Foundation Type', 'Vintage']),
    ('Air Source Heat Pump', 'ASHP', ['Location', 'Vintage', 'Heating Fuel']),
    ('Electric Baseboard', 'Baseboard', ['Location', 'Vintage', 'Heating Fuel']),
    ('Boiler', 'Boiler', ['Location', 'Vintage', 'Heating Fuel']),
    ('Furnace', 'Furnace', ['Location', 'Vintage', 'Heating Fuel']),
    ('Air Leakage', 'Infiltration', ['Vintage', 'Size']),
    ('Windows', 'Windows', ['Location', 'Vintage']),
]

for category, folder, metacats in plot_specs:
    x_filename = os.path.join(folder, folder + 'Input.csv')
    # glob returns matches in arbitrary, OS-dependent order; sort so the plots
    # are produced in the same order on every run and platform.
    y_filenames = sorted(glob.glob(os.path.join(folder, folder + 'Output*.csv')))
    for y_filename in y_filenames:
        draw_scatter(category, x_filename, y_filename, metacats)

Plotted saved images\Walls_WallsInput.csv vs Walls_WallsOutput1000.csv.png
Plotted saved images\Walls_WallsInput.csv vs Walls_WallsOutput2500.csv.png
Plotted saved images\Walls_WallsInput.csv vs Walls_WallsOutput500.csv.png
Plotted saved images\Walls_WallsInput.csv vs Walls_WallsOutput5000.csv.png
Plotted saved images\Attic_AtticInput.csv vs Attic_AtticOutput1000.csv.png
Plotted saved images\Attic_AtticInput.csv vs Attic_AtticOutput2500.csv.png
Plotted saved images\Attic_AtticInput.csv vs Attic_AtticOutput500.csv.png
Plotted saved images\Attic_AtticInput.csv vs Attic_AtticOutput5000.csv.png
Plotted saved images\Ducts_DuctsInput.csv vs Ducts_DuctsOutput1000.csv.png
Plotted saved images\Ducts_DuctsInput.csv vs Ducts_DuctsOutput2500.csv.png
Plotted saved images\Ducts_DuctsInput.csv vs Ducts_DuctsOutput500.csv.png
Plotted saved images\Ducts_DuctsInput.csv vs Ducts_DuctsOutput5000.csv.png
Plotted saved images\ASHP_ASHPInput.csv vs ASHP_ASHPOutput1000.csv.png
Plotted saved images\ASHP_ASHPIn

<matplotlib.figure.Figure at 0xb052530>