In [35]:
import binascii
import io
from StringIO import StringIO

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.formula.api as sm
from IPython.display import display, clear_output
from ipywidgets import HTML
from pandas.tools.plotting import scatter_matrix
from scipy import random, linalg
# import seaborn as sns
# sns.set_style('whitegrid')

# Switch matplotlib's interactive mode off so that figures are not shown
# immediately when they are created.
plt.interactive(False)

In [36]:
# Registry of input widgets, keyed by the name of the parameter each one controls.
widgets_dict = dict()

In [37]:
# widget - slider for the "Variance of x" parameter
var_x = widgets.FloatSlider(
    min=1,
    max=10,
    value=5,
    description="Variance of x",
    continuous_update=False,
)
widgets_dict["var_x"] = var_x
var_x

In [38]:
# widget - slider for the number of simulated observations
num_points = widgets.IntSlider(
    min=10,
    max=1000,
    value=100,
    description="Number of points",
    continuous_update=False,
)
widgets_dict["num_points"] = num_points
num_points

In [39]:
# widget - slider for the number of x variables in the simulated data
num_x_vars = widgets.IntSlider(
    min=1,
    max=6,
    value=1,
    description="Number of x vars",
    continuous_update=False,
)
widgets_dict["num_x_vars"] = num_x_vars
num_x_vars

In [40]:
# widget - slider for the "Var(e)" parameter of the error term
var_e = widgets.FloatSlider(
    min=0.0001,
    max=100,
    value=1,
    description="Var(e)",
    continuous_update=False,
)
widgets_dict["var_e"] = var_e
var_e

In [41]:
# widget - multi-select of the x variables to regress on.
# The choices are x1..xN for the current "Number of x vars" value, and all of
# them start out selected.
x_vars = widgets.SelectMultiple(description="Choose x variables", continuous_update=False)
widgets_dict["x_vars"] = x_vars
x_var_names = []
for idx in range(widgets_dict["num_x_vars"].value):
    x_var_names.append("x{}".format(idx + 1))
x_vars.options = x_var_names
x_vars.value = x_vars.options
x_vars

In [42]:
# html output 1 - display area for the first rows of the simulated data
data_snippet = HTML()
data_snippet

In [43]:
# html output 2 - display area for the regression summary (coefficients etc.)
regression_results = HTML()
regression_results

In [44]:
# html output 3 - display area for the pair plot that includes the residuals
pair_plot_residuals = HTML()
pair_plot_residuals

In [45]:
# html output 4 - display area for the pair plot of the generated data
pair_plot_data = HTML()
pair_plot_data

In [46]:
# html output 5 - display area for the "Real formula" used to generate the data
regression_model_html_output = HTML()
regression_model_html_output

In [47]:
# If we want to display output in an arbitrary display area, this is a workaround
def plot_to_html(fig):
    """Render a matplotlib figure as a self-contained HTML ``<img>`` tag.

    The figure is written as PNG into an in-memory buffer and embedded as a
    base64 ``data:`` URI, so the returned markup can be assigned to the
    ``value`` of an HTML widget.

    Parameters
    ----------
    fig : object
        Anything exposing ``fig.canvas.print_png(file_like)`` (e.g. a
        matplotlib figure).

    Returns
    -------
    str
        ``'<img src="data:image/png;base64,...">'``.
    """
    # PNG is binary data, so collect it in a bytes buffer rather than a text one.
    png_buffer = io.BytesIO()
    fig.canvas.print_png(png_buffer)
    # b2a_base64 yields bytes terminated by a newline: decode to text and strip
    # the newline so the src attribute holds nothing but the payload.
    encoded = binascii.b2a_base64(png_buffer.getvalue()).decode("ascii").strip()
    return '<img src="data:image/png;base64,{}">'.format(encoded)

### Choose characteristics of regression model

### Results of regression model

In [48]:
class RegressionModel(object):
    """Simulate a linear data set, fit an OLS regression and publish the results.

    ``run`` is the entry point. It relies on module-level objects defined
    elsewhere in the notebook: ``widgets_dict`` (input widgets), the HTML
    output widgets ``data_snippet``, ``regression_results``,
    ``regression_model_html_output``, ``pair_plot_residuals`` and
    ``pair_plot_data``, and the helper ``plot_to_html``.

    Attributes set while running:
        parameters         -- dict of the parameters used for the last run
        regression_data    -- DataFrame with columns x1..xN, e, y (and resid
                              once the regression has been fitted)
        real_formula       -- text of the formula used to generate y
        regression_results -- fitted statsmodels results object
    """

    def __init__(self):
        self.parameters = {}
        self.regression_data = None

    def run(self, parameters):
        """Generate new data, fit the regression and refresh every output widget.

        Parameters
        ----------
        parameters : dict
            Must contain ``num_points``, ``num_x_vars``, ``var_x``, ``var_e``
            and ``x_vars`` (the names of the x variables to regress on).
        """
        clear_output()

        # Keep the choices of the x-variable selector in sync with the
        # "Number of x vars" slider.
        widgets_dict["x_vars"].options = [
            "x{}".format(i + 1) for i in range(widgets_dict["num_x_vars"].value)
        ]

        self.parameters = parameters

        self.regression_data = self.get_regression_data()
        self.run_regression()

        data_snippet.value = self.regression_data.head().to_html(classes=["rendered_html"])

        html_template = '<code>{}</code>'
        regression_results.value = html_template.format(self.regression_results.summary().as_text())

        html_template = "<code>Real formula: {}</code>"
        regression_model_html_output.value = html_template.format(self.real_formula)

        # TODO: only compute the pair plots if the options are selected.
        pair_plot_residuals.value = self._render_and_close(self.run_pair_plot_residuals())
        pair_plot_data.value = self._render_and_close(self.run_pair_plot_original_data())

    @staticmethod
    def _render_and_close(fig):
        """Convert ``fig`` to HTML, then close it so figures do not pile up across runs."""
        try:
            return plot_to_html(fig)
        finally:
            plt.close(fig)

    @staticmethod
    def _random_correlation_matrix(num_variables):
        """Return a random positive-definite matrix with ones on the diagonal."""
        while True:
            a = np.random.rand(num_variables, num_variables)
            b = np.dot(a, a.T)
            # Shrink the off-diagonal entries below 1 before forcing a unit diagonal.
            b = b / (np.max(b) * 1.1)
            np.fill_diagonal(b, 1)
            # b is symmetric, so the symmetric eigen-solver (real eigenvalues) applies.
            if np.all(np.linalg.eigvalsh(b) > 0):
                return b

    def get_regression_data(self):
        """Simulate ``y = sum(c_i * x_i) + e`` from ``self.parameters``.

        Reads ``num_points``, ``num_x_vars``, ``var_x`` and ``var_e``. The x
        variables are drawn from a zero-mean multivariate normal in which every
        variable has variance ``var_x`` and the correlations are random. The
        error term is normal with mean 0 and variance ``var_e``. Coefficients
        are drawn uniformly from [-2, 2).

        Side effect: stores the generating formula in ``self.real_formula``.

        Returns
        -------
        pandas.DataFrame
            Columns ``x1..xN``, ``e`` and ``y``, with ``num_points`` rows.
        """
        num_points = self.parameters["num_points"]
        num_x_vars = self.parameters["num_x_vars"]
        var_x = self.parameters["var_x"]
        var_e = self.parameters["var_e"]

        # Scale the unit-diagonal matrix so that each x has variance var_x
        # (var_x is a variance, not a mean).
        cov = var_x * self._random_correlation_matrix(num_x_vars)
        x = np.random.multivariate_normal(np.zeros(num_x_vars), cov, num_points)

        # np.random.normal expects the standard deviation, hence the square root.
        e = np.random.normal(0, np.sqrt(var_e), num_points)

        coefficients = np.random.uniform(-2, 2, num_x_vars)

        var_names = ["x{}".format(i + 1) for i in range(num_x_vars)]

        terms = " + ".join(
            "{}*{}".format(coef, name) for coef, name in zip(coefficients, var_names)
        )
        self.real_formula = "y = {} + error".format(terms)

        df = pd.DataFrame(x, columns=var_names)
        df["e"] = e
        df["y"] = x.dot(coefficients) + e

        return df

    def _pair_plot(self, columns):
        """Draw a scatter matrix of the given ``regression_data`` columns; return the figure."""
        fig, ax = plt.subplots()
        # NOTE(review): figsize may have no effect because an existing axes is
        # passed in -- confirm against the pandas scatter_matrix documentation.
        scatter_matrix(self.regression_data[columns], alpha=0.2, figsize=(20, 20), diagonal='kde', ax=ax)
        return fig

    def run_pair_plot_original_data(self):
        """Return a pair-plot figure of the x variables and y (no error, no residuals)."""
        cols = [c for c in self.regression_data.columns if c not in ("e", "resid")]
        return self._pair_plot(cols)

    def run_pair_plot_residuals(self):
        """Return a pair-plot figure of every column except the error term ``e``."""
        cols = [c for c in self.regression_data.columns if c != "e"]
        return self._pair_plot(cols)

    def _build_formula(self):
        """Build the regression formula from the selected x variables.

        Selected names that are not columns of the current data (for example a
        selection left over after the number of x variables was reduced) are
        dropped. With nothing left to regress on, an intercept-only model is
        used so the formula stays valid.
        """
        available = set(self.regression_data.columns)
        selected = [v for v in (self.parameters.get("x_vars") or ()) if v in available]
        return "y ~ {}".format(" + ".join(selected) if selected else "1")

    def run_regression(self):
        """Fit OLS on ``regression_data``.

        Stores the fitted results in ``self.regression_results`` and adds the
        residuals to ``regression_data`` as column ``resid``.
        """
        results = sm.ols(formula=self._build_formula(), data=self.regression_data).fit()

        self.regression_results = results
        self.regression_data["resid"] = results.resid

In [49]:
# Model - the RegressionModel instance used by the notebook
model = RegressionModel()

In [50]:
def convert_if_int(el):
    """Return ``el`` as an ``int`` when it is a float holding a whole number.

    Any other value (non-float, or a float with a fractional part, infinity
    or NaN) is returned unchanged.
    """
    # isinstance also covers float subclasses, which an exact type comparison misses.
    if isinstance(el, float) and el.is_integer():
        return int(el)
    return el
        
def get_parameters():
    """Collect the current value of every widget in ``widgets_dict``.

    Returns a dict mapping each widget's key to its value, with whole-number
    floats converted to ``int`` by ``convert_if_int``. The dict is also
    printed.
    """
    # .items() and print(...) behave the same on Python 2 and Python 3.
    p = {name: convert_if_int(widget.value) for name, widget in widgets_dict.items()}
    print(p)
    return p
    

def widget_observer(callee):
    """Observer callback: re-run the model when a widget's ``value`` changes.

    ``callee`` is the change notification, read as a mapping with ``"type"``
    and ``"name"`` entries. Any notification other than a ``"change"`` of
    ``"value"`` is ignored.
    """
    is_value_change = callee["type"] == "change" and callee["name"] == "value"
    if not is_value_change:
        return
    model.run(get_parameters())

In [51]:
# Re-run the model whenever the value of any registered widget changes.
# Subscribing to "value" only avoids invoking the handler for unrelated trait
# changes; .values() works on both Python 2 and Python 3.
for input_widget in widgets_dict.values():
    input_widget.observe(widget_observer, names="value")