#### Regression Analysis for Materials Sciences - *RAMSES*

In [1]:
#Imports
import ipywidgets as widgets
from ipywidgets import Text,BoundedFloatText,Checkbox,ToggleButtons,Dropdown,VBox,HBox,Accordion,BoundedIntText,SelectMultiple,RadioButtons,FloatRangeSlider,Button,IntSlider,Label,Tab,Output,FileUpload,Layout,FloatSlider

from IPython.display import display,Markdown
import pandas as pd
import sys
from io import StringIO
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
#Components

# Top-level tab container; its children and titles are assigned further down.
tab = Tab()

# Output areas used by the different panes. Every area except
# ``out_plotting`` is drawn with a thin black frame.
_FRAME = '1px solid black'

out_plotting = Output()
out = Output(layout={'border': _FRAME})
out_pairwise = Output(layout={'border': _FRAME})
out_heat = Output(layout={'border': _FRAME})
out_data_pre_pr = Output(layout={'border': _FRAME})
out_ml = Output(layout={'border': _FRAME})
out_scatter = Output(layout={'border': _FRAME})
out_algo = Output(layout={'border': _FRAME})
out_algo_alt = Output(layout={'border': _FRAME})
out_compare_plot = Output(layout={'border': _FRAME})

# File picker for the data file (single file, no extension filter).
up = FileUpload(accept="", multiple=False)

# CSV dialect controls: column separator and decimal mark.
delim = RadioButtons(
    description='Separator: ',
    options=[';', ',', ' '],
    disabled=False,
)
delim_dec = RadioButtons(
    description='Decimal delim: ',
    options=[',', '.'],
    disabled=False,
)

# Substrings that are stripped from the raw file content before parsing.
eraser = SelectMultiple(
    description='Eraser: ',
    options=['tab', '"'],
    value=['tab'],
    disabled=False,
)

# Number of leading lines to skip when reading the file.
rows = IntSlider(
    description='# of lines:',
    value=0,
    step=1,
    orientation='horizontal',
    continuous_update=False,
    readout=True,
    readout_format='d',
    disabled=False,
)

## ML - pre-processing widgets

# Multi-select for the input features; its options are filled in by upload().
feature_selector = SelectMultiple(
    description='Features',
    options=[],
    disabled=False,
    layout=Layout(width='50%'),
)

# Drop-down for the target column; its options are filled in by
# confirm_features().
target_selection = Dropdown(
    description='Target',
    options=[''],
    value='',
    placeholder='select the target variable',
    disabled=False,
    layout=Layout(width='50%'),
)




# NOTE: ``graph_type``, ``x_axis`` and ``y_axis`` are bound again in the
# "#Plotting" cell further down; the instances created here are replaced
# once that cell has run.
graph_type = Dropdown(
    description='Chart Type:',
    options=['Bar Chart', 'Line Chart'],
    value='Bar Chart',
    disabled=False,
)

x_axis = Dropdown(description='X-Axis:', options=[''], value='', disabled=False)
y_axis = Dropdown(description='Y-Axis:', options=[''], value='', disabled=False)

# Colour used by the bar/line chart renderer.
color_picker = widgets.ColorPicker(
    description='Color Picker: ',
    value='lightblue',
    concise=False,
    disabled=False,
)

# Selects which view of the data the "Data Info" tab shows. The option
# strings carry trailing blanks and are compared verbatim in desc().
toggle = ToggleButtons(
    description='Options',
    options=['Preview  ', 'Info  ', 'Stats  '],
    icons=['search', 'info', 'tachometer'],
    button_style='warning',
    disabled=False,
)

def _half_width_dropdown(description, placeholder):
    """Build an empty, half-width drop-down with the given label and placeholder."""
    return Dropdown(
        options=[''],
        value='',
        placeholder=placeholder,
        description=description,
        disabled=False,
        layout=Layout(width='50%'),
    )


# Scatter-plot aesthetics (these names are bound again in the "#Plotting" cell).
select_x = _half_width_dropdown('X-Axis:', 'select X-Axis')
select_y = _half_width_dropdown('Y-Axis:', 'select Y-Axis')
select_hue = _half_width_dropdown('Hue:', 'select the hue')
select_size = _half_width_dropdown('Size:', 'select the Size')

# Confirms the variable selection for the matrix/heatmap plots.
button_confirm_plot_var = Button(
    description='Confirm selection',
    tooltip='Confirm the selected target variable',
    icon='check',
    button_style='success',
    disabled=False,
    layout=Layout(width='50%', height='inherit'),
)



In [3]:
# Buttons: start
button_upload = Button(
    description='Upload',
    tooltip='Click to Upload',
    icon='check',
    button_style='success',
    disabled=False,
    layout=Layout(width='50%'),
)

button_preview = Button(
    description='Preview',
    tooltip='Click to Preview',
    icon='search',
    button_style='info',
    disabled=False,
    layout=Layout(width='50%'),
)

# No icon is configured for this button.
button_cut = Button(
    description='Simplify',
    tooltip='Simplify the Columns',
    button_style='warning',
    disabled=False,
    layout=Layout(width='50%'),
)

# Plotting buttons
button_plot = widgets.Button(
    description='Plot',
    disabled=False,
    button_style='danger',
    tooltip='Click to Plot',
    icon='pencil',
    layout=widgets.Layout(width='50%'))

button_plot_pairwise = widgets.Button(
    description='Plot',
    disabled=False,
    button_style='danger',
    tooltip='Click to Plot the pairwise relations of the features',
    icon='pencil',
    layout=widgets.Layout(width='50%'))

button_plot_heatmap = widgets.Button(
    description='Plot',
    disabled=False,
    button_style='warning',
    tooltip='Click to Plot the Pearson Correlation Heatmap',
    icon='pencil',
    layout=widgets.Layout(width='50%'))

# The tooltip used to be a copy of the heatmap tooltip.
button_plot_scatter = widgets.Button(
    description='Plot',
    disabled=False,
    button_style='warning',
    tooltip='Click to Plot the scatter plot',
    icon='pencil',
    layout=widgets.Layout(width='50%'))

## ML
# Icon names are given without the "fa-" prefix: the Button widget adds the
# prefix itself, so 'fa-scissors' would resolve to a non-existent class.
button_split_data = widgets.Button(
    description='Random train/test split (70/30)',
    disabled=False,
    button_style='warning',
    tooltip='Click to split data into training and test splits',
    icon='scissors',
    layout=widgets.Layout(width='50%'))

button_scaling = widgets.Button(
    description='Standardize',
    disabled=False,
    button_style='warning',
    tooltip='Click to standardize the data',
    icon='arrows',
    layout=widgets.Layout(width='50%'))

### Algorithms
# Confirmation buttons of the pre-processing pane.
button_confirm_feature = Button(
    description='Select input features',
    tooltip='Confirm Feature Selection of Training Features',
    icon='check',
    button_style='success',
    disabled=False,
    layout=Layout(width='50%', height='inherit'),
)

button_confirm_target = Button(
    description='Select  target',
    tooltip='Confirm the selected Target Variable',
    icon='check',
    button_style='success',
    disabled=False,
    layout=Layout(width='50%'),
)

# Buttons that fit the three linear models. The tooltips used to be copies
# of the "Simplify" button's tooltip, and the icons carried a redundant
# "fa-" prefix (the Button widget adds that prefix itself).
button_lr = widgets.Button(
    description='Linear Regression',
    disabled=False,
    button_style='warning',
    tooltip='Fit a linear regression model',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)
button_lasso = widgets.Button(
    description='Lasso',
    disabled=False,
    button_style='warning',
    tooltip='Fit a Lasso regression model',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)
button_ridge = widgets.Button(
    description='Ridge',
    disabled=False,
    button_style='warning',
    tooltip='Fit a Ridge regression model',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)
# Buttons that plot the results of the linear models. Each name used to be
# assigned twice in a row (first with a stale 'Ridge' label); only the
# second, effective definition is kept. Icons are given without the "fa-"
# prefix because the Button widget adds it itself.
button_plot_coefficents = widgets.Button(
    description='Plot Coefficients',
    disabled=False,
    button_style='warning',
    tooltip='Plot the coefficients of the fitted linear models',
    icon='bar-chart',
    layout=widgets.Layout(width='50%')
)
button_plot_predictions = widgets.Button(
    description='Plot Predictions',
    disabled=False,
    button_style='warning',
    tooltip='Plot true vs. predicted values of the linear models',
    icon='bar-chart',
    layout=widgets.Layout(width='50%')
)
# Buttons for the tree-based models and for the final comparison. Labels and
# tooltips were copy-paste leftovers ('Descission Trees' on the compare
# button, 'Simplify the Columns' everywhere); icons are given without the
# "fa-" prefix because the Button widget adds it itself.
button_random_forrest = widgets.Button(
    description='Random Forest',
    disabled=False,
    button_style='warning',
    tooltip='Fit a random forest regressor',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)
button_dec_tree = widgets.Button(
    description='Decision Trees',
    disabled=False,
    button_style='warning',
    tooltip='Fit a decision tree regressor',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)

button_compare = widgets.Button(
    description='Compare',
    disabled=False,
    button_style='warning',
    tooltip='Compare the fitted models',
    icon='calculator',
    layout=widgets.Layout(width='50%')
)

button_plot_comparision = widgets.Button(
    description='Compare',
    disabled=False,
    button_style='warning',
    tooltip='Plot the RMSE of all fitted models',
    icon='bar-chart',
    layout=widgets.Layout(width='50%')
)
### Algorithms: end
## ML: end
# Buttons: end

In [4]:
# File-upload tab: an accordion with the upload settings, followed by the
# action buttons and the shared output area.
_upload_sections = [
    ('File Selection', up),
    ('Delimiter', HBox([delim, delim_dec, eraser])),
    ('Skip Rows', rows),
]
accordion = Accordion(children=[section for _, section in _upload_sections])
for _position, (_title, _) in enumerate(_upload_sections):
    accordion.set_title(_position, _title)

# button_cut ("Simplify") is intentionally not part of the button row.
accordion_box = VBox([
    accordion,
    HBox([button_preview, button_upload]),
    out,
])

In [5]:
# Machine-learning tab
## Algorithm containers

vbox_all_algo = widgets.VBox([widgets.HBox(children=[button_lr, button_lasso, button_ridge])])
vbox_other_algo = widgets.VBox([widgets.HBox(children=[button_dec_tree, button_random_forrest])])

## Explanatory text shown above the algorithm buttons
out_text = widgets.Output()
with out_text:
    display(Markdown('####  Regression'))
    display(Markdown('- Linear Regression - No regularisation'))
    display(Markdown('- Lasso Regression - L1 regularisation (Tries to push coefficients to zero)'))
    display(Markdown('- Ridge Regression - L2 regularisation (Tries to keep coefficients as low as possible)'))
    display(Markdown("We will compare these three algorithms:"))


out_text_2 = widgets.Output()
with out_text_2:
    display(Markdown('####  other Models'))
    # Spelling fixed ("Algortihms").
    display(Markdown('There are some other algorithms that might perform better:'))
    display(Markdown('-  Decision Trees'))
    display(Markdown('- Random Forest Regressor '))
    display(Markdown("We will compare these two algorithms here:"))

compare_text = widgets.Output()
with compare_text:
    # The metric computed by the comparison is the RMSE (was misspelled "RSME").
    display(Markdown('After executing and plotting all those 5 algorithms we compare the RMSE of them to see which performed the best '))
    

# Row with the split / standardize buttons of the pre-processing pane.
buttons_confirm_container = widgets.HBox(
    children=[button_split_data, button_scaling],
    layout=widgets.Layout(width='100%'),
)

# Accordion of the regression tab: one pane per workflow step.
ml_accordion = widgets.Accordion(children=[
    # 0: feature/target selection, split and scaling
    widgets.VBox([
        widgets.HBox([feature_selector, button_confirm_feature]),
        widgets.HBox([target_selection, button_confirm_target]),
        buttons_confirm_container,
        out_data_pre_pr,
    ]),
    # 1: linear models, their metrics and plots
    widgets.VBox([
        widgets.HBox([out_text]),
        vbox_all_algo,
        out_ml,
        widgets.HBox([button_plot_coefficents, button_plot_predictions]),
        out_algo,
    ]),
    # 2: tree-based models
    widgets.VBox([
        widgets.HBox([out_text_2]),
        vbox_other_algo,
        out_algo_alt,
    ]),
    # 3: comparison of all models
    widgets.VBox([
        widgets.HBox([compare_text]),
        widgets.HBox([button_plot_comparision]),
        out_compare_plot,
    ]),
])

ml_accordion.set_title(0, "Data pre-processing")  # typo fixed ("pre-pocessing")
ml_accordion.set_title(1, "Linear regression")
ml_accordion.set_title(2, "Decision tree regression")
ml_accordion.set_title(3, "Results")


In [6]:
# Plotting widgets
def _empty_dropdown(description, **extra):
    """Build a drop-down with a single empty option plus any extra settings."""
    return Dropdown(options=[''], value='', description=description, disabled=False, **extra)


# Columns to include in the scatter matrix / correlation heatmap.
selector_plot_variable = SelectMultiple(
    description='Features',
    options=[],
    disabled=False,
    layout=Layout(width='50%'),
)

# Chart selector; the first entry is a placeholder that selects nothing.
graph_type = Dropdown(
    description='Graph type:',
    options=['Choose graph type', 'Scatter', 'Scatter Matrix', 'Correlation Heatmap'],
    value='Choose graph type',
    disabled=False,
    layout=Layout(width='50%'),
)

x_axis = _empty_dropdown('X-Axis:')
y_axis = _empty_dropdown('Y-Axis:')

# Scatter-plot aesthetics.
select_x = _empty_dropdown('X-Axis:', placeholder='select X-axis', layout=Layout(width='50%'))
select_y = _empty_dropdown('Y-Axis:', placeholder='select Y-axis', layout=Layout(width='50%'))
select_hue = _empty_dropdown('Hue:', placeholder='select the hue', layout=Layout(width='50%'))
select_size = _empty_dropdown('Size:', placeholder='select the size', layout=Layout(width='50%'))
# Placeholder whose children are swapped by on_graph_type_change().
container_plot_options = VBox([])
button_container = HBox([button_plot])

# Layout of the plotting tab: chart selector, chart-specific options,
# plot button and the output area.
_plot_panel = VBox([
    HBox([graph_type]),
    container_plot_options,
    button_container,
    out_plotting,
])
plotting = VBox(children=[_plot_panel])

# Pages of the top-level tab widget, in display order.
children = [
    accordion_box,
    VBox([toggle, out]),
    plotting,
    ml_accordion,
]

#Plotting Methods
import seaborn as sns

def plot():
    """Render the chart that matches the current ``graph_type`` selection.

    Selections without a renderer (such as the placeholder entry) are ignored.
    A function with the same name is defined again in the "#EventHandling"
    cell of this file.
    """
    renderers = {
        "Scatter": plot_scatter,
        "Correlation Heatmap": plot_heat,
        "Scatter Matrix": plot_pairwise,
    }
    render = renderers.get(graph_type.value)
    if render is not None:
        render()
          
        
def plot_pairwise():
    """Draw a seaborn pair plot of the selected columns into ``out_plotting``."""
    selected_columns = confirm_var()
    with out_plotting:
        # Replace whatever was plotted before.
        out_plotting.clear_output()
        sns.pairplot(selected_columns)
        plt.show()

def plot_heat():
    """Draw the correlation heatmap of the selected columns into ``out_plotting``."""
    selected_columns = confirm_var()
    with out_plotting:
        out_plotting.clear_output()
        correlation = selected_columns.corr()
        plt.figure(figsize=(12, 7))
        sns.heatmap(correlation, annot=True, cmap='Blues')
        # Shift both y-limits by half a unit.
        # NOTE(review): presumably a workaround for clipped outer heatmap rows
        # in some matplotlib versions - confirm it is still needed.
        bottom, top = plt.ylim()
        plt.ylim(bottom + 0.5, top - 0.5)
        plt.title("Feature Correlation Heatmap")
        plt.show()
            
def plot_scatter():
    """Draw a scatter plot of the uploaded data into ``out_plotting``.

    The x, y, hue and size columns are taken from the ``select_x``,
    ``select_y``, ``select_hue`` and ``select_size`` widgets.
    """
    data = df_converter()
    with out_plotting:
        out_plotting.clear_output()
        fig, ax = plt.subplots(figsize=(12, 7))
        sns.scatterplot(
            x=select_x.value,
            y=select_y.value,
            hue=select_hue.value,
            size=select_size.value,
            data=data,
            ax=ax,
            sizes=(50, 300),
        )
        # Separate the two column names; they used to be glued together ("yvsx").
        ax.set_title(select_y.value + " vs " + select_x.value)
        # Put the legend outside the axes so it does not cover data points.
        ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
        plt.show()
        plt.close(fig)

        

def confirm_var():
    """Return the uploaded data restricted to the columns chosen in
    ``selector_plot_variable``."""
    chosen_columns = list(selector_plot_variable.value)
    return df_converter()[chosen_columns]

In [7]:
# Tabs: attach the pages and label them in display order.
tab.children = children
_tab_titles = (
    "🚀 Upload",
    "📊 Data Info",
    "🧭 Design Space Explorer",
    "🤖 Regression Analysis",
)
for _position, _title in enumerate(_tab_titles):
    tab.set_title(_position, _title)

In [8]:
#EventHandling

def content_parser():
    """Return the uploaded CSV text as a ``StringIO``, or ``None``.

    Only entries whose metadata type is ``"text/csv"`` are considered. The
    content is decoded as UTF-8 and every substring selected in ``eraser``
    is removed (the entry ``"tab"`` stands for the tab character). ``None``
    is returned when nothing was uploaded (a note is then written to
    ``out``) or when no entry yields non-empty text.
    """
    uploads = up.value
    if uploads == {}:
        with out:
            display(Markdown('No CSV loaded'))
        return None

    for entry in uploads.values():
        if entry["metadata"]["type"] != "text/csv":
            continue
        text = str(entry["content"], 'utf-8')
        for token in eraser.value:
            text = text.replace("\t" if token == "tab" else token, "")
        if text != "":
            return StringIO(text)
    return None
def df_converter():
    """Parse the uploaded CSV into a DataFrame using the current UI settings.

    Separator, decimal mark and number of skipped rows come from ``delim``,
    ``delim_dec`` and ``rows``. Returns ``None`` when ``content_parser``
    yields no content.
    """
    buffer = content_parser()
    if buffer is None:
        return None
    return pd.read_csv(
        buffer,
        sep=delim.value,
        index_col=False,
        skiprows=rows.value,
        decimal=delim_dec.value,
    )
def preview():
    """Show the first ten rows of the parsed data in ``out``."""
    frame = df_converter()
    with out:
        out.clear_output()
        display(Markdown('This is the DF:'))
        if frame is None:
            body = 'Configuration is wrong/missing...'
        else:
            body = frame.head(10).to_markdown()
        display(Markdown(body))
            
def upload():
    """Show the parsed data in ``out`` and offer its columns in the selectors."""
    frame = df_converter()
    with out:
        out.clear_output()
        display(Markdown('The uploaded DF looks like:'))

        if frame is None:
            display(Markdown('Configuration is wrong/missing...'))
            return

        display(Markdown(frame.head(10).to_markdown()))
        # Every column-selecting widget gets the full column list.
        column_widgets = (
            x_axis,
            y_axis,
            feature_selector,
            select_x,
            select_y,
            select_size,
            select_hue,
            selector_plot_variable,
        )
        for selector in column_widgets:
            selector.options = frame.columns
def desc():
    """Show the view selected in ``toggle`` (preview, info or stats) in ``out``.

    When no data can be parsed, or the selected view is unknown, a
    configuration hint is shown instead. Previously the hint was attached to
    the view dispatch only, so missing data produced no message at all.
    """
    info_level = toggle.value
    df = df_converter()
    with out:
        out.clear_output()
        display(Markdown('\n Data {} \n'.format(info_level)))
        if df is None:
            display(Markdown('Configuration is wrong/missing...'))
        elif info_level == 'Info  ':
            # DataFrame.info() prints directly; the output widget captures it.
            df.info()
        elif info_level == 'Stats  ':
            display(Markdown(df.describe().to_markdown()))
        elif info_level == 'Preview  ':
            display(Markdown(df.head(10).to_markdown()))
        else:
            display(Markdown('Configuration is wrong/missing...'))
    
def simplify_colomns():
    """Return the uploaded data with simplified column names, or ``None``.

    Parenthesised parts (e.g. units) are dropped, names are stripped and
    lower-cased, blanks become underscores and stray parentheses are
    removed. The first ten rows of the result are printed to ``out``.

    The data is read through ``df_converter`` so that the decimal-mark
    setting is honoured like everywhere else in this file.
    """
    df = df_converter()
    if df is None:
        return None

    # ``regex`` is passed explicitly because its default differs between
    # pandas versions; '(' and ')' must be treated as literals.
    df.columns = (
        df.columns
        .str.replace(r"\(.*\)", "", regex=True)
        .str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )

    with out:
        out.clear_output()
        display(Markdown('This is the DF:'))
        print(df.head(10))
    return df
    
    
#plot methods   

def _plot_bar_or_line(graph):
    """Legacy renderer: bar or line chart of the first five rows, shown in ``out``."""
    kind = graph.lower()
    df = df_converter()
    with out:
        out.clear_output()
        print('\n ------Your {} looks like:------ \n'.format(graph))
        if df is None:
            return
        df = df.head(5)
        height = df[y_axis.value]
        bars = df[x_axis.value]
        y_pos = np.arange(len(height))
        plt.figure(figsize=(10, 4))
        if kind == 'bar chart':
            plt.bar(y_pos, height, color=color_picker.value)
            plt.xticks(y_pos, bars)
        else:
            plt.plot(bars, height, color=color_picker.value, marker='o', linestyle='solid')
            plt.xticks(bars)
        plt.show()


def plot():
    """Render the chart that matches the current ``graph_type`` selection.

    This definition replaces the dispatcher of the same name from the
    "#Plotting" cell, so it has to know the chart types offered there
    ('Scatter', 'Scatter Matrix', 'Correlation Heatmap'); before, it only
    reacted to 'Bar chart' / 'Line chart', which none of the offered options
    match. The bar/line renderer is kept for those legacy values and is now
    matched case-insensitively. Any other selection (such as the placeholder
    entry) is ignored.
    """
    graph = graph_type.value
    renderers = {
        'Scatter': plot_scatter,
        'Scatter Matrix': plot_pairwise,
        'Correlation Heatmap': plot_heat,
    }
    if graph in renderers:
        renderers[graph]()
    elif str(graph).lower() in ('bar chart', 'line chart'):
        _plot_bar_or_line(graph)

                
def plot_pairwise():
    """Draw a seaborn pair plot into ``out_plotting``.

    Uses the columns chosen in ``selector_plot_variable`` and falls back to
    all columns when nothing is selected. The plot used to be written to
    ``out_pairwise``, which is not part of any displayed container, and the
    previous plot was never cleared.
    """
    df = df_converter()
    selection = list(selector_plot_variable.value)
    with out_plotting:
        out_plotting.clear_output()
        if df is None:
            display(Markdown('Configuration is wrong/missing...'))
            return
        sns.pairplot(df[selection] if selection else df)
        plt.show()

def plot_heat():
    """Draw the correlation heatmap into ``out_plotting``.

    Uses the columns chosen in ``selector_plot_variable`` and falls back to
    all columns when nothing is selected. The plot used to be written to
    ``out_heat``, which is not part of any displayed container, and the
    previous plot was never cleared.
    """
    df = df_converter()
    selection = list(selector_plot_variable.value)
    with out_plotting:
        out_plotting.clear_output()
        if df is None:
            display(Markdown('Configuration is wrong/missing...'))
            return
        corr = (df[selection] if selection else df).corr()

        plt.figure(figsize=(9, 7))
        sns.heatmap(corr, annot=True, cmap='Blues')
        # Shift both y-limits by half a unit.
        # NOTE(review): presumably a workaround for clipped outer heatmap rows
        # in some matplotlib versions - confirm it is still needed.
        b, t = plt.ylim()
        plt.ylim(b + 0.5, t - 0.5)
        plt.title("Correlation heatmap")
        plt.show()
            
def plot_scatter():
    """Draw a scatter plot of the uploaded data into ``out_plotting``.

    The x, y, hue and size columns are taken from the ``select_x``,
    ``select_y``, ``select_hue`` and ``select_size`` widgets. The plot used
    to be written to ``out_scatter``, which is not part of any displayed
    container, and the previous plot was never cleared.
    """
    data = df_converter()
    with out_plotting:
        out_plotting.clear_output()
        if data is None:
            display(Markdown('Configuration is wrong/missing...'))
            return
        fig, ax = plt.subplots(figsize=(10, 7))
        sns.scatterplot(
            x=select_x.value,
            y=select_y.value,
            hue=select_hue.value,
            size=select_size.value,
            data=data,
            ax=ax,
            sizes=(50, 300),
        )
        # Separate the two column names; they used to be glued together ("yvsx").
        ax.set_title(select_y.value + " vs " + select_x.value)
        # Put the legend outside the axes so it does not cover data points.
        ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
        plt.show()
        plt.close(fig)
    

In [9]:
# ML event handling
# pre-processing
def confirm_features():
    """Confirm the feature selection and offer the remaining columns as target.

    The options of ``target_selection`` are set to every column that is not
    a selected feature, and the selection is echoed in ``out_data_pre_pr``.
    A hint is shown instead when no data can be parsed.
    """
    df = df_converter()
    selected = feature_selector.value

    if df is not None:
        target_selection.options = df.columns[~df.columns.isin(selected)]

    with out_data_pre_pr:
        # ``clear_output`` has to be called; the bare attribute did nothing.
        out_data_pre_pr.clear_output()
        if df is None:
            display(Markdown('Configuration is wrong/missing...'))
            return
        display(Markdown('The selected features are:'))
        print(selected)
    
def confirm_target():
    """Echo the currently selected target column in ``out_data_pre_pr``."""
    target_var = target_selection.value
    with out_data_pre_pr:
        # ``clear_output`` has to be called; the bare attribute did nothing.
        out_data_pre_pr.clear_output()
        display(Markdown('The target variable is:'))
        print(target_var)

def split_data(self):
    """Split the uploaded data into training and test sets (70/30).

    Args:
        self: Not used. The parameter exists because the function is both
            registered as a button click handler (which passes the button)
            and called from ``Algos`` methods (which pass the instance).

    Returns:
        ``[X_train, X_test, Y_train, Y_test]``. X holds the columns chosen
        in ``feature_selector``; Y is a one-column DataFrame with the column
        chosen in ``target_selection``. The split uses a fixed random state,
        so repeated calls on the same data give the same split.

    Raises:
        ValueError: If no data can be parsed from the upload.
    """
    from sklearn.model_selection import train_test_split

    df = df_converter()
    if df is None:
        with out_data_pre_pr:
            out_data_pre_pr.clear_output()
            display(Markdown('Configuration is wrong/missing...'))
        raise ValueError('No data loaded: upload a CSV before splitting.')

    X = df.loc[:, df.columns.isin(feature_selector.value)]
    Y = df[[target_selection.value]]

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=2)

    with out_data_pre_pr:
        # ``clear_output`` has to be called; the bare attribute did nothing.
        out_data_pre_pr.clear_output()
        display(Markdown('Data has been split into training and test sets:'))

    return [X_train, X_test, Y_train, Y_test]

def scale_data(self):
    """Standardize the features of a fresh train/test split.

    The scaler is fitted on the training features only and then applied to
    both feature sets.

    Args:
        self: Forwarded to ``split_data``; not used otherwise.

    Returns:
        ``[X_train, X_test, Y_train, Y_test]`` with standardized feature
        arrays. Previously the scaled arrays were discarded and nothing was
        returned. Note that the ``Algos`` methods in this file call
        ``split_data`` themselves and therefore work on unscaled data.
    """
    from sklearn.preprocessing import StandardScaler

    X_train, X_test, Y_train, Y_test = split_data(self)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    with out_data_pre_pr:
        display(Markdown('Data has been normalized! '))

    return [X_train, X_test, Y_train, Y_test]
        

        


In [10]:
#Algos
class Algos:
    """Fit the regression models and render their results into the UI.

    The estimators are injected through the constructor and used by the
    methods (before, the methods silently used module-level globals instead
    of the injected objects). Every method obtains its train/test split from
    the module-level ``split_data`` and writes into one of the module-level
    output widgets.
    """

    # Header of the metric table printed for the tree-based models.
    _TREE_HEADER = "Model\t\t\t\t RMSE \t\t MSE \t\t MAE \t\t R2"
    # Header of the metric table printed for the linear models.
    _LINEAR_HEADER = "Model\t\t\t RMSE \t\t MSE \t\t MAE \t\t R2"

    def __init__(self, dataframe, lr_alg, lasso_alg, ridge_alg, des_tree_alg, rand_for_alg):
        """Store the data reference and the five estimators."""
        self.dataframe = dataframe
        self.lr_alg = lr_alg
        self.lasso_alg = lasso_alg
        self.ridge_alg = ridge_alg
        self.des_tree_alg = des_tree_alg
        self.rand_for_alg = rand_for_alg

    # ------------------------------------------------------------------ helpers
    @staticmethod
    def _scores(y_true, y_pred):
        """Return ``(RMSE, MSE, MAE, R2)`` for the given predictions."""
        mse = mean_squared_error(y_true, y_pred)
        return (
            np.sqrt(mse),
            mse,
            mean_absolute_error(y_true, y_pred),
            r2_score(y_true, y_pred),
        )

    @staticmethod
    def _parity_plot(y_true, y_pred, title):
        """Scatter predicted (x) against true (y) values with the identity line."""
        y_true = np.ravel(y_true)
        y_pred = np.ravel(y_pred)
        lo, hi = y_true.min(), y_true.max()
        # Predictions go on the x-axis so that the axis labels below are
        # correct; they used to be swapped relative to the plotted data.
        plt.scatter(y_pred, y_true)
        plt.plot([lo, hi], [lo, hi], 'k--', lw=2)
        plt.xlabel("Predicted")
        plt.ylabel("True")
        plt.title(title)
        plt.show()

    def _fit_linear(self, model, row, header=None):
        """Fit *model*, print its metric row to ``out_ml`` and return the test predictions."""
        X_train, X_test, Y_train, Y_test = split_data(self)
        model.fit(X_train, Y_train)
        Y_pred = model.predict(X_test)
        with out_ml:
            if header is not None:
                print(header)
            print(row.format(*self._scores(Y_test, Y_pred)))
        return Y_pred

    # --------------------------------------------------------------- comparison
    def compare(self):
        """Plot the test RMSE of all five models as a bar chart.

        All models must have been fitted before; otherwise a hint is shown
        in ``out_compare_plot`` instead of the chart.
        """
        from sklearn.exceptions import NotFittedError

        _, X_test, _, y_test = split_data(self)

        models = [self.lr_alg, self.lasso_alg, self.ridge_alg,
                  self.des_tree_alg, self.rand_for_alg]
        names = ["Linear Regression", "Lasso Regression", "Ridge Regression",
                 "Decision Tree Regression", "Random Forest Regression"]

        with out_compare_plot:
            out_compare_plot.clear_output()
            try:
                rmses = [
                    np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
                    for model in models
                ]
            except NotFittedError:
                display(Markdown('Please run all five algorithms before comparing them.'))
                return

            x = np.arange(len(names))
            width = 0.3

            fig, ax = plt.subplots(figsize=(10, 7))
            rects = ax.bar(x, rmses, width)
            ax.set_ylabel('RMSE')
            ax.set_xlabel('Models')
            ax.set_title('Model performance')
            ax.set_xticks(x)
            ax.set_xticklabels(names, rotation=45)
            self.autolabel(rects, ax)
            fig.tight_layout()
            plt.show()

    # ------------------------------------------------------- tree-based models
    def des_tree(self):
        """Fit the decision tree and show its metrics and parity plot.

        ``out_algo_alt`` is cleared first (the call used to be missing its
        parentheses and therefore did nothing).
        """
        X_train, X_test, y_train, y_test = split_data(self)
        self.des_tree_alg.fit(X_train, y_train)
        y_pred_dtr = self.des_tree_alg.predict(X_test)

        with out_algo_alt:
            out_algo_alt.clear_output()
            print(self._TREE_HEADER)
            print("""Decision Tree Regressor \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
                *self._scores(y_test, y_pred_dtr)))
            self._parity_plot(y_test, y_pred_dtr, "Decision tree regression")

    def rand_for(self):
        """Fit the random forest and append its metrics and parity plot to ``out_algo_alt``."""
        X_train, X_test, y_train, y_test = split_data(self)
        # The forest expects a 1-d target; the split returns a one-column frame.
        self.rand_for_alg.fit(X_train, np.ravel(y_train))
        y_pred_rfr = self.rand_for_alg.predict(X_test)

        with out_algo_alt:
            print(self._TREE_HEADER)
            print("""Random forest regression \t {:.2f} \t\t {:.2f} \t\t{:.2f} \t\t{:.2f}""".format(
                *self._scores(y_test, y_pred_rfr)))
            self._parity_plot(y_test, y_pred_rfr, "Random forest regression")

    # ------------------------------------------------------------ linear models
    def linreg(self):
        """Fit linear regression, print header and metrics to ``out_ml``, return the test predictions."""
        return self._fit_linear(
            self.lr_alg,
            """Linear regression \t {:.2f} \t\t {:.2f} \t{:.2f} \t\t{:.2f}""",
            header=self._LINEAR_HEADER,
        )

    def lassoreg(self):
        """Fit Lasso, print its metrics to ``out_ml`` and return the test predictions."""
        return self._fit_linear(
            self.lasso_alg,
            """Lasso     \t {:.2f} \t\t {:.2f} \t{:.2f} \t\t{:.2f}""",
        )

    def ridgereg(self):
        """Fit Ridge, print its metrics to ``out_ml`` and return the test predictions."""
        return self._fit_linear(
            self.ridge_alg,
            """Ridge regression \t {:.2f} \t\t {:.2f} \t{:.2f} \t\t{:.2f}""",
        )

    # -------------------------------------------------------------------- plots
    def plot_coef(self):
        """Plot the coefficients of the three linear models side by side.

        The bars are labelled with the features the models were trained on
        (before, every column but the last one of the file was used, which
        is only right when the target is the last column and all other
        columns are features). A hint is shown when a model is not fitted.
        """
        with out_algo:
            try:
                coeff_lr = np.ravel(self.lr_alg.coef_)
                coeff_lasso = np.ravel(self.lasso_alg.coef_)
                coeff_ridge = np.ravel(self.ridge_alg.coef_)
            except AttributeError:
                display(Markdown(
                    'Please run Linear Regression, Lasso and Ridge before plotting the coefficients.'))
                return

            # Prefer the names recorded by the estimator (available in recent
            # scikit-learn versions); otherwise rebuild them the same way
            # split_data selects the feature columns.
            names = getattr(self.lr_alg, 'feature_names_in_', None)
            if names is None:
                df = df_converter()
                names = df.columns[df.columns.isin(feature_selector.value)]
            labels = list(names)

            x = np.arange(len(labels))
            width = 0.3
            fig, ax = plt.subplots(figsize=(10, 10))
            rects1 = ax.bar(x - width, coeff_lr, width, label='LR')
            rects2 = ax.bar(x, coeff_lasso, width, label='Lasso')
            rects3 = ax.bar(x + width, coeff_ridge, width, label='Ridge')

            ax.set_ylabel('Coefficient')
            ax.set_xlabel('Features')
            ax.set_title('Regression coefficient weights')
            ax.set_xticks(x)
            ax.set_xticklabels(labels, rotation=45)
            ax.legend()

            for rects in (rects1, rects2, rects3):
                self.autolabel(rects, ax)

            fig.tight_layout()
            plt.show()

    def autolabel(self, rects, ax):
        """Attach a text label above each bar in *rects*, displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.annotate(
                '{:.2f}'.format(height),
                xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 3),
                textcoords="offset points",
                ha='center',
                va='bottom',
            )
    
    

In [11]:
# Estimator instances shared across the notebook; all use default
# hyper-parameters except for the explicitly given forest size.
lr, lasso, ridge = LinearRegression(), Lasso(), Ridge()
dtr = DecisionTreeRegressor()
rfr = RandomForestRegressor(n_estimators=100)

# Single Algos instance driven by the click handlers.
a = Algos(
    dataframe=df_converter(),
    lr_alg=lr,
    lasso_alg=lasso,
    ridge_alg=ridge,
    des_tree_alg=dtr,
    rand_for_alg=rfr,
)



In [12]:
def plot_pred(self):
    """Plot true vs. predicted test values for the three linear models.

    Each model is (re)fitted through the matching method of *self*, which
    also prints its metrics to ``out_ml``. The figure is shown in
    ``out_algo``, which is cleared first.

    Args:
        self: The ``Algos`` instance whose ``linreg``, ``lassoreg`` and
            ``ridgereg`` methods are used. (Before, the parameter was
            ignored in favour of the module-level instance ``a``.)
    """
    y_true = np.ravel(split_data(self)[3])
    lo, hi = y_true.min(), y_true.max()

    panels = (
        ("Linear regression", self.linreg),
        ("Lasso regression", self.lassoreg),
        ("Ridge regression", self.ridgereg),
    )
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))

    with out_algo:
        out_algo.clear_output()

        for ax, (title, fit_and_predict) in zip(axes, panels):
            y_pred = np.ravel(fit_and_predict())
            ax.scatter(y_pred, y_true, s=20)
            # Identity line: points on it are predicted exactly.
            ax.plot([lo, hi], [lo, hi], 'k--', lw=2)
            ax.set_ylabel("True")
            ax.set_xlabel("Predicted")
            ax.set_title(title)

        fig.suptitle("True vs Predicted")
        # Leave room for the figure title above the panels.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()
        

In [13]:
#EventListener             
def preview_clicked(b):
    """Click handler: show a preview of the data; the button ``b`` is not used."""
    preview()
    
def upload_clicked(b):
    """Click handler of the upload button; ``b`` is not used.

    Runs ``upload`` and shows a hint in ``out`` when the file cannot be
    read: parser errors as before, and now also files that are empty after
    the skipped rows or that are not valid UTF-8.
    """
    # NOTE(review): relies on the private ``_counter`` attribute of the
    # FileUpload widget; presumably meant to reset the widget after more
    # than one upload - confirm against the installed ipywidgets version.
    if up._counter > 1:
        up.value.clear()
        up._counter = 1
    try:
        upload()
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError):
        with out:
            out.clear_output()
            display(Markdown("Sth. went wrong! Please check your csv and the upload settings"))

def desc_clicked(b):
    """Observer of ``toggle``: refresh the data description; ``b`` is not used."""
    desc()

def plotter_clicked(b):
    """Click handler: render the selected chart; ``b`` is not used."""
    plot()

def pairwise_clicked(b):
    """Click handler: draw the pair plot; ``b`` is not used."""
    plot_pairwise()
def heat_clicked(b):
    """Click handler: draw the correlation heatmap; ``b`` is not used."""
    plot_heat()
def scatter_clicked(b):
    """Click handler: draw the scatter plot; ``b`` is not used."""
    plot_scatter()
    
def simplify_clicked(b):
    """Click handler: simplify the column names; ``b`` is not used."""
    simplify_colomns()

    
#ml
def confirm_features_clicked(b):
    """Click handler: confirm the feature selection; ``b`` is not used."""
    confirm_features()


def confirm_target_clicked(b):
    """Click handler: confirm the target selection; ``b`` is not used."""
    confirm_target()


def split_data_clicked(b):
    """Click handler: split the data into training and test sets.

    ``split_data`` requires one positional argument; calling it without one
    raised a TypeError, so the button is passed through.
    """
    split_data(b)


def scale_data_clicked(b):
    """Click handler: standardize the data.

    ``scale_data`` requires one positional argument; calling it without one
    raised a TypeError, so the button is passed through.
    """
    scale_data(b)

def plot_coef_clicked(b):
    """Click handler: plot the coefficients of the linear models; ``b`` is not used."""
    a.plot_coef()
def plot_pred_clicked(b):
    """Click handler: plot true vs. predicted values; ``b`` is not used."""
    plot_pred(a)
def lr_clicked(b):
    """Click handler: run linear regression; ``b`` is not used."""
    a.linreg()
def lasso_clicked(b):
    """Click handler: run Lasso regression; ``b`` is not used."""
    a.lassoreg()
    
def ridge_clicked(b):
    """Click handler: run Ridge regression; ``b`` is not used."""
    a.ridgereg()
def rand_for_clicked(b):
    """Click handler: run the random forest; ``b`` is not used."""
    a.rand_for()
def dec_tree_clicked(b):
    """Click handler: run the decision tree; ``b`` is not used."""
    a.des_tree()
    
def compare_clicked(b):
    """Click handler: plot the model comparison; ``b`` is not used."""
    a.compare()
    
def confirm_var_clicked(b):
    """Click handler: evaluate the plot variable selection; the result is discarded."""
    confirm_var()
    
def on_graph_type_change(change):
    """Show the option widgets that belong to the selected graph type.

    The scatter plot gets the axis/hue/size drop-downs, the scatter matrix
    and the correlation heatmap get the variable selector, and any other
    selection empties the options container. The current widget value is
    read directly; ``change`` is not inspected.
    """
    selected = graph_type.value

    if selected == "Scatter":
        options = [HBox([select_x, select_y]),
                   HBox([select_hue, select_size])]
    elif selected in ("Scatter Matrix", 'Correlation Heatmap'):
        options = [HBox([selector_plot_variable, button_confirm_plot_var])]
    else:
        options = []

    container_plot_options.children = options
   

In [14]:
# Events
# Each (button, handler) pair is registered once, in the order in which it
# first appeared; registering the same handler on the same widget again has
# no additional effect.
_CLICK_HANDLERS = [
    # plot
    (button_plot_pairwise, pairwise_clicked),
    (button_plot_heatmap, heat_clicked),
    (button_preview, preview_clicked),
    (button_plot_scatter, scatter_clicked),
    # ml
    (button_confirm_feature, confirm_features_clicked),
    (button_confirm_target, confirm_target_clicked),
    (button_split_data, split_data),
    (button_scaling, scale_data),
    (button_dec_tree, dec_tree_clicked),
    (button_random_forrest, rand_for_clicked),
    (button_plot_comparision, compare_clicked),
    # upload etc
    (button_upload, upload_clicked),
    (button_confirm_plot_var, confirm_var_clicked),
    (button_plot, plotter_clicked),
    (button_plot_coefficents, plot_coef_clicked),
    (button_plot_predictions, plot_pred_clicked),
    # algo
    (button_lr, lr_clicked),
    (button_lasso, lasso_clicked),
    (button_ridge, ridge_clicked),
    (button_cut, simplify_clicked),
]
for _button, _handler in _CLICK_HANDLERS:
    _button.on_click(_handler)

# Value observers.
toggle.observe(desc_clicked, 'value')
graph_type.observe(on_graph_type_change, names='value')

In [15]:
# Display the assembled tab widget as the output of this cell.
tab

Tab(children=(VBox(children=(Accordion(children=(FileUpload(value={}, description='Upload'), HBox(children=(Ra…