## t-SNE

In [40]:
import plotly.express as px
import plotly.graph_objects as go
from sklearn.manifold import TSNE
import pandas as pd
import numpy as np

import DataRetriever as dr

retriever = dr.DataRetriever()
hour = retriever.get_data("All-Subsystems-hour-Year2.pkl")

producing = retriever.get_attributes("producing_attributes.pkl")
consuming = retriever.get_attributes("consuming_attributes.pkl")

hour.dropna(subset=producing + consuming, axis=0, inplace=True) # Drop the relevant rows which has NaN values.

pd.options.mode.chained_assignment = None

In [41]:
#temp = hour.loc[:, ~hour.columns.isin(producing)]
X =  hour[consuming].copy() # hour.groupby(pd.Grouper(key="Timestamp", axis=0, freq='D')).sum()[consuming]
print(len(X.columns.tolist()))
#X.drop_duplicates(inplace=True)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X = scaler.fit_transform(X)

n_components = 2
tsne = TSNE(n_components, random_state=0, init="pca", learning_rate="auto")
tsne_result = tsne.fit_transform(X)
tsne_result = pd.DataFrame(tsne_result)
tsne_result

34



The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



Unnamed: 0,0,1
0,-15.731414,-72.254936
1,-14.642564,-73.713203
2,-14.894239,-73.212875
3,-14.450541,-73.406609
4,-14.797542,-73.314117
...,...,...
8667,60.158787,-28.907974
8668,40.268089,-52.081150
8669,33.264790,-77.292526
8670,22.864946,-12.871217


#### Add extra information to the t-SNE result df

In [42]:
tsne_result["DoW"] = (hour["Timestamp"].dt.day_name()).reset_index(drop=True)

In [43]:
tsne_result["Hour"] = (hour["Timestamp"].dt.hour).reset_index(drop=True)

In [44]:
tsne_result["Clothes Washer"] = np.where(hour["Load_ClothesWasherPowerWithStandby"] > 20, "On", "Off")
tsne_result["Oven"] = np.where(hour["Load_OvenPowerTotal"] > 20, "On", "Off")

In [45]:
tsne_result["Dishwasher"] = np.where(hour["Elec_PowerDishwasher"] > 20, "On", "Off")
tsne_result["Microwave"] = np.where(hour["Load_MicrowavePowerWithStandby"] > 20, "On", "Off")
tsne_result["Dryer"] = np.where(hour["Load_DryerPowerTotal"] > 20, "On", "Off")

In [46]:
tsne_result

Unnamed: 0,0,1,DoW,Hour,Clothes Washer,Oven,Dishwasher,Microwave,Dryer
0,-15.731414,-72.254936,Sunday,0,Off,Off,Off,Off,Off
1,-14.642564,-73.713203,Sunday,1,Off,Off,Off,Off,Off
2,-14.894239,-73.212875,Sunday,2,Off,Off,Off,Off,Off
3,-14.450541,-73.406609,Sunday,3,Off,Off,Off,Off,Off
4,-14.797542,-73.314117,Sunday,4,Off,Off,Off,Off,Off
...,...,...,...,...,...,...,...,...,...
8667,60.158787,-28.907974,Sunday,19,Off,Off,Off,Off,Off
8668,40.268089,-52.081150,Sunday,20,Off,Off,Off,Off,Off
8669,33.264790,-77.292526,Sunday,21,Off,Off,On,Off,Off
8670,22.864946,-12.871217,Sunday,22,Off,Off,Off,Off,Off


In [47]:
fig = px.scatter(x=tsne_result.iloc[:, 0], y=tsne_result.iloc[:, 1])

fig.update_xaxes(range = [-105, 105])
fig.update_yaxes(range = [-105, 105])
fig.update_layout(width=620, height=600, )
fig.update_coloraxes(showscale=False)
fig.update(layout_coloraxis_showscale=False)

for ax in fig['layout']:
    if ax[:5]=='xaxis':
        fig['layout'][ax]['nticks']=20

for ax in fig['layout']:
    if ax[:5]=='yaxis':
        fig['layout'][ax]['nticks']=20

fig.show()

In [48]:
for col in ["DoW", "Hour", "Clothes Washer", "Oven", "Dishwasher", "Microwave", "Dryer"]:
    fig = px.scatter(x=tsne_result.iloc[:, 0],
                             y=tsne_result.iloc[:, 1],
                             color=tsne_result[col],
                             opacity=1,
                             labels={"color": col})

    fig.update_xaxes(range = [-105, 105])
    fig.update_yaxes(range = [-105, 105])
    fig.update_layout(width=650, height=600)

    for ax in fig['layout']:
        if ax[:5]=='xaxis':
            fig['layout'][ax]['nticks']=20

    for ax in fig['layout']:
        if ax[:5]=='yaxis':
            fig['layout'][ax]['nticks']=20

    fig.show()

# DBSCAN

In [49]:
from sklearn.cluster import DBSCAN

In [50]:
tsne_result.rename(columns={0: "X1", 1:"X2"}, inplace=True)
X_tsne = tsne_result[["X1", "X2"]]

In [51]:
s='''
        aliceblue, antiquewhite, aqua, aquamarine, azure,
        bisque, black, blanchedalmond, blue,
        blueviolet, brown, burlywood, cadetblue,
        chartreuse, chocolate, coral, cornflowerblue,
        cornsilk, crimson, cyan, darkblue, darkcyan,
        darkgoldenrod, darkgray, darkgrey, darkgreen,
        darkkhaki, darkmagenta, darkolivegreen, darkorange,
        darkorchid, darkred, darksalmon, darkseagreen,
        darkslateblue, darkturquoise, darkviolet, deeppink, deepskyblue,
        dodgerblue, firebrick,
        floralwhite, forestgreen, fuchsia, gainsboro,
        ghostwhite, gold, goldenrod, green,
        greenyellow, honeydew, hotpink, indianred, indigo,
        ivory, khaki, lavender, lavenderblush, lawngreen,
        lemonchiffon, lightblue, lightcoral, lightcyan,
        lightgoldenrodyellow,
        lightgreen, lightpink, lightsalmon, lightseagreen,
        lightskyblue,
        lightsteelblue, lightyellow, lime, limegreen,
        linen, magenta, maroon, mediumaquamarine,
        mediumblue, mediumorchid, mediumpurple,
        mediumseagreen, mediumslateblue, mediumspringgreen,
        mediumturquoise, mediumvioletred, midnightblue,
        mintcream, mistyrose, moccasin, navajowhite, navy,
        oldlace, olive, olivedrab, orange, orangered,
        orchid, palegoldenrod, palegreen, paleturquoise,
        palevioletred, papayawhip, peachpuff, peru, pink,
        plum, powderblue, purple, red, rosybrown,
        royalblue, saddlebrown, salmon, sandybrown,
        seagreen, seashell, sienna, silver, skyblue,
        slateblue, snow, springgreen,
        steelblue, tan, teal, thistle, tomato, turquoise,
        violet, wheat, white, whitesmoke, yellow,
        yellowgreen
        '''
li = s.split(',')
li = [l.replace('\n', '') for l in li]
li = [l.replace(' ', '') for l in li]

In [52]:
def colors(count, seed=None):
    np.random.seed(seed)

    return list(np.random.choice(li, count, replace=False))


color_list = ["black"] + colors(count=75, seed=2)

In [53]:
#list(zip(np.unique(labels), color_list))

In [54]:
dbscan_cluster = DBSCAN(eps=np.pi, min_samples=10)
dbscan_cluster.fit(X_tsne)

labels=dbscan_cluster.labels_
N_clus=len(set(labels))-(1 if -1 in labels else 0)
print('Estimated no. of clusters: %d' % N_clus)

n_noise = list(dbscan_cluster.labels_).count(-1)
print('Estimated no. of noise points: %d' % n_noise)

color_list = ["black"] + colors(count=N_clus-1, seed=2)
X_tsne["Group"] = labels#.astype(str)

fig = px.scatter(x=X_tsne.iloc[:, 0], y=X_tsne.iloc[:, 1], color=X_tsne["Group"], opacity=1, labels={"color": "Group"}, color_continuous_scale=color_list)
fig.update_xaxes(range = [-105, 105])
fig.update_yaxes(range = [-105, 105])
fig.update_layout(width=620, height=600, )
fig.update_coloraxes(showscale=False)
fig.update(layout_coloraxis_showscale=False)

for ax in fig['layout']:
    if ax[:5]=='xaxis':
        fig['layout'][ax]['nticks']=20

for ax in fig['layout']:
    if ax[:5]=='yaxis':
        fig['layout'][ax]['nticks']=20

fig.show()

Estimated no. of clusters: 76
Estimated no. of noise points: 66


In [55]:
# list(zip(color_list, np.unique(labels)))

# Net +/-

In [56]:
tsne_result["Difference"] = (hour[producing].sum(axis=1) / 1000 - hour[consuming].sum(axis=1) / 1000).reset_index(drop=True) # Diff is in kWh
tsne_result["Binned"] = (pd.cut(tsne_result["Difference"], bins=[-np.inf, 0, 1/3, 2/3, 1, np.inf])).astype(str)

In [57]:
tsne_result["Binned"].unique()

array(['(-inf, 0.0]', '(0.0, 0.333]', '(0.333, 0.667]', '(1.0, inf]',
       '(0.667, 1.0]'], dtype=object)

In [58]:
tsne_result['Group'] = labels

average_difference_per_group = tsne_result.groupby(['Group']).sum()
average_difference_per_group = average_difference_per_group[['Difference']]
average_difference_per_group.reset_index(inplace=True)
average_difference_per_group.rename(columns={'Difference': 'Group Sum'}, inplace=True)
average_difference_per_group

Unnamed: 0,Group,Group Sum
0,-1,-33.490222
1,0,-31.949615
2,1,-91.834521
3,2,23.472331
4,3,198.880114
...,...,...
72,71,-37.842217
73,72,179.540748
74,73,-32.506317
75,74,87.222581


In [59]:
tsne_result = tsne_result.merge(average_difference_per_group, on='Group', how='left')
tsne_result

Unnamed: 0,X1,X2,DoW,Hour,Clothes Washer,Oven,Dishwasher,Microwave,Dryer,Difference,Binned,Group,Group Sum
0,-15.731414,-72.254936,Sunday,0,Off,Off,Off,Off,Off,-1.733021,"(-inf, 0.0]",0,-31.949615
1,-14.642564,-73.713203,Sunday,1,Off,Off,Off,Off,Off,-2.201106,"(-inf, 0.0]",0,-31.949615
2,-14.894239,-73.212875,Sunday,2,Off,Off,Off,Off,Off,-1.925911,"(-inf, 0.0]",0,-31.949615
3,-14.450541,-73.406609,Sunday,3,Off,Off,Off,Off,Off,-1.735325,"(-inf, 0.0]",0,-31.949615
4,-14.797542,-73.314117,Sunday,4,Off,Off,Off,Off,Off,-1.964172,"(-inf, 0.0]",0,-31.949615
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8667,60.158787,-28.907974,Sunday,19,Off,Off,Off,Off,Off,-1.001416,"(-inf, 0.0]",34,-276.778143
8668,40.268089,-52.081150,Sunday,20,Off,Off,Off,Off,Off,-0.639806,"(-inf, 0.0]",44,-46.008906
8669,33.264790,-77.292526,Sunday,21,Off,Off,On,Off,Off,-1.321079,"(-inf, 0.0]",11,-256.211732
8670,22.864946,-12.871217,Sunday,22,Off,Off,Off,Off,Off,-0.641581,"(-inf, 0.0]",37,-205.785426


In [60]:
tsne_result

Unnamed: 0,X1,X2,DoW,Hour,Clothes Washer,Oven,Dishwasher,Microwave,Dryer,Difference,Binned,Group,Group Sum
0,-15.731414,-72.254936,Sunday,0,Off,Off,Off,Off,Off,-1.733021,"(-inf, 0.0]",0,-31.949615
1,-14.642564,-73.713203,Sunday,1,Off,Off,Off,Off,Off,-2.201106,"(-inf, 0.0]",0,-31.949615
2,-14.894239,-73.212875,Sunday,2,Off,Off,Off,Off,Off,-1.925911,"(-inf, 0.0]",0,-31.949615
3,-14.450541,-73.406609,Sunday,3,Off,Off,Off,Off,Off,-1.735325,"(-inf, 0.0]",0,-31.949615
4,-14.797542,-73.314117,Sunday,4,Off,Off,Off,Off,Off,-1.964172,"(-inf, 0.0]",0,-31.949615
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8667,60.158787,-28.907974,Sunday,19,Off,Off,Off,Off,Off,-1.001416,"(-inf, 0.0]",34,-276.778143
8668,40.268089,-52.081150,Sunday,20,Off,Off,Off,Off,Off,-0.639806,"(-inf, 0.0]",44,-46.008906
8669,33.264790,-77.292526,Sunday,21,Off,Off,On,Off,Off,-1.321079,"(-inf, 0.0]",11,-256.211732
8670,22.864946,-12.871217,Sunday,22,Off,Off,Off,Off,Off,-0.641581,"(-inf, 0.0]",37,-205.785426


In [61]:
# zero = tsne_result["Group Sum"].min() / (tsne_result["Group Sum"].min() - tsne_result["Group Sum"].max())
fig = px.scatter(x=tsne_result.iloc[:, 0],
                 y=tsne_result.iloc[:, 1],
                 color=tsne_result["Group Sum"],
                 #.clip(lower=0, upper=4) * 100,
                 opacity=1,
                 labels={"color": "Energy Surplus <br> [kWh]"},
                 color_continuous_scale=[(0, "rgb(234,143,129)"), (0.5, "white"), (1, "rgb(32,115,171)")],
                 range_color=[-tsne_result['Group Sum'].max(), tsne_result['Group Sum'].max()]
                 )

fig.update_xaxes(range=[-105, 105])
fig.update_yaxes(range=[-105, 105])
fig.update_layout(width=660, height=600, )

for ax in fig['layout']:
    if ax[:5]=='xaxis':
        fig['layout'][ax]['nticks']=20

for ax in fig['layout']:
    if ax[:5]=='yaxis':
        fig['layout'][ax]['nticks']=20


fig.show()

# Decision Tree

In [62]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder

from matplotlib import pyplot as plt
from sklearn.tree import export_text
from sklearn import tree

In [63]:
ohe = OneHotEncoder(sparse=False)
hot_np = ohe.fit_transform(tsne_result[["DoW", "Hour"]])
hot = pd.DataFrame(data=hot_np, columns=ohe.get_feature_names_out())
hot

Unnamed: 0,DoW_Friday,DoW_Monday,DoW_Saturday,DoW_Sunday,DoW_Thursday,DoW_Tuesday,DoW_Wednesday,Hour_0,Hour_1,Hour_2,...,Hour_14,Hour_15,Hour_16,Hour_17,Hour_18,Hour_19,Hour_20,Hour_21,Hour_22,Hour_23
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8667,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8668,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
8669,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8670,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [64]:
dbscan_cluster.labels_

array([ 0,  0,  0, ..., 11, 37, 52], dtype=int64)

In [65]:
rf_df = hour[consuming].reset_index().merge(hot, left_index=True, right_index=True)
rf_df.drop(["Timestamp"], axis=1, inplace=True)
rf_df["Target"] = dbscan_cluster.labels_

rf_df = rf_df[rf_df["Target"] != -1] # Drops the noise points

rf_df.dropna(inplace=True)

In [66]:
# target = 41
# rf_df["Target"][rf_df["Target"] != target] = -1
# rf_df

In [67]:
# X_rf = rf_df[consuming + hot.columns.tolist()]
# y_rf = rf_df["Target"]

In [68]:
# clf = DecisionTreeClassifier(criterion="entropy")
# clf.fit(X_rf, y_rf)
#
# feature_names = list(X_rf.columns)
# r = export_text(clf, feature_names=feature_names)
# print(r)

In [69]:
# tree.plot_tree(clf, feature_names=feature_names)
# plt.savefig("TreeClothes.eps", format='eps', bbox_inches = "tight")
# plt.show()

In [70]:
# from sklearn.metrics import accuracy_score
#
# print(f"Accuracy of tree is {accuracy_score(clf.predict(X_rf), y_rf)}, depth of tree is {clf.get_depth()}, and # of leaves is {clf.get_n_leaves()}")

#### Many Trees

In [71]:
average_difference_per_group

Unnamed: 0,Group,Group Sum
0,-1,-33.490222
1,0,-31.949615
2,1,-91.834521
3,2,23.472331
4,3,198.880114
...,...,...
72,71,-37.842217
73,72,179.540748
74,73,-32.506317
75,74,87.222581


In [72]:
number_of_negative_clusters = 0
for value in average_difference_per_group['Group Sum']:
    if value < 0:
        number_of_negative_clusters += 1

print(f"{number_of_negative_clusters} out of {N_clus} clusters have a negative energy sum")

48 out of 76 clusters have a negative energy sum


In [73]:
from itertools import chain
from collections import defaultdict

attribute_occurences = []
attribute_importances = []

for idx, diff in average_difference_per_group['Group Sum'].iteritems():
    if diff < 0: #If the energy difference is negative we want to create a tree for it.
        target = idx #Set the cluster target

        group_df = rf_df.copy()
        group_df["Target"][group_df["Target"] != target] = -1 #Assign all other groups than target group to -2, since -1 target is the noise group


    X_rf = group_df[consuming + hot.columns.tolist()]
    y_rf = group_df["Target"]

    features = X_rf.columns.tolist()

    clf = DecisionTreeClassifier(criterion="entropy")
    clf.fit(X_rf, y_rf)


    feature_importances = clf.feature_importances_ #Returns a list of gini importances for each attribute

    print(feature_importances)

    positive_importances = [i for i in feature_importances if i > 0]
    attribute_importances.append(positive_importances)

    positive_importances_idx = [i for i in range(len(feature_importances)) if feature_importances[i] > 0]
    attribute_occurences.append(positive_importances_idx) #Append index of those attributes with a positive importance.



attribute_occurences = list(chain.from_iterable(attribute_occurences)) #Unnests the attribute_occurences list
attribute_importances = list(chain.from_iterable(attribute_importances))

attribute_importances

[0.         0.         0.         0.         0.         0.
 0.         0.         0.45459318 0.         0.         0.46679351
 0.         0.         0.         0.         0.         0.
 0.         0.04924253 0.         0.         0.         0.
 0.         0.         0.02937078 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.        ]
[0.         0.         0.         0.15932775 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.01860289 0.         0.80285055
 0.         0.         0.         0.         0.         0.
 0.         0.0192188  0.         0.      

[0.4545931799544443,
 0.4667935125698118,
 0.04924253165199771,
 0.029370775823746226,
 0.15932775361574938,
 0.018602893506266704,
 0.8028505501824568,
 0.01921880269552695,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.017959590276764487,
 0.017705163648550896,
 0.6405273859692925,
 0.3238078601053919,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.34176745038215645,
 0.0177051636485509,
 0.6405273859692926,
 0.041658675858573795,
 0.7111264830414019,
 0.17800533578081384,
 0.04558283539489298,
 0.023626669924317497,
 0.556904057669155,
 0.026852379027733717,
 0.15587885712521313,
 0.10027869899912328,
 0.054743325619408104,
 0.08450805832803662,
 0.020834623231330007,
 0.010858777770452785,
 0.11929933700068576,
 0.003897646963400248,
 0.0405280401441998

In [74]:
attributes = group_df[consuming + hot.columns.tolist()].columns.tolist() #A list of all the attributes corresponding to those in X_rf

# Since attribute_occurences contains indexes of attributes, we replace them with the string names from the list attributes
for idx in range(len(attribute_occurences)):
    value = attribute_occurences[idx]
    attribute_occurences[idx] = attributes[value]

In [75]:
all_attribute_importances = list(zip(attribute_occurences, attribute_importances))

attribute_importances_df = pd.DataFrame(all_attribute_importances, columns=['Attribute', 'Importance'])
attribute_importances_df = attribute_importances_df.groupby(['Attribute']).mean()
attribute_importances_df.sort_values(by='Importance', ascending=False, inplace=True)
attribute_importances_df.reset_index(inplace=True)
attribute_importances_df

Unnamed: 0,Attribute,Importance
0,Elec_PowerPlugsMBAEast,0.61405
1,Load_OvenPowerTotal,0.61372
2,Hour_21,0.583655
3,Hour_19,0.52083
4,Hour_22,0.509546
5,Hour_0,0.509329
6,Hour_20,0.481907
7,Hour_18,0.445376
8,Elec_PowerDishwasher,0.360125
9,Elec_PowerLightsBasementStair,0.315447


In [76]:
# Count number of occurences of each attribute in attribute_occurences.
summed_attribute_occurences = defaultdict(lambda: 0)
for attribute in attribute_occurences:
    summed_attribute_occurences[attribute] += 1

attribute_occurences_df = pd.DataFrame(summed_attribute_occurences.items(), columns=['Attribute', 'Count'])
attribute_occurences_df['Count'] = (attribute_occurences_df['Count'] / number_of_negative_clusters).round(2)
attribute_occurences_df.sort_values(by='Count', ascending=True, inplace=True)
attribute_occurences_df

Unnamed: 0,Attribute,Count
52,Elec_PowerLightsAttic,0.02
35,Hour_19,0.02
50,Elec_PowerPlugsInstHall,0.02
34,Hour_23,0.02
48,DoW_Monday,0.02
33,Hour_9,0.02
40,Hour_21,0.02
53,Hour_18,0.02
24,Hour_6,0.02
43,Hour_22,0.02


In [77]:
master_df = attribute_importances_df.merge(attribute_occurences_df, on='Attribute')
master_df.sort_values(by='Importance', ascending=True, inplace=True)
master_df

Unnamed: 0,Attribute,Importance,Count
54,Hour_23,0.003769,0.02
53,Hour_9,0.005703,0.02
52,Elec_PowerLightsAttic,0.007006,0.02
51,Hour_15,0.007213,0.04
50,Elec_PowerPlugs2ndFloor,0.007693,0.17
49,DoW_Monday,0.00857,0.02
48,Load_DryerPowerTotal,0.008604,0.04
47,Hour_8,0.010928,0.04
46,Hour_13,0.013671,0.1
45,DoW_Thursday,0.014547,0.12


In [78]:
fig = go.Figure(go.Bar(
    x = master_df['Importance'],
    y = master_df['Attribute'],
    orientation='h',
    text = round(master_df['Importance'], 2),
    textposition='outside',
))

fig.update_layout(height=700)
fig.update_xaxes(title='Feature Importance')

fig.show()