In [2]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
import pandas as pd
spark = SparkSession.builder.config("spark.driver.host","localhost").master("local[*]").getOrCreate()

In [3]:
data = spark.read.format("csv").option("header", "true").option("inferSchema" , "true").csv("InputTrain.csv")
label = spark.read.format("csv").option("header", "true").option("inferSchema" , "true").csv("StepOne_LabelTrain.csv")
test = spark.read.format("csv").option("header", "true").option("inferSchema" , "true").csv("InputTest.csv")

In [4]:
# data_label = data.join(label,["index","House_id"])

## Gradient Booster Tree

In [5]:
from pyspark.ml.classification import GBTClassifier
from pyspark.ml.regression import GBTRegressor
from pyspark.ml.evaluation import MulticlassClassificationEvaluator,BinaryClassificationEvaluator
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.feature import IndexToString, StringIndexer, VectorIndexer
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml import Pipeline

In [6]:
# data = data.drop('index','House_id')

In [7]:
assembler = VectorAssembler(inputCols=data.drop("Index","House_id").columns, outputCol='features')
data_T = assembler.transform(data)
test_T = assembler.transform(test)

In [8]:
data_TT = data_T.select("features")
data_TT.show()

+--------------------+
|            features|
+--------------------+
|[180.0,180.0,180....|
|[2437.0,2426.0,21...|
|[232.0,232.0,232....|
|[180.333333333333...|
|[344.0,341.0,341....|
|[232.0,235.0,235....|
|[193.0,190.0,190....|
|[189.333333333333...|
|[188.666666666666...|
|[209.0,210.0,210....|
|[193.0,191.0,191....|
|[192.0,191.0,190....|
|[586.333333333333...|
|[183.0,181.0,181....|
|[180.333333333333...|
|[350.333333333333...|
|[181.333333333333...|
|[181.0,184.0,183....|
|[182.0,182.0,182....|
|[182.0,182.0,181....|
+--------------------+
only showing top 20 rows



In [30]:
test_TT = test_T.select("features")
test_TT.show()

+--------------------+
|            features|
+--------------------+
|[595.0,595.0,601....|
|[2682.66666666666...|
|[529.0,534.0,540....|
|[197.5,197.5,198....|
|[291.5,285.0,283....|
|[121.0,121.0,119....|
|[656.5,653.0,653....|
|[126.666666666666...|
|[323.0,322.0,324....|
|[119.0,118.0,118....|
|[531.333333333333...|
|[421.333333333333...|
|[2668.5,2677.0,26...|
|[118.0,118.0,118....|
|[215.0,211.0,211....|
|[198.0,198.0,195....|
|[359.0,357.0,367....|
|[220.0,211.0,211....|
|[1223.33333333333...|
|[2796.0,2793.0,28...|
+--------------------+
only showing top 20 rows



####  Washing Machine

In [32]:
# train dataset
train_WM = data_T.select('Index','features')
label_WM = label.select('Index','Washing Machine')
train_WM = train_WM.join(label_WM,"Index").drop('index')
train_WM.show(5)

+--------------------+---------------+
|            features|Washing Machine|
+--------------------+---------------+
|[180.0,180.0,180....|              0|
|[2437.0,2426.0,21...|              0|
|[232.0,232.0,232....|              0|
|[180.333333333333...|              0|
|[344.0,341.0,341....|              0|
+--------------------+---------------+
only showing top 5 rows



In [7]:
# test_WM = test_T.select('Index','features')
# test_WM.show(5)

In [33]:
# Index labels, adding metadata to the label column.
# Fit on whole dataset to include all labels in index.
#More specifically: 
#StringIndexer maps a string column of labels to an ML column of label indices. If the input column is 
#numeric, we cast it to string and index the string values. The indices are in [0, numLabels). 
# Automatically identify categorical features, and index them.
# We specify maxCategories so features with > 4 distinct values are treated as continuous.
featureIndexer_WM =\
    VectorIndexer(inputCol="features", outputCol="indexedFeatures_WM", maxCategories=4).fit(train_WM)

# Split the data into training and test sets (30% held out for testing)
#take a look to the function randomSplit()
(trainingData, testData) = train_WM.randomSplit([0.7, 0.3])
# Train a DecisionTree model. 
#take a look to DecisionTreeClassifier()
trainingData.show()
dt_WM = DecisionTreeClassifier(featuresCol='indexedFeatures_WM', labelCol='Washing Machine')

# Chain indexers and tree in a Pipeline
#Pipeline provide a uniform set of high-level APIs built on top of DataFrames that help 
#users create and tune practical machine learning pipelines.
pipeline_WM = Pipeline(stages=[featureIndexer_WM, dt_WM])

# Train model.  This also runs the indexers.
model_WM = pipeline_WM.fit(trainingData)

# Make predictions.
predictions_WM = model_WM.transform(testData)

# Select example rows to display.
predictions_WM.select("prediction", "features").show(5)

# Select (prediction, true label) and compute test error
#Evaluator for Multiclass Classification, which expects input columns: prediction, label, 
#weight (optional) and probabilityCol (only for logLoss).
evaluator = MulticlassClassificationEvaluator(
    labelCol="Washing Machine", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions_WM)
print("Test Error = %g " % (1.0 - accuracy))

# #in the pipeline there are different stages, we want to print only the tree, so we will choose the 2nd stage
# treeModel = model.stages[2] 
# # summary only
# print(treeModel)

+--------------------+---------------+
|            features|Washing Machine|
+--------------------+---------------+
|[145.0,149.0,149....|              0|
|[148.0,147.0,152....|              0|
|[148.0,147.666666...|              0|
|[149.0,147.0,149....|              0|
|[149.0,148.5,149....|              0|
|[149.0,149.0,150....|              0|
|[149.0,150.0,149....|              0|
|[149.0,152.0,148....|              0|
|[149.0,152.0,150....|              0|
|[149.0,176.333333...|              0|
|[149.5,148.5,148....|              0|
|[150.0,148.0,148....|              0|
|[150.0,150.0,149....|              0|
|[150.0,150.0,150....|              0|
|[150.0,150.333333...|              0|
|[150.0,151.0,150....|              0|
|[150.0,151.666666...|              0|
|[151.0,145.0,148....|              0|
|[151.0,147.0,152....|              0|
|[151.0,148.0,151....|              1|
+--------------------+---------------+
only showing top 20 rows

+----------+--------------------+
|pre

### Dishwasher

In [34]:
train_DW = data_T.select('Index','features')
label_DW = label.select('Index','Dishwasher')
train_DW = train_DW.join(label_DW,"Index").drop('Index')
train_DW.show(5)

+--------------------+----------+
|            features|Dishwasher|
+--------------------+----------+
|[180.0,180.0,180....|         0|
|[2437.0,2426.0,21...|         0|
|[232.0,232.0,232....|         0|
|[180.333333333333...|         0|
|[344.0,341.0,341....|         0|
+--------------------+----------+
only showing top 5 rows



In [35]:
# Index labels, adding metadata to the label column.
# Fit on whole dataset to include all labels in index.
#More specifically: 
#StringIndexer maps a string column of labels to an ML column of label indices. If the input column is 
#numeric, we cast it to string and index the string values. The indices are in [0, numLabels). 
# Automatically identify categorical features, and index them.
# We specify maxCategories so features with > 4 distinct values are treated as continuous.
featureIndexer_DW =\
    VectorIndexer(inputCol="features", outputCol="indexedFeatures_DW", maxCategories=4).fit(train_DW)

# Split the data into training and test sets (30% held out for testing)
#take a look to the function randomSplit()
(trainingData, testData) = train_DW.randomSplit([0.7, 0.3])
# Train a DecisionTree model. 
#take a look to DecisionTreeClassifier()
trainingData.show()
dt_DW = DecisionTreeClassifier(featuresCol='indexedFeatures_DW', labelCol='Dishwasher')

# Chain indexers and tree in a Pipeline
#Pipeline provide a uniform set of high-level APIs built on top of DataFrames that help 
#users create and tune practical machine learning pipelines.
pipeline_DW = Pipeline(stages=[featureIndexer_DW, dt_DW])

# Train model.  This also runs the indexers.
model_DW = pipeline_DW.fit(trainingData)

# Make predictions.
predictions_DW = model_DW.transform(testData)

# Select example rows to display.
predictions_DW.select("prediction", "features").show(5)

# Select (prediction, true label) and compute test error
#Evaluator for Multiclass Classification, which expects input columns: prediction, label, 
#weight (optional) and probabilityCol (only for logLoss).
evaluator = MulticlassClassificationEvaluator(
    labelCol="Dishwasher", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions_DW)
print("Test Error = %g " % (1.0 - accuracy))

# #in the pipeline there are different stages, we want to print only the tree, so we will choose the 2nd stage
# treeModel = model.stages[2] 
# # summary only
# print(treeModel)

+--------------------+----------+
|            features|Dishwasher|
+--------------------+----------+
|[146.0,147.0,148....|         0|
|[148.0,147.666666...|         0|
|[149.0,147.0,149....|         0|
|[149.0,148.5,149....|         0|
|[149.0,149.0,150....|         0|
|[149.0,149.0,151....|         0|
|[149.0,150.0,147....|         0|
|[149.0,150.0,149....|         0|
|[149.0,150.0,149....|         0|
|[149.0,152.0,148....|         0|
|[149.0,152.0,150....|         0|
|[149.5,148.5,148....|         0|
|[150.0,148.0,148....|         0|
|[150.0,150.0,149....|         0|
|[150.0,150.0,152....|         0|
|[150.0,151.0,149....|         0|
|[150.0,151.0,150....|         0|
|[150.0,151.5,152....|         0|
|[150.333333333333...|         0|
|[151.0,145.0,148....|         0|
+--------------------+----------+
only showing top 20 rows

+----------+--------------------+
|prediction|            features|
+----------+--------------------+
|       0.0|[145.0,149.0,149....|
|       0.0|[148.0,147

### Microwave

In [36]:
train_MW = data_T.select('Index','features')
label_MW = label.select('Index','Microwave')
train_MW = train_MW.join(label_MW,"Index").drop('Index')
train_MW.show(5)

+--------------------+---------+
|            features|Microwave|
+--------------------+---------+
|[180.0,180.0,180....|        0|
|[2437.0,2426.0,21...|        0|
|[232.0,232.0,232....|        0|
|[180.333333333333...|        0|
|[344.0,341.0,341....|        0|
+--------------------+---------+
only showing top 5 rows



In [37]:
# Index labels, adding metadata to the label column.
# Fit on whole dataset to include all labels in index.
#More specifically: 
#StringIndexer maps a string column of labels to an ML column of label indices. If the input column is 
#numeric, we cast it to string and index the string values. The indices are in [0, numLabels). 
# Automatically identify categorical features, and index them.
# We specify maxCategories so features with > 4 distinct values are treated as continuous.
featureIndexer_MW =\
    VectorIndexer(inputCol="features", outputCol="indexedFeatures_MW", maxCategories=4).fit(train_MW)

# Split the data into training and test sets (30% held out for testing)
#take a look to the function randomSplit()
(trainingData, testData) = train_MW.randomSplit([0.7, 0.3])
# Train a DecisionTree model. 
#take a look to DecisionTreeClassifier()
trainingData.show()
dt_MW = DecisionTreeClassifier(featuresCol='indexedFeatures_MW', labelCol='Microwave')

# Chain indexers and tree in a Pipeline
#Pipeline provide a uniform set of high-level APIs built on top of DataFrames that help 
#users create and tune practical machine learning pipelines.
pipeline_MW = Pipeline(stages=[featureIndexer_MW, dt_MW])

# Train model.  This also runs the indexers.
model_MW = pipeline_MW.fit(trainingData)

# Make predictions.
predictions_MW = model_MW.transform(testData)

# Select example rows to display.
predictions_MW.select("prediction", "features").show(5)

# Select (prediction, true label) and compute test error
#Evaluator for Multiclass Classification, which expects input columns: prediction, label, 
#weight (optional) and probabilityCol (only for logLoss).
evaluator = MulticlassClassificationEvaluator(
    labelCol="Microwave", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions_MW)
print("Test Error = %g " % (1.0 - accuracy))

# #in the pipeline there are different stages, we want to print only the tree, so we will choose the 2nd stage
# treeModel = model.stages[2] 
# # summary only
# print(treeModel)

+--------------------+---------+
|            features|Microwave|
+--------------------+---------+
|[146.0,147.0,148....|        0|
|[148.0,147.0,152....|        0|
|[149.0,147.0,149....|        0|
|[149.0,149.0,150....|        0|
|[149.0,149.0,151....|        0|
|[149.0,150.0,147....|        0|
|[149.0,150.0,149....|        0|
|[149.0,152.0,148....|        0|
|[149.0,176.333333...|        0|
|[150.0,150.0,149....|        0|
|[150.0,150.0,150....|        0|
|[150.0,150.0,152....|        0|
|[150.0,150.0,152....|        0|
|[150.0,150.333333...|        0|
|[150.0,151.0,149....|        0|
|[150.0,151.0,150....|        0|
|[150.0,151.5,152....|        0|
|[150.0,151.666666...|        0|
|[150.333333333333...|        0|
|[151.0,148.0,151....|        0|
+--------------------+---------+
only showing top 20 rows

+----------+--------------------+
|prediction|            features|
+----------+--------------------+
|       0.0|[145.0,149.0,149....|
|       0.0|[148.0,147.666666...|
|       0.0|

### Kettle

In [38]:
train_K = data_T.select('Index','features')
label_K = label.select('Index','Kettle')
train_K = train_K.join(label_K,"Index").drop('Index')
train_K.show(5)

+--------------------+------+
|            features|Kettle|
+--------------------+------+
|[180.0,180.0,180....|     0|
|[2437.0,2426.0,21...|     0|
|[232.0,232.0,232....|     0|
|[180.333333333333...|     0|
|[344.0,341.0,341....|     0|
+--------------------+------+
only showing top 5 rows



In [40]:
# Index labels, adding metadata to the label column.
# Fit on whole dataset to include all labels in index.
#More specifically: 
#StringIndexer maps a string column of labels to an ML column of label indices. If the input column is 
#numeric, we cast it to string and index the string values. The indices are in [0, numLabels). 
# Automatically identify categorical features, and index them.
# We specify maxCategories so features with > 4 distinct values are treated as continuous.
featureIndexer_K =\
    VectorIndexer(inputCol="features", outputCol="indexedFeatures_K", maxCategories=4).fit(train_K)

# Split the data into training and test sets (30% held out for testing)
#take a look to the function randomSplit()
(trainingData, testData) = train_K.randomSplit([0.7, 0.3])
# Train a DecisionTree model. 
#take a look to DecisionTreeClassifier()
trainingData.show()
dt_K = DecisionTreeClassifier(featuresCol='indexedFeatures_K', labelCol='Kettle')

# Chain indexers and tree in a Pipeline
#Pipeline provide a uniform set of high-level APIs built on top of DataFrames that help 
#users create and tune practical machine learning pipelines.
pipeline_K = Pipeline(stages=[featureIndexer_K, dt_K])

# Train model.  This also runs the indexers.
model_K = pipeline_K.fit(trainingData)

# Make predictions.
predictions_K = model_K.transform(testData)

# Select example rows to display.
predictions_K.select("prediction", "features").show(5)

# Select (prediction, true label) and compute test error
#Evaluator for Multiclass Classification, which expects input columns: prediction, label, 
#weight (optional) and probabilityCol (only for logLoss).
evaluator = MulticlassClassificationEvaluator(
    labelCol="Kettle", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions_K)
print("Test Error = %g " % (1.0 - accuracy))

# #in the pipeline there are different stages, we want to print only the tree, so we will choose the 2nd stage
# treeModel = model.stages[2] 
# # summary only
# print(treeModel)

+--------------------+------+
|            features|Kettle|
+--------------------+------+
|[145.0,149.0,149....|     0|
|[146.0,147.0,148....|     0|
|[148.0,147.0,152....|     0|
|[148.0,147.666666...|     0|
|[149.0,147.0,149....|     0|
|[149.0,148.5,149....|     0|
|[149.0,149.0,150....|     0|
|[149.0,149.0,151....|     0|
|[149.0,150.0,149....|     0|
|[149.0,150.0,149....|     0|
|[149.0,152.0,150....|     0|
|[149.0,176.333333...|     0|
|[149.5,148.5,148....|     0|
|[150.0,150.0,149....|     0|
|[150.0,150.0,152....|     0|
|[150.0,150.0,152....|     0|
|[150.0,151.0,149....|     0|
|[150.0,151.0,150....|     0|
|[150.0,151.666666...|     0|
|[150.333333333333...|     0|
+--------------------+------+
only showing top 20 rows

+----------+--------------------+
|prediction|            features|
+----------+--------------------+
|       0.0|[149.0,150.0,147....|
|       0.0|[149.0,152.0,148....|
|       0.0|[150.0,148.0,148....|
|       0.0|[150.0,150.0,149....|
|       0.0|[150

### Tumble Dryer

In [41]:
train_TD = data_T.select('Index','features')
label_TD = label.select('Index','Tumble Dryer')
train_TD = train_TD.join(label_TD,"Index").drop('Index')
train_TD.show(5)

+--------------------+------------+
|            features|Tumble Dryer|
+--------------------+------------+
|[180.0,180.0,180....|           0|
|[2437.0,2426.0,21...|           0|
|[232.0,232.0,232....|           0|
|[180.333333333333...|           0|
|[344.0,341.0,341....|           0|
+--------------------+------------+
only showing top 5 rows



In [42]:
# Index labels, adding metadata to the label column.
# Fit on whole dataset to include all labels in index.
#More specifically: 
#StringIndexer maps a string column of labels to an ML column of label indices. If the input column is 
#numeric, we cast it to string and index the string values. The indices are in [0, numLabels). 
# Automatically identify categorical features, and index them.
# We specify maxCategories so features with > 4 distinct values are treated as continuous.
featureIndexer_TD =\
    VectorIndexer(inputCol="features", outputCol="indexedFeatures_TD", maxCategories=4).fit(train_TD)

# Split the data into training and test sets (30% held out for testing)
#take a look to the function randomSplit()
(trainingData, testData) = train_TD.randomSplit([0.7, 0.3])
# Train a DecisionTree model. 
#take a look to DecisionTreeClassifier()
trainingData.show()
dt_TD = DecisionTreeClassifier(featuresCol='indexedFeatures_TD', labelCol='Tumble Dryer')

# Chain indexers and tree in a Pipeline
#Pipeline provide a uniform set of high-level APIs built on top of DataFrames that help 
#users create and tune practical machine learning pipelines.
pipeline_TD = Pipeline(stages=[featureIndexer_TD, dt_TD])

# Train model.  This also runs the indexers.
model_TD = pipeline_TD.fit(trainingData)

# Make predictions.
predictions_TD = model_TD.transform(testData)

# Select example rows to display.
predictions_TD.select("prediction", "features").show(5)

# Select (prediction, true label) and compute test error
#Evaluator for Multiclass Classification, which expects input columns: prediction, label, 
#weight (optional) and probabilityCol (only for logLoss).
evaluator = MulticlassClassificationEvaluator(
    labelCol="Tumble Dryer", predictionCol="prediction", metricName="accuracy")
accuracy = evaluator.evaluate(predictions_TD)
print("Test Error = %g " % (1.0 - accuracy))

# #in the pipeline there are different stages, we want to print only the tree, so we will choose the 2nd stage
# treeModel = model.stages[2] 
# # summary only
# print(treeModel)

+--------------------+------------+
|            features|Tumble Dryer|
+--------------------+------------+
|[146.0,147.0,148....|           0|
|[149.0,147.0,149....|           0|
|[149.0,149.0,150....|           0|
|[149.0,149.0,151....|           0|
|[149.0,150.0,147....|           0|
|[149.0,150.0,149....|           0|
|[149.0,152.0,150....|           0|
|[149.0,176.333333...|           0|
|[149.5,148.5,148....|           0|
|[150.0,150.0,149....|           0|
|[150.0,150.0,149....|           0|
|[150.0,150.0,150....|           0|
|[150.0,150.0,152....|           0|
|[150.0,150.0,152....|           0|
|[150.0,151.0,149....|           0|
|[150.0,151.0,150....|           0|
|[150.0,151.5,152....|           0|
|[150.0,151.666666...|           0|
|[150.333333333333...|           0|
|[150.5,151.0,152....|           0|
+--------------------+------------+
only showing top 20 rows

+----------+--------------------+
|prediction|            features|
+----------+--------------------+
|       

## TEST

In [43]:
import numpy as np
import pandas as pd

In [44]:
WM = model_WM.transform(test_TT)
DW = model_DW.transform(test_TT) 
MW = model_MW.transform(test_TT)
K  = model_K.transform(test_TT)
TD = model_TD.transform(test_TT)

In [45]:
WM_v = WM.select("prediction").collect()
DW_v = DW.select("prediction").collect()
MW_v = MW.select("prediction").collect()
K_v  = K.select("prediction").collect()
TD_v = TD.select("prediction").collect()

In [46]:
WM_v = [i[0] for i in WM_v]
DW_v = [i[0] for i in DW_v]
MW_v = [i[0] for i in MW_v]
K_v =  [i[0] for i in K_v]
TD_v = [i[0] for i in TD_v]

In [49]:
print(K_v)

[1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0,

In [47]:
results = np.zeros((len(WM_v), 5))
results[:, 0] = WM_v
results[:, 1] = DW_v
results[:, 2] = TD_v
results[:, 3] = MW_v
results[:, 4] = K_v

In [48]:
# np.unique(results,return_counts = True)
inputes = pd.read_csv("./InputTest.csv")
results

array([[0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [54]:
output_res = pd.DataFrame(results.astype(int), columns=["Washing Machine", "Dishwasher", "Tumble Dryer", "Microwave", "Kettle"])
output_res.insert(0, "Index", inputes["Index"])
output_res.to_csv("./Results5.csv", index=False)