In [None]:
import os
import pandas as pd

for l in os.listdir("dataset/fermi_excel/Fermate"):
    df = pd.read_excel(f"dataset/fermi_excel/Fermate/{l}")
    name = l.replace(".xls", "")

    df.to_csv(f"dataset/fermi/Fermate/{name}.csv", index=False)

df = pd.read_excel(f"dataset/fermi_excel/FERMATE 2211 ACR.xls")
df.to_csv(f"dataset/fermi/FERMATE 2211 ACR.csv", index=False)

Now I iterate for every machine to get the data from the 3 datasets and merge them into one dataset, removing the ones that are not useful for the analysis.

IT1 ~ Now we can check for all the dataset and see what we can do with them

In [None]:
import pandas as pd
from scripts.getDataset import getEntireDataset, forEveryMachine, cleanDataset

DEBUG = True
pd.set_option("display.max_rows", None)
pd.options.mode.copy_on_write = False

most_interesting = []

def fn(machineId, year, month):
    dataset = getEntireDataset(machineId, year, month, False)

    if dataset.empty:
        print("Skipping as empty", machineId, year, month)
        return

    dataset = cleanDataset(dataset)
    dataset.to_csv(
        f"dataset/results/it1/id-{machineId}_{year}-{month}.csv", index=False
    )

    most_interesting.append((machineId, year, month))

forEveryMachine(fn)

print("\n\n\nThe most interesting are the followings:")
for machineId, year, month in most_interesting:
    print(f"- Machine {machineId} - {year}/{month}")

# for dataset, machineId, year, month in most_interesting:
#     plot(dataset, machineId, year, month)

IT2 ~ It takes 25 minutes to compute everything but now I want to look at the lifetime of the machines and see if I can find something interesting, merging the data into one file for machine

In [None]:
from scripts.getSingleDataset.utils import getCleanDataset
import pandas as pd
import os

complete_dataset = {}

for path in os.listdir("dataset/results/it1"):
    splitted_filename = [p.split("-") for p in path.replace(".csv", "").split("_")]

    machineId = int(splitted_filename[0][1])
    year = int(splitted_filename[1][0])
    month = int(splitted_filename[1][1])

    d = getCleanDataset(f"dataset/results/it1/id-{machineId}_{year}-{month}.csv")

    value = [d]

    if machineId in complete_dataset:
        old_data = complete_dataset.get(machineId)

        for old in old_data:
            value.append(old)

    complete_dataset.update({machineId: value})

for machineId in complete_dataset.keys():
    d = pd.concat(complete_dataset.get(machineId))

    d.to_csv(f"dataset/results/it2/id-{machineId}.csv", mode="w", index=False)

In [None]:
# We can graphically show the results 
from scripts.plots import plot


for path in os.listdir("dataset/results/it2"):
    df = getCleanDataset(f"dataset/results/it2/{path}")

    plot(df)

Now we will use a Machine learning model to predict when a machine will stop

In [None]:
import os
import pandas as pd
from scripts.getSingleDataset.getProductions import getProductionWithFixedComma
from sklearn.ensemble import HistGradientBoostingClassifier
pd.set_option("display.max_rows", None)

SHIFT_VALUE = 4
BASE_COLUMNS_TO_SHIFT = {
    "Productions": "Productions_prev",
    "EnergyConsumption": "EnergyConsumption_prev",
    "Material_HASH": "Material_Previous",
    "COD_ART_HASH": "COD_ART_Previous",
}

shift_columns = [
    f"{c}_{s}"
    for c in BASE_COLUMNS_TO_SHIFT.values()
    for s in range(1, SHIFT_VALUE + 1)
]

COLUMNS = [
    "MachineId",
    "Productions",
    "EnergyConsumption",
    "COD_ART_HASH",
    "Material_HASH",
] + shift_columns


# get materials
materials = getProductionWithFixedComma(f"dataset/fermi/FERMATE 2211 ACR.csv")[
    ["CODART", "ACR"]
].drop_duplicates()
materials.rename(columns={"CODART": "COD_ART", "ACR": "Material"}, inplace=True)

# def add_previous_data(data: pd.DataFrame):
#     previous_size = data.shape[0]

#     for s in range(1, SHIFT_VALUE + 1):
#         for c in BASE_COLUMNS_TO_SHIFT.keys():
#             data.loc[f"{BASE_COLUMNS_TO_SHIFT[c]}_{s}"] = data[c].shift(s)

#     assert previous_size == data.shape[0]

#     return data
# get dataset
train_data = pd.DataFrame()
for file in os.listdir("dataset/results/it2"):
    data = pd.read_csv(f"dataset/results/it2/{file}")

    data.sort_values(by="START_DATE", inplace=True)

    data = pd.merge(data, materials, how="left")
    data["Material_HASH"] = data["Material"].apply(hash)

    data["COD_ART_HASH"] = data["COD_ART"].apply(hash)

    data["MachineId"] = int(file.replace(".csv", "").split("-")[1])

    data.dropna()

    previous_size = data.shape[0]

    for s in range(1, SHIFT_VALUE + 1):
        for c in BASE_COLUMNS_TO_SHIFT.keys():
            data[f"{BASE_COLUMNS_TO_SHIFT[c]}_{s}"] = data[c].shift(s)

    assert previous_size == data.shape[0]

    train_data = pd.concat([train_data, data])

# train_data[:10000].to_csv("dataset/results/complete.csv", mode="w", index=False)
# print(train_data.columns)

def calculateModel(dataset: pd.DataFrame, model: HistGradientBoostingClassifier):
    print("\tTotal rows:", dataset.shape[0])
    rows = int(dataset.shape[0] * 0.8)
    train_d = dataset[:rows]
    test_d = dataset[rows:]

    if train_d.shape[0] == 0 or test_d.shape[0] == 0:
        raise Exception(
            "Not enough data",
            dataset.shape[0],
            train_d.shape[0],
            test_d.shape[0],
        )

    # this checks for 0 values
    if train_d.shape[0] == 0 or test_d.shape[0] == 0:
        raise Exception(f"\tNot enough data, train: {train_d.shape[0]}, test: {test_d.shape[0]}")

    print("\tTraining rows:", train_d.shape[0])

    model.fit(train_d[COLUMNS], train_d["Stop"])

    # Valutare il modello utilizzando i dati di test
    prediction = model.predict(test_d[COLUMNS])

    df = pd.DataFrame(
        {
            "Prediction": prediction,
            "Real": test_d["Stop"],
            "Correct": prediction == test_d["Stop"],
        }
    )

    # this is only useful to remove the "Running" values from the calculations of the percentile
    df = df[df["Real"] != "Running"]

    res = df["Correct"].value_counts()

    errors = res[False] if False in res else 0

    if df.shape[0] == 0:
        raise Exception("Dataset is not usable")

    percentile = int(errors / df.shape[0] * 100)
    print(f"\tPrediction errors {percentile}%")
    if percentile == 0:
        print("---------")
        print(errors, df.shape[0])

        print(df)
        print(res)

    return percentile

model = HistGradientBoostingClassifier()

average = 0
usables = 0
for machine_id in train_data["MachineId"].unique():
    print("Machine", machine_id)

    dataset = train_data[train_data["MachineId"] == machine_id]

    try:
        average += calculateModel(dataset, model)
        usables += 1
    except Exception as e:
        print("\t", e)
if usables == 0:
    print("No usable data")
else:
    average = average / usables
    print(f"Avg Error: {average:.2f}%")

print("Complete model")

print(f"Error: {calculateModel(train_data, model)}%")

Now I will use PySpark to implement MlLib and train a model to predict when a machine will stop

In [4]:
import os
import pandas as pd
from scripts.getSingleDataset.getProductions import getProductionWithFixedComma

from pyspark.sql import SparkSession, DataFrame
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.feature import StringIndexer, VectorAssembler

# ---
from pyspark.ml.classification import GBTClassifier
from pyspark.ml.feature import StringIndexer, VectorIndexer
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml import Pipeline
from pyspark.sql.functions import when, isnan, isnull


pd.set_option("display.max_rows", None)

spark = SparkSession.builder.getOrCreate()

sc = spark.sparkContext


SHIFT_VALUE = 4
BASE_COLUMNS_TO_SHIFT = {
    "Productions": "Productions_prev",
    "EnergyConsumption": "EnergyConsumption_prev",
    "Material_HASH": "Material_Previous",
    "COD_ART_HASH": "COD_ART_Previous",
}

shift_columns = [
    f"{c}_{s}"
    for c in BASE_COLUMNS_TO_SHIFT.values()
    for s in range(1, SHIFT_VALUE + 1)
]

COLUMNS = [
    "MachineId",
    "Productions",
    "EnergyConsumption",
    "COD_ART_HASH",
    "Material_HASH",
] + shift_columns

# get materials
materials = getProductionWithFixedComma(f"dataset/fermi/FERMATE 2211 ACR.csv")[
    ["CODART", "ACR"]
].drop_duplicates()
materials.rename(columns={"CODART": "COD_ART", "ACR": "Material"}, inplace=True)

train_data = pd.DataFrame()
for file in os.listdir("dataset/results/it2"):
    data = pd.read_csv(f"dataset/results/it2/{file}")

    data.sort_values(by="START_DATE", inplace=True)

    data = pd.merge(data, materials, how="left")
    data["Material_HASH"] = data["Material"].apply(hash)

    data["COD_ART_HASH"] = data["COD_ART"].apply(hash)

    data["Stop_index"] = data["Stop"].apply(lambda x: 1 if x == "Running" else 0)

    data["MachineId"] = int(file.replace(".csv", "").split("-")[1])

    data.dropna()

    previous_size = data.shape[0]

    for s in range(1, SHIFT_VALUE + 1):
        for c in BASE_COLUMNS_TO_SHIFT.keys():
            data[f"{BASE_COLUMNS_TO_SHIFT[c]}_{s}"] = data[c].shift(s)

    assert previous_size == data.shape[0]

    train_data = pd.concat([train_data, data])

train_data.to_csv("dataset/results/complete.csv", mode="w", index=False)

assembler = VectorAssembler(inputCols=COLUMNS,outputCol="features", handleInvalid="keep")
labelIndexer = StringIndexer(
    inputCol="Stop_index", outputCol="indexedLabel", handleInvalid="keep"
)
featureIndexer = VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4, handleInvalid="keep")
gbt = GBTClassifier(labelCol="indexedLabel",featuresCol="indexedFeatures",maxIter=10)
evaluator = MulticlassClassificationEvaluator(
    labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy"
)

# MlLib
def calculateModel(dataset: DataFrame):
    dataset = spark.createDataFrame(dataset)

    print("\tTotal rows:", dataset.count())

    for col in dataset.columns:
        dataset = dataset.withColumn(
            col, when(isnan(dataset[col]), 0).otherwise(dataset[col])
        )

    dataset = assembler.transform(dataset)
    dataset = dataset.na.fill(0)

    train_d, test_d = dataset.randomSplit([0.8, 0.2])

    if train_d.count() == 0 or test_d.count() == 0:
        raise Exception(
            "Not enough data", dataset.count(), train_d.count(), test_d.count()
        )

    pipeline = Pipeline(
        stages=[labelIndexer.fit(train_d), featureIndexer.fit(train_d), gbt],
    )

    model = pipeline.fit(train_d)

    prediction = model.transform(test_d)

    accuracy = evaluator.evaluate(prediction)

    percentile = int((1-accuracy) * 100)
    print(f"\tPrediction errors {percentile}%")

    return percentile

average = 0
usables = 0
for machine_id in train_data["MachineId"].unique():
    print("Machine", machine_id)

    dataset = train_data[train_data["MachineId"] == machine_id]

    try:
        average += calculateModel(dataset)
        usables += 1
    except Exception as e:
        print("\t", e)

if usables == 0:
    print("No usable data")
else:
    average = average / usables
    print(f"Avg Error: {average:.2f}%")

print("Complete model")

print(f"Error: {calculateModel(train_data)}%")

Machine 304


24/03/04 10:07:51 WARN TaskSetManager: Stage 3130 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 57212


24/03/04 10:07:52 WARN TaskSetManager: Stage 3133 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:52 WARN TaskSetManager: Stage 3136 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:53 WARN TaskSetManager: Stage 3139 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:53 WARN TaskSetManager: Stage 3142 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:54 WARN TaskSetManager: Stage 3144 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:54 WARN TaskSetManager: Stage 3145 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:07:55 WARN TaskSetManager: Stage 3146 contains a task of very large size (1585 KiB). The maximum recommended task size is 10

	Prediction errors 5%
Machine 310


24/03/04 10:08:01 WARN TaskSetManager: Stage 3250 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 55456


24/03/04 10:08:01 WARN TaskSetManager: Stage 3253 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:02 WARN TaskSetManager: Stage 3256 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:02 WARN TaskSetManager: Stage 3259 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:03 WARN TaskSetManager: Stage 3262 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:03 WARN TaskSetManager: Stage 3264 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:03 WARN TaskSetManager: Stage 3265 contains a task of very large size (1369 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:04 WARN TaskSetManager: Stage 3266 contains a task of very large size (1369 KiB). The maximum recommended task size is 10

	Prediction errors 5%
Machine 305
	Total rows: 32043
	Prediction errors 3%
Machine 515


24/03/04 10:08:17 WARN TaskSetManager: Stage 3490 contains a task of very large size (1796 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 66167


24/03/04 10:08:18 WARN TaskSetManager: Stage 3493 contains a task of very large size (1796 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:18 WARN TaskSetManager: Stage 3496 contains a task of very large size (1796 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:19 WARN TaskSetManager: Stage 3499 contains a task of very large size (1796 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:19 WARN TaskSetManager: Stage 3502 contains a task of very large size (1796 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:20 WARN TaskSetManager: Stage 3504 contains a task of very large size (1797 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:20 WARN TaskSetManager: Stage 3505 contains a task of very large size (1797 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:20 WARN TaskSetManager: Stage 3506 contains a task of very large size (1797 KiB). The maximum recommended task size is 10

	Prediction errors 10%
Machine 313


24/03/04 10:08:27 WARN TaskSetManager: Stage 3610 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 62011


24/03/04 10:08:28 WARN TaskSetManager: Stage 3613 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:28 WARN TaskSetManager: Stage 3616 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:28 WARN TaskSetManager: Stage 3619 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:29 WARN TaskSetManager: Stage 3622 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:29 WARN TaskSetManager: Stage 3624 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:30 WARN TaskSetManager: Stage 3625 contains a task of very large size (1580 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:30 WARN TaskSetManager: Stage 3626 contains a task of very large size (1580 KiB). The maximum recommended task size is 10

	Prediction errors 5%
Machine 307


24/03/04 10:08:36 WARN TaskSetManager: Stage 3730 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 57154


24/03/04 10:08:36 WARN TaskSetManager: Stage 3733 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:37 WARN TaskSetManager: Stage 3736 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:37 WARN TaskSetManager: Stage 3739 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:37 WARN TaskSetManager: Stage 3742 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:38 WARN TaskSetManager: Stage 3744 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:38 WARN TaskSetManager: Stage 3745 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:38 WARN TaskSetManager: Stage 3746 contains a task of very large size (1587 KiB). The maximum recommended task size is 10

	Prediction errors 8%
Machine 110


24/03/04 10:08:44 WARN TaskSetManager: Stage 3850 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 53541


24/03/04 10:08:44 WARN TaskSetManager: Stage 3853 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:45 WARN TaskSetManager: Stage 3856 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:45 WARN TaskSetManager: Stage 3859 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:45 WARN TaskSetManager: Stage 3862 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:46 WARN TaskSetManager: Stage 3864 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:46 WARN TaskSetManager: Stage 3865 contains a task of very large size (1389 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:46 WARN TaskSetManager: Stage 3866 contains a task of very large size (1389 KiB). The maximum recommended task size is 10

	Prediction errors 9%
Machine 306


24/03/04 10:08:52 WARN TaskSetManager: Stage 3970 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 60346


24/03/04 10:08:52 WARN TaskSetManager: Stage 3973 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:53 WARN TaskSetManager: Stage 3976 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:53 WARN TaskSetManager: Stage 3979 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:53 WARN TaskSetManager: Stage 3982 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:54 WARN TaskSetManager: Stage 3984 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:54 WARN TaskSetManager: Stage 3985 contains a task of very large size (1585 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:08:54 WARN TaskSetManager: Stage 3986 contains a task of very large size (1585 KiB). The maximum recommended task size is 10

	Prediction errors 10%
Machine 302


24/03/04 10:09:01 WARN TaskSetManager: Stage 4090 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 63040


24/03/04 10:09:01 WARN TaskSetManager: Stage 4093 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:01 WARN TaskSetManager: Stage 4096 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:02 WARN TaskSetManager: Stage 4099 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:02 WARN TaskSetManager: Stage 4102 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:02 WARN TaskSetManager: Stage 4104 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:03 WARN TaskSetManager: Stage 4105 contains a task of very large size (1578 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:03 WARN TaskSetManager: Stage 4106 contains a task of very large size (1578 KiB). The maximum recommended task size is 10

	Prediction errors 9%
Machine 303


24/03/04 10:09:09 WARN TaskSetManager: Stage 4210 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 67722


24/03/04 10:09:10 WARN TaskSetManager: Stage 4213 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:10 WARN TaskSetManager: Stage 4216 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:10 WARN TaskSetManager: Stage 4219 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:11 WARN TaskSetManager: Stage 4222 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:11 WARN TaskSetManager: Stage 4224 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:12 WARN TaskSetManager: Stage 4225 contains a task of very large size (1810 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:12 WARN TaskSetManager: Stage 4226 contains a task of very large size (1810 KiB). The maximum recommended task size is 10

	Prediction errors 11%
Machine 301


24/03/04 10:09:18 WARN TaskSetManager: Stage 4330 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 64270


24/03/04 10:09:19 WARN TaskSetManager: Stage 4333 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:19 WARN TaskSetManager: Stage 4336 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:19 WARN TaskSetManager: Stage 4339 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:20 WARN TaskSetManager: Stage 4342 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:20 WARN TaskSetManager: Stage 4344 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:20 WARN TaskSetManager: Stage 4345 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:21 WARN TaskSetManager: Stage 4346 contains a task of very large size (1588 KiB). The maximum recommended task size is 10

	Prediction errors 9%
Machine 315


24/03/04 10:09:26 WARN TaskSetManager: Stage 4450 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 54439


24/03/04 10:09:27 WARN TaskSetManager: Stage 4453 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:27 WARN TaskSetManager: Stage 4456 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:27 WARN TaskSetManager: Stage 4459 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:28 WARN TaskSetManager: Stage 4462 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:28 WARN TaskSetManager: Stage 4464 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:28 WARN TaskSetManager: Stage 4465 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:29 WARN TaskSetManager: Stage 4466 contains a task of very large size (1367 KiB). The maximum recommended task size is 10

	Prediction errors 10%
Machine 314


24/03/04 10:09:34 WARN TaskSetManager: Stage 4570 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 51809


24/03/04 10:09:34 WARN TaskSetManager: Stage 4573 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:35 WARN TaskSetManager: Stage 4576 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:35 WARN TaskSetManager: Stage 4579 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:35 WARN TaskSetManager: Stage 4582 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:36 WARN TaskSetManager: Stage 4584 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:36 WARN TaskSetManager: Stage 4585 contains a task of very large size (1366 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:36 WARN TaskSetManager: Stage 4586 contains a task of very large size (1366 KiB). The maximum recommended task size is 10

	Prediction errors 6%
Machine 614


24/03/04 10:09:43 WARN TaskSetManager: Stage 4690 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 66088


24/03/04 10:09:43 WARN TaskSetManager: Stage 4693 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:43 WARN TaskSetManager: Stage 4696 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:43 WARN TaskSetManager: Stage 4699 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:44 WARN TaskSetManager: Stage 4702 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:44 WARN TaskSetManager: Stage 4704 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:45 WARN TaskSetManager: Stage 4705 contains a task of very large size (1802 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:45 WARN TaskSetManager: Stage 4706 contains a task of very large size (1802 KiB). The maximum recommended task size is 10

	Prediction errors 8%
Machine 611


24/03/04 10:09:51 WARN TaskSetManager: Stage 4810 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 63538


24/03/04 10:09:51 WARN TaskSetManager: Stage 4813 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:52 WARN TaskSetManager: Stage 4816 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:52 WARN TaskSetManager: Stage 4819 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:53 WARN TaskSetManager: Stage 4822 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:53 WARN TaskSetManager: Stage 4824 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:53 WARN TaskSetManager: Stage 4825 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:09:54 WARN TaskSetManager: Stage 4826 contains a task of very large size (1587 KiB). The maximum recommended task size is 10

	Prediction errors 12%
Machine 610


24/03/04 10:10:00 WARN TaskSetManager: Stage 4930 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 71324


24/03/04 10:10:00 WARN TaskSetManager: Stage 4933 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:01 WARN TaskSetManager: Stage 4936 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:01 WARN TaskSetManager: Stage 4939 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:02 WARN TaskSetManager: Stage 4942 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:02 WARN TaskSetManager: Stage 4944 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:02 WARN TaskSetManager: Stage 4945 contains a task of very large size (1795 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:03 WARN TaskSetManager: Stage 4946 contains a task of very large size (1795 KiB). The maximum recommended task size is 10

	Prediction errors 9%
Machine 612


24/03/04 10:10:09 WARN TaskSetManager: Stage 5050 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 60557


24/03/04 10:10:09 WARN TaskSetManager: Stage 5053 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:10 WARN TaskSetManager: Stage 5056 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:10 WARN TaskSetManager: Stage 5059 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:10 WARN TaskSetManager: Stage 5062 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:11 WARN TaskSetManager: Stage 5064 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:11 WARN TaskSetManager: Stage 5065 contains a task of very large size (1587 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:11 WARN TaskSetManager: Stage 5066 contains a task of very large size (1587 KiB). The maximum recommended task size is 10

	Prediction errors 11%
Machine 618


24/03/04 10:10:17 WARN TaskSetManager: Stage 5170 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 53037


24/03/04 10:10:17 WARN TaskSetManager: Stage 5173 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:17 WARN TaskSetManager: Stage 5176 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:18 WARN TaskSetManager: Stage 5179 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:18 WARN TaskSetManager: Stage 5182 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:19 WARN TaskSetManager: Stage 5184 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:19 WARN TaskSetManager: Stage 5185 contains a task of very large size (1367 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:19 WARN TaskSetManager: Stage 5186 contains a task of very large size (1367 KiB). The maximum recommended task size is 10

	Prediction errors 9%
Machine 319


24/03/04 10:10:26 WARN TaskSetManager: Stage 5290 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 72881


24/03/04 10:10:26 WARN TaskSetManager: Stage 5293 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:26 WARN TaskSetManager: Stage 5296 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:27 WARN TaskSetManager: Stage 5299 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:27 WARN TaskSetManager: Stage 5302 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:28 WARN TaskSetManager: Stage 5304 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:28 WARN TaskSetManager: Stage 5305 contains a task of very large size (2016 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:28 WARN TaskSetManager: Stage 5306 contains a task of very large size (2016 KiB). The maximum recommended task size is 10

	Prediction errors 10%
Machine 108


24/03/04 10:10:34 WARN TaskSetManager: Stage 5410 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 52412


24/03/04 10:10:34 WARN TaskSetManager: Stage 5413 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:35 WARN TaskSetManager: Stage 5416 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:35 WARN TaskSetManager: Stage 5419 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:35 WARN TaskSetManager: Stage 5422 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:36 WARN TaskSetManager: Stage 5424 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:36 WARN TaskSetManager: Stage 5425 contains a task of very large size (1375 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:36 WARN TaskSetManager: Stage 5426 contains a task of very large size (1375 KiB). The maximum recommended task size is 10

	Prediction errors 12%
Machine 308


24/03/04 10:10:41 WARN TaskSetManager: Stage 5530 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 41414


24/03/04 10:10:41 WARN TaskSetManager: Stage 5533 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:42 WARN TaskSetManager: Stage 5536 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:42 WARN TaskSetManager: Stage 5539 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:42 WARN TaskSetManager: Stage 5542 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:42 WARN TaskSetManager: Stage 5544 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:43 WARN TaskSetManager: Stage 5545 contains a task of very large size (1149 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:43 WARN TaskSetManager: Stage 5546 contains a task of very large size (1149 KiB). The maximum recommended task size is 10

	Prediction errors 4%
Machine 309


24/03/04 10:10:49 WARN TaskSetManager: Stage 5650 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.


	Total rows: 58030


24/03/04 10:10:49 WARN TaskSetManager: Stage 5653 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:49 WARN TaskSetManager: Stage 5656 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:49 WARN TaskSetManager: Stage 5659 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:50 WARN TaskSetManager: Stage 5662 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:50 WARN TaskSetManager: Stage 5664 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:50 WARN TaskSetManager: Stage 5665 contains a task of very large size (1588 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:10:51 WARN TaskSetManager: Stage 5666 contains a task of very large size (1588 KiB). The maximum recommended task size is 10

	Prediction errors 5%
Avg Error: 8.18%
Complete model


24/03/04 10:12:05 WARN TaskSetManager: Stage 5770 contains a task of very large size (35668 KiB). The maximum recommended task size is 1000 KiB.
24/03/04 10:12:05 ERROR Inbox: An error happened while processing message in the inbox for LocalSchedulerBackendEndpoint
java.lang.OutOfMemoryError: Java heap space
	at java.base/java.util.Arrays.copyOf(Arrays.java:3537)
	at java.base/java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:100)
	at java.base/java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:132)
	at org.apache.spark.util.ByteBufferOutputStream.write(ByteBufferOutputStream.scala:41)
	at java.base/java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1895)
	at java.base/java.io.ObjectOutputStream$BlockDataOutputStream.setBlockDataMode(ObjectOutputStream.java:1804)
	at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1201)
	at java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:355

KeyboardInterrupt: 

[Stage 5770:>                                                       (0 + 5) / 8]