# Exploratory Data Analysis
Perform exploratory data analysis (EDA) on the provided CSV files: summarize each file, then load it into a lookup table so the data is easier to understand and organize.

In [17]:
import pandas as pd
import csv
from datetime import datetime

# Directory that holds the CSV files, and the files to read from it.
# Every entry keeps its leading "/" because paths are built by plain
# string concatenation of DATA_PATH and the entry.
DATA_PATH = "data"
DATA_FILES = [
    "/causes.csv",
    "/components.csv",
    "/labels.csv",
    "/maintenance.csv",
    "/procedures.csv",
    "/relations.csv",
    "/symptoms.csv",
    "/telemetry.csv",
]

# Read each file into a DataFrame, print its describe() summary, and keep
# the frames in `dfs` in the same order as DATA_FILES.
dfs = []
for data_file in DATA_FILES:
    filename = f"{DATA_PATH}{data_file}"
    df = pd.read_csv(filename)
    summary = df.describe()
    print(summary)
    dfs.append(df)

# causes[cause_id] -> name
# Built from the first two columns of the first file in DATA_FILES.
filename = f"{DATA_PATH}{DATA_FILES[0]}"
causes = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row
        # update() inserts pairs one at a time, so rows read before a
        # failure remain in the dict.
        causes.update((row[0], row[1]) for row in rows)
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# components[component_id] -> (name, parent, function)
# All three values are kept as the raw text read from the file; an empty
# field in the file is therefore stored as an empty string.
filename = f"{DATA_PATH}{DATA_FILES[1]}"
components = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row

        for record in rows:
            component_id = record[0]
            components[component_id] = (record[1], record[2], record[3])
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# labels[(timestamp, machine_id)] -> overheat
# The timestamp is parsed from ISO format into a datetime; the overheat
# value is kept as the raw string read from the file (not converted).
filename = f"{DATA_PATH}{DATA_FILES[2]}"
labels = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row
        # update() inserts pairs one at a time, so rows read before a
        # failure remain in the dict.
        labels.update(
            ((datetime.fromisoformat(row[0]), row[1]), row[2])
            for row in rows
        )
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

filename = DATA_PATH + DATA_FILES[3]
# maintenance[(timestamp, machine_id)] -> (action, duration, success)
#   timestamp : datetime parsed from the ISO-format first column
#   duration  : float
#   success   : bool decoded from the text in the fifth column
# Field texts (compared case-insensitively, surrounding whitespace ignored)
# that mean "not successful". Any other text is treated as successful.
_FALSE_TOKENS = {"", "0", "0.0", "false", "f", "no", "n"}
maintenance = {}
try:
    with open(filename, mode='r', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row

        for row in reader:
            dt_object = datetime.fromisoformat(row[0])
            # csv yields strings, and bool() of any non-empty string is True
            # (including "0" and "False"), so decode the flag explicitly.
            success = row[4].strip().lower() not in _FALSE_TOKENS
            maintenance[(dt_object, str(row[1]))] = (str(row[2]), float(row[3]), success)
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# procedures[procedure_id] -> 6-tuple: columns 2-4 kept as text, followed by
# columns 5-7 converted to float.
# NOTE(review): presumably (name, target, mitigates, effort_h,
# spare_parts_cost_eur, risk_rating) -- confirm against the CSV header.
filename = f"{DATA_PATH}{DATA_FILES[4]}"
procedures = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row

        for row in rows:
            text_fields = (row[1], row[2], row[3])
            numeric_fields = (float(row[4]), float(row[5]), float(row[6]))
            procedures[row[0]] = text_fields + numeric_fields
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# relations[subject] -> (effect, object)
# NOTE(review): the dict is keyed by subject alone, so when a subject occurs
# on more than one row only its last row is kept -- confirm that each
# subject appears once in the file, or switch to a list of values.
filename = f"{DATA_PATH}{DATA_FILES[5]}"
relations = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row
        # update() inserts pairs one at a time, so rows read before a
        # failure remain in the dict.
        relations.update((row[0], (row[1], row[2])) for row in rows)
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# symptoms[symptom_id] -> name
# Built from the first two columns of the seventh file in DATA_FILES.
filename = f"{DATA_PATH}{DATA_FILES[6]}"
symptoms = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row

        for record in rows:
            symptom_id = record[0]
            symptoms[symptom_id] = record[1]
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

# telemetry[(timestamp, machine_id)] -> tuple of 8 floats taken from
# columns 3-10 of each row (row[2] .. row[9]).
# NOTE(review): the intended measurements are presumably spindle_t,
# ambient_t, vibration_rms, coolant, feed_rate, spindle_speed, load,
# power_kw and tool_wear. That is nine names but only eight columns are
# read, so confirm against the CSV header whether a column is being dropped.
filename = f"{DATA_PATH}{DATA_FILES[7]}"
telemetry = {}
try:
    with open(filename, encoding="utf-8") as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # discard the header row

        for row in rows:
            timestamp = datetime.fromisoformat(row[0])
            # Index explicitly (rather than slicing) so a short row still
            # raises instead of producing a shorter tuple.
            measurements = tuple(float(row[col]) for col in range(2, 10))
            telemetry[(timestamp, row[1])] = measurements
except Exception as e:
    # Best effort: report the problem and keep whatever was loaded so far.
    print(f"An error occurred: {e}")

       cause_id             name
count         4                4
unique        4                4
top          K1  BearingWearHigh
freq          1                1
       component_id     name parent_component     function
count             5        5                3            5
unique            5        5                2            5
top              C1  Spindle               C2  Rotate tool
freq              1        1                2            1
       spindle_overheat
count          103680.0
mean                0.0
std                 0.0
min                 0.0
25%                 0.0
50%                 0.0
75%                 0.0
max                 0.0
       duration_h  success
count   23.000000     23.0
mean     2.195652      1.0
std      1.115379      0.0
min      1.000000      1.0
25%      1.500000      1.0
50%      1.500000      1.0
75%      3.000000      1.0
max      4.000000      1.0
       effort_h  spare_parts_cost_eur  risk_rating
count  4.000000               

In [19]:
print(relations)

{'BearingWearHigh': ('affectsComponent', 'Bearing'), 'FanFault': ('affectsComponent', 'Fan'), 'CloggedFilter': ('affectsComponent', 'Filter'), 'LowCoolingEfficiency': ('affectsComponent', 'CoolingSystem')}
