# EDA - main focus on histograms

### Load libraries

In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter

### Load data from Excel file

In [None]:
# Read the "Cleaned Imported Data" sheet of the review workbook;
# header=0 takes the column names from the first row of the sheet.
data = pd.read_excel(
    '../data/ThermoAI_Data_review_r5.xlsx',
    sheet_name="Cleaned Imported Data",
    header=0,
)
data.head()

In [None]:
data.duplicated().value_counts()

In [None]:
# Date and time cleaning

# Overwrite the cell at row position 2 / column position 1 with 11:05:00
# (presumably a malformed time entry in the sheet -- confirm against the workbook).
data.iloc[2, 1] = dt.time(hour=11, minute=5, second=0)

# The date column is labelled 'I' in the imported sheet; give it a meaningful name.
data.rename(columns={'I': 'date'}, inplace=True)
data['date'] = pd.to_datetime(data['date'])

# Merge the separate date and time columns into a single timestamp, row by row.
data['datetime'] = data.apply(
    lambda row: dt.datetime.combine(row['date'], row['time']),
    axis=1,
)

In [None]:
# Cleaning the type column

# Missing anomaly types are replaced with 0 and the column is cast to int.
# The result is assigned back instead of calling fillna(..., inplace=True) on
# the column selection: the in-place form acts on an intermediate object, is
# deprecated in pandas 2.x and leaves `data` unchanged under Copy-on-Write.
data['type'] = data['type'].fillna(0).astype(int)

In [None]:
# Categorized version of relevant columns

# For each source column add a '<name>_cat' companion holding the same values
# as a pandas Categorical.
for source_column in ('capacity', 'type', 'anomaly', 'pump'):
    data[f'{source_column}_cat'] = pd.Categorical(data[source_column])

In [None]:
# Reorder the columns

# Timestamp fields first, then each descriptor next to its categorical twin,
# then the sensor readings.
column_order = [
    'datetime', 'date', 'time',
    'pump', 'pump_cat',
    'capacity', 'capacity_cat',
    'anomaly', 'anomaly_cat',
    'type', 'type_cat',
    'vib1_x', 'vib1_y', 'vib1_z',
    'vib2_x', 'vib2_y', 'vib2_z',
    'amp1', 'amp2',
    'mic1', 'mic2',
]
data = data[column_order]

In [None]:
data.info()

In [None]:
data.sample(10)

In [None]:
# add column for vibration magnitude using sqrt(x^2+y^2+z^2)

# Same Euclidean norm for both vibration sensors, computed from their
# x / y / z component columns.
for sensor in ('vib1', 'vib2'):
    comp_x = data[f'{sensor}_x']
    comp_y = data[f'{sensor}_y']
    comp_z = data[f'{sensor}_z']
    data[f'{sensor}_magnitude'] = np.sqrt(comp_x**2 + comp_y**2 + comp_z**2)

# Basic histograms

In [None]:
# Vib1_x Histogram grouped by type
VAR = 'vib1_x'

# One semi-transparent histogram per anomaly type, drawn on a shared axis.
values_by_type = data.groupby(['type'])[VAR]
values_by_type.plot.hist(
    alpha=0.5,
    legend=True,
    figsize=(8,4),
    title=f'Distribution of {VAR} with respect to anomaly type',
)
plt.xlabel(f'{VAR}')

In [None]:
# Vib1_y Histogram grouped by type
VAR = 'vib1_y'

# Bin edges every 0.2 covering the whole range of VAR. Both limits are scaled
# by 5 and rounded outwards; previously only the upper limit was scaled and
# both were truncated, so readings near either end of the range could fall
# outside the bins and be silently left out of the histogram.
first_edge = int(np.floor(data[VAR].min() * 5))
last_edge = max(int(np.ceil(data[VAR].max() * 5)), first_edge + 1)
bin_edges = [x / 5 for x in range(first_edge, last_edge + 1)]

data.groupby(['type'])[VAR].plot.hist(
    alpha=0.5,
    legend=True,
    figsize=(8,4),
    title=f'Distribution of {VAR} with respect to anomaly type',
    bins=bin_edges,
)
# Label the x axis like the other per-variable histogram cells do.
plt.xlabel(f'{VAR}')

In [None]:
# Vib1_z Histogram grouped by type
VAR = 'vib1_z'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Vib2_x Histogram grouped by type
VAR = 'vib2_x'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Vib2_y Histogram grouped by type
VAR = 'vib2_y'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Vib2_z Histogram grouped by type
VAR = 'vib2_z'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Vib1_magnitude Histogram grouped by type
VAR = 'vib1_magnitude'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Vib2_magnitude Histogram grouped by type
VAR = 'vib2_magnitude'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Amp1 Histogram grouped by type
VAR = 'amp1'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Amp2 Histogram grouped by type
VAR = 'amp2'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Mic1 Histogram grouped by type
VAR = 'mic1'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

In [None]:
# Mic2 Histogram grouped by type
VAR = 'mic2'
data.groupby(['type'])[VAR].plot.hist(alpha=0.5, legend=True, figsize=(8,4), title=f'Distribution of {VAR} with respect to anomaly type')
plt.xlabel(f'{VAR}')

# Amp1 and Amp2 analysis

#### In case of amp1 and amp2 it would be useful to plot separate histograms for small and big pump and then, also, for different capacities.

In [None]:
# Amp1 Histogram grouped by type for small pump

# Keep only the small-pump rows, then overlay one amp1 histogram per anomaly type.
small_pump_rows = data[data['pump'] == 'small']
small_pump_rows.groupby(['type'])['amp1'].plot.hist(
    alpha=0.5,
    legend=True,
    title='Amp1 distribution for small pump with respect to anomaly type',
    figsize=(8,4),
)
plt.xlabel('amp1')

In [None]:
# Amp2 Histogram grouped by type for big pump

# Keep only the big-pump rows, then overlay one amp2 histogram per anomaly type.
data[data['pump']=='big'].groupby(['type']).amp2.plot.hist(alpha=0.5, legend=True, title='Amp2 for big pump with respect to anomaly type', figsize=(8,4))
# Was `Cplt.xlabel(...)`: a stray character that raised NameError.
plt.xlabel('amp2')

In [None]:
CAPACITY = 100
PUMP = 'small'
AMP = 'amp1'

# Narrow the data to one capacity level, then to one pump.
data1 = data.loc[data['capacity'] == CAPACITY]
data2 = data1.loc[data1['pump'] == f'{PUMP}']

# Overlay one histogram of the chosen current reading per anomaly type.
hist_title = f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type'
data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=hist_title, figsize=(8,4))
plt.xlabel(f'{AMP}')


In [None]:
CAPACITY = 75
PUMP = 'small'
AMP = 'amp1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{AMP}')

In [None]:
CAPACITY = 50
PUMP = 'small'
AMP = 'amp1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{AMP}')

In [None]:
CAPACITY = 100
PUMP = 'big'
AMP = 'amp2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{AMP}')

In [None]:
CAPACITY = 75
PUMP = 'big'
AMP = 'amp2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{AMP}')

In [None]:
CAPACITY = 50
PUMP = 'big'
AMP = 'amp2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[AMP].plot.hist(alpha=0.5, legend=True, title=f'{AMP} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{AMP}')

#### Analysis of the above histograms leads to the conclusion that type 2 anomalies should be easily traceable using amp1 for the small pump and amp2 for the big pump at higher capacities (75% and 100%).

## Mic1 and Mic2 analysis

In [None]:
PUMP = 'small'
MIC = 'mic1'

data[data['pump']==PUMP].groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} distribution for {PUMP} pump with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
PUMP = 'big'
MIC = 'mic1'

data[data['pump']==PUMP].groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} distribution for {PUMP} pump with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

#### In case of the small pump, anomaly type 2 could be easily detected as anomaly, because the readings of mic1 tend to be much smaller as compared to the normal mode.

In [None]:
PUMP = 'small'
MIC = 'mic2'

data[data['pump']==PUMP].groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} distribution for {PUMP} pump with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
PUMP = 'big'
MIC = 'mic2'

data[data['pump']==PUMP].groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} distribution for {PUMP} pump with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

#### In case of the big pump, anomaly type 3 could be easily detected as anomaly, because the readings of mic2 tend to be much larger as compared to the normal mode.

In [None]:
CAPACITY = 100
PUMP = 'small'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 75
PUMP = 'small'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 50
PUMP = 'small'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

#### The three histograms above show more clearly what was stated before: in case of the small pump, anomaly type 2 could be easily detected as an anomaly, because the readings of mic1 tend to be much smaller than in the normal mode.

In [None]:
CAPACITY = 100
PUMP = 'small'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

#### The above histogram shows that mic2 could be used to find anomaly type 2 for small pump at 100% capacity.

In [None]:
CAPACITY = 75
PUMP = 'small'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 50
PUMP = 'small'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 100
PUMP = 'big'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 75
PUMP = 'big'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 50
PUMP = 'big'
MIC = 'mic1'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 100
PUMP = 'big'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 75
PUMP = 'big'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

In [None]:
CAPACITY = 50
PUMP = 'big'
MIC = 'mic2'

data1 = data[data['capacity']==CAPACITY]
data2 = data1[data1['pump'] == f'{PUMP}']

data2.groupby(['type'])[MIC].plot.hist(alpha=0.5, legend=True, title=f'{MIC} for {PUMP} pump with capacity of {CAPACITY}% with respect to anomaly type', figsize=(8,4))
plt.xlabel(f'{MIC}')

### The two above histograms show, as previously stated, that in case of the big pump, anomaly type 3 could be easily detected as an anomaly, because the readings of mic2 tend to be much larger than in the normal mode.

#### In addition to this, in case of 50% capacity this is true for all anomalies, not only type 3 anomaly.



## Vibration analysis

In [None]:
# Define functions to plot histograms.

def plot_histogram(GROUPING, PUMP, VALUE):
    """Overlay one histogram of VALUE per GROUPING level for a single pump.

    Reads the notebook-level ``data`` frame, keeps the rows whose 'pump'
    column equals PUMP, groups them by the GROUPING column and draws the
    VALUE column of every group as a semi-transparent histogram.

    Args:
        GROUPING: name of the column to group by (e.g. 'type').
        PUMP: value matched against the 'pump' column.
        VALUE: name of the column whose distribution is plotted.
    """
    pump_rows = data[data['pump'] == PUMP]
    grouped_values = pump_rows.groupby([GROUPING])[VALUE]
    grouped_values.plot.hist(
        alpha=0.5,
        legend=True,
        title=f'{VALUE} distribution for {PUMP} pump with respect to {GROUPING}',
        figsize=(8,4),
    )
    plt.xlabel(f'{VALUE}')

def plot_histogram_capacity(GROUPING, PUMP, VALUE, CAPACITY):
    """Overlay one histogram of VALUE per GROUPING level for one pump and capacity.

    Reads the notebook-level ``data`` frame, keeps the rows whose 'capacity'
    column equals CAPACITY and whose 'pump' column equals PUMP, groups them by
    the GROUPING column and draws the VALUE column of every group as a
    semi-transparent histogram.

    Args:
        GROUPING: name of the column to group by (e.g. 'type').
        PUMP: value matched against the 'pump' column.
        VALUE: name of the numeric column whose distribution is plotted.
        CAPACITY: value matched against the 'capacity' column.
    """
    data1 = data[data['capacity']==CAPACITY]
    data2 = data1[data1['pump'] == f'{PUMP}']

    # Bin edges every 0.2, derived from the whole dataset (not just the
    # selection) so every pump/capacity plot of VALUE shares one x grid.
    # Both limits are scaled by 5 and rounded outwards; previously only the
    # upper limit was scaled and both were truncated, so readings near either
    # end of the range could fall outside the bins and be silently dropped.
    first_edge = int(np.floor(data[VALUE].min() * 5))
    last_edge = max(int(np.ceil(data[VALUE].max() * 5)), first_edge + 1)
    bin_edges = [x / 5 for x in range(first_edge, last_edge + 1)]

    data2.groupby([GROUPING])[VALUE].plot.hist(
        alpha=0.5,
        legend=True,
        title=f'{VALUE} distribution for {PUMP} pump with capacity of {CAPACITY}% with respect to {GROUPING}',
        figsize=(8,4),
        bins=bin_edges,
    )
    plt.xlabel(f'{VALUE}')
    

In [None]:
plot_histogram('type', 'small', 'vib1_x')

In [None]:
plot_histogram('type', 'small', 'vib1_y')

In [None]:
plot_histogram('type', 'small', 'vib1_z')

In [None]:
plot_histogram('type', 'small', 'vib2_x')

In [None]:
plot_histogram('type', 'small', 'vib2_y')

In [None]:
plot_histogram('type', 'small', 'vib2_z')

#### The above histogram shows that in case of small pump the type 2 anomaly should be easily detected with vib2_z.

In [None]:
plot_histogram('type', 'small', 'vib1_magnitude')

In [None]:
plot_histogram('type', 'small', 'vib2_magnitude')

In [None]:
plot_histogram('type', 'big', 'vib1_x')

#### The above histogram shows that in case of the big pump the type 3 anomaly should be easily detected as anomaly with vib1_x.

In [None]:
plot_histogram('type', 'big', 'vib1_y')

### The above histogram shows that in case of the big pump all 3 types of anomalies should be detected with vib1_y.

In [None]:
plot_histogram('type', 'big', 'vib1_z')

In [None]:
plot_histogram('type', 'big', 'vib2_x')

In [None]:
plot_histogram('type', 'big', 'vib2_y')

In [None]:
plot_histogram('type', 'big', 'vib2_z')

In [None]:
plot_histogram('type', 'big', 'vib1_magnitude')

#### Vib1_magnitude could be used to detect anomaly type 3 for the big pump

In [None]:
plot_histogram('type', 'big', 'vib2_magnitude')

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_x', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_x', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_x', 50)

### It was not visible on the histogram that did not take capacity into account, but the three histograms above clearly show that for the small pump type 2 anomaly can be detected with vib1_x. In addition to this, at 50% capacity type 1 anomaly can also be detected.

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_y', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_y', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_y', 50)

#### In case of the small pump type 2 anomaly should be detected with vib1_y at 100% capacity and type 1 anomaly should be detected at 75% capacity.

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_z', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_z', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_z', 50)

#### In case of the small pump type 2 anomaly should be detected with vib1_z at 100% and 50% capacity and type 1 anomaly should be detected at 75% capacity.

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_x', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_x', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_x', 50)

#### In case of the small pump type 2 anomaly should be detected with vib2_x at 100% and 75% capacity.

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_y', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_y', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_y', 50)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_magnitude', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_magnitude', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib1_magnitude', 50)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_magnitude', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_magnitude', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_magnitude', 50)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_z', 100)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_z', 75)

In [None]:
plot_histogram_capacity('type', 'small', 'vib2_z', 50)

#### The 3 above show what was shown previously that in case of small pump the type 2 anomaly should be easily detected with vib2_z.

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_x', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_x', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_x', 50)

#### In case of the big pump type 3 anomalies can be detected with vib1_x for all capacities. Type 2 anomalies can be detected for 100% and 50% capacities and type 1 anomalies can be detected for 50% capacity.

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_y', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_y', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_y', 50)

#### All 3 types of anomalies can be detected for the big pump at all capacities with vib1_y.

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_z', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_z', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_z', 50)

#### In case of the big pump type 1 anomalies can be detected with vib1_z at 75% and 50% capacities. Type 2 anomalies can be detected for 100% and 50% capacities and type 3 anomalies can be detected for all capacities.

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_x', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_x', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_x', 50)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_y', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_y', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_y', 50)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_z', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_z', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_z', 50)

#### Type 2 anomaly can be detected for the big pump at 100% and 50% capacity with vib2_z. Moreover, type 3 anomaly can be detected for capacity of 50%.

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_magnitude', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_magnitude', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib1_magnitude', 50)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_magnitude', 100)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_magnitude', 75)

In [None]:
plot_histogram_capacity('type', 'big', 'vib2_magnitude', 50)

## Analysis of the normal mode values

In [None]:
# Subset of the readings whose 'anomaly' column equals 'normal'.
data_norm = data.loc[data['anomaly'].eq('normal')]

def plot_histogram_norm(VALUE, PUMP):
    """Overlay one histogram of VALUE per capacity level for one pump, normal rows only.

    Reads the notebook-level ``data_norm`` frame, keeps the rows whose 'pump'
    column equals PUMP, groups them by 'capacity' and draws the VALUE column
    of every group as a semi-transparent histogram.

    Args:
        VALUE: name of the numeric column whose distribution is plotted.
        PUMP: value matched against the 'pump' column.
    """
    # Bin edges every 0.2, derived from the full ``data`` frame so the grid
    # matches plots that include anomalous rows. Both limits are scaled by 5
    # and rounded outwards; previously only the upper limit was scaled and
    # both were truncated, so readings near either end of the range could fall
    # outside the bins and be silently dropped.
    first_edge = int(np.floor(data[VALUE].min() * 5))
    last_edge = max(int(np.ceil(data[VALUE].max() * 5)), first_edge + 1)
    bin_edges = [x / 5 for x in range(first_edge, last_edge + 1)]

    data_norm[data_norm['pump']==PUMP].groupby(['capacity'])[VALUE].plot.hist(
        alpha=0.5,
        legend=True,
        title=f'{VALUE} distribution for {PUMP} pump with respect to its capacity',
        figsize=(8,4),
        bins=bin_edges,
    )
    plt.xlabel(f'{VALUE}')

def plot_histogram_norm_1(VALUE, CAPACITY, bins=None):
    """Overlay one histogram of VALUE per pump for one capacity, normal rows only.

    Reads the notebook-level ``data_norm`` frame, keeps the rows whose
    'capacity' column equals CAPACITY, groups them by 'pump' and draws the
    VALUE column of every group as a semi-transparent histogram.

    Args:
        VALUE: name of the numeric column whose distribution is plotted.
        CAPACITY: value matched against the 'capacity' column.
        bins: optional bin specification passed to the histogram call. When
            omitted, 16 equal-width bins spanning the plotted values are used.
    """
    rows_at_capacity = data_norm[data_norm['capacity']==CAPACITY]

    if bins is None:
        # The edges used to be fixed at 0.52 .. 1.16 for every variable, so any
        # reading outside that window was silently left out of the plot.
        # Keep the same number of bins (16) but span the data actually plotted;
        # the edges are shared by both pumps so the bars stay comparable.
        lowest = rows_at_capacity[VALUE].min()
        highest = rows_at_capacity[VALUE].max()
        if np.isfinite([lowest, highest]).all() and lowest < highest:
            bins = np.linspace(lowest, highest, 17)
        else:
            # Empty or constant selection: let the plotting backend choose edges.
            bins = 16

    rows_at_capacity.groupby(['pump'])[VALUE].plot.hist(
        alpha=0.5,
        legend=True,
        title=f'{VALUE} distribution for capacity of {CAPACITY}% with respect to pump type',
        figsize=(8,4),
        bins=bins,
    )
    plt.xlabel(f'{VALUE}')

In [None]:
plot_histogram_norm('vib1_x', 'small')

In [None]:
plot_histogram_norm('vib1_y', 'small')

In [None]:
plot_histogram_norm('vib1_z', 'small')

In [None]:
plot_histogram_norm('vib2_x', 'small')

In [None]:
plot_histogram_norm('vib2_y', 'small')

In [None]:
plot_histogram_norm('vib2_z', 'small')

In [None]:
plot_histogram_norm('vib1_x', 'big')

In [None]:
plot_histogram_norm('vib1_y', 'big')

In [None]:
plot_histogram_norm('vib1_z', 'big')

In [None]:
plot_histogram_norm('vib2_x', 'big')

In [None]:
plot_histogram_norm('vib2_y', 'big')

In [None]:
plot_histogram_norm('vib2_z', 'big')

#### Vibrations for the small pump seem to be dependent on the capacity of the pumps, whereas, such a dependency cannot be found for the big pump. It seems that the capacity of 75% might be the most problematic, since the histogram is the widest.

In [None]:
plot_histogram_norm('mic1', 'small')

In [None]:
plot_histogram_norm('mic2', 'small')

In [None]:
plot_histogram_norm('mic1', 'big')

In [None]:
plot_histogram_norm('mic2', 'big')

#### Similar conclusions as for the vibrations can be drawn for the microphones.

In [None]:
plot_histogram_norm('amp1', 'small')

In [None]:
plot_histogram_norm('amp2', 'big')

In [None]:
plot_histogram_norm_1('vib1_x', 100)

In [None]:
plot_histogram_norm_1('vib1_x', 75)

In [None]:
plot_histogram_norm_1('vib1_x', 50)

In [None]:
plot_histogram_norm_1('vib1_y', 100)

In [None]:
plot_histogram_norm_1('vib1_y', 75)

In [None]:
plot_histogram_norm_1('vib1_y', 50)

In [None]:
plot_histogram_norm_1('vib1_z', 100)

In [None]:
plot_histogram_norm_1('vib1_z', 75)

In [None]:
# Completes the vib1_z series (100 %, 75 %, 50 %); this call previously
# repeated 'vib1_x' at 50 %, which is already plotted earlier.
plot_histogram_norm_1('vib1_z', 50)

In [None]:
plot_histogram_norm_1('vib2_x', 100)

In [None]:
plot_histogram_norm_1('vib2_x', 75)

In [None]:
plot_histogram_norm_1('vib2_x', 50)

In [None]:
plot_histogram_norm_1('vib2_y', 100)

In [None]:
plot_histogram_norm_1('vib2_y', 75)

In [None]:
plot_histogram_norm_1('vib2_y', 50)

In [None]:
plot_histogram_norm_1('vib2_z', 100)

In [None]:
plot_histogram_norm_1('vib2_z', 75)

In [None]:
plot_histogram_norm_1('vib2_z', 50)

In [None]:
plot_histogram_norm_1('mic1', 100)

In [None]:
plot_histogram_norm_1('mic1', 75)

In [None]:
plot_histogram_norm_1('mic1', 50)

In [None]:
plot_histogram_norm_1('mic2', 100)

In [None]:
plot_histogram_norm_1('mic2', 75)

In [None]:
plot_histogram_norm_1('mic2', 50)

#### All the histograms show that the behaviour of the 2 pump types is extremely different, thus they should be considered separately.

In [None]:
data.groupby(['pump', 'capacity', 'type']).size()

In [None]:
CAPACITY = 100
VALUE = 'vib1_x'

# 17 edges from 0.52 to 1.16 in steps of 0.04.
bin_edges = [x / 25 for x in range(13,30)]

# Normal-mode rows at the chosen capacity, one overlaid histogram per pump.
rows_at_capacity = data_norm[data_norm['capacity'] == CAPACITY]
rows_at_capacity.groupby(['pump'])[VALUE].plot.hist(
    alpha=0.5,
    legend=True,
    title=f'{VALUE} distribution for capacity of {CAPACITY}% with respect to pump type',
    figsize=(8,4),
    bins=bin_edges,
)
plt.xlabel(f'{VALUE}')