<a href="https://colab.research.google.com/github/Damntoochill/Learning-ML/blob/master/TurbofanEDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [0]:
# Upload the input CSVs when running on Google Colab. Outside Colab the
# `google.colab` package does not exist, so the files are expected to already
# be in the working directory and the upload step is skipped.
try:
    from google.colab import files
except ImportError:
    files = None
else:
    # Bind the result: as a bare last expression the returned dict of raw file
    # bytes would be echoed into the cell output.
    uploaded = files.upload()

In [0]:
# Load the FD001 train/test files. header=None: the files carry no header row,
# so the columns are labelled with integer positions until they are renamed.
train, test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('train_FD001.csv', "test_FD001.csv")
)

In [0]:
# Column layout applied to the raw files: 2 id columns, 3 operational
# settings (OpSet1..OpSet3) and 21 sensor channels (SensorMeasure1..21).
index_columns_names = ["UnitNumber", "Cycle"]
operational_settings_columns_names = [f"OpSet{n}" for n in range(1, 4)]
sensor_measure_columns_names = [f"SensorMeasure{n}" for n in range(1, 22)]
input_file_column_names = [
    *index_columns_names,
    *operational_settings_columns_names,
    *sensor_measure_columns_names,
]

In [0]:
# Replace the integer column labels (files were read with header=None) with the descriptive names.
train.columns = input_file_column_names

In [0]:
# Same column names for the test frame, so train and test share one schema.
test.columns = input_file_column_names

In [0]:
# Column dtypes and non-null counts of the training frame.
train.info()

In [0]:
# Column dtypes and non-null counts of the test frame.
test.info()

In [0]:
# Summary statistics of the raw training columns, one row per column.
train.describe().T

In [0]:
def RUL_predictor(df):
  """Return the remaining useful life (RUL) for every row of ``df``.

  RUL is the largest ``Cycle`` value in ``df`` minus the row's own ``Cycle``,
  so the row with the highest cycle gets 0.

  Args:
    df: DataFrame with a numeric ``Cycle`` column. The caller passes the rows
      of a single unit.

  Returns:
    A list with one RUL value per row, in the row order of ``df``.
    An empty ``df`` yields an empty list.
  """
  if df.empty:
    # The built-in max() used previously raised ValueError on an empty column.
    return []
  cycles = df["Cycle"]
  # Series.max() is vectorised (and skips NaN), unlike built-in max() over a Series.
  return (cycles.max() - cycles).tolist()

# Per-row RUL (max cycle of the row's unit minus the row's cycle), as a list
# in the row order of `train`.
# The previous loop iterated over set(train["UnitNumber"]), which only lines up
# with the frame if the set happens to iterate in the same order as the units
# appear in the rows, and it shadowed the builtin `id`.
# groupby(...).transform returns a result aligned with train's index, so the
# values match their rows however the frame is ordered.
Rul = (
    train.groupby("UnitNumber")["Cycle"].transform("max") - train["Cycle"]
).tolist()
  

In [0]:
# Attach the per-row remaining useful life as a new column; Rul must hold one value per row of train.
train["RUL"] = Rul

In [0]:
# Preview the first rows to check the new RUL column.
train.head()

In [0]:
# Scatter every sensor and operational setting against RUL for the units with
# UnitNumber < 10, one colour per unit.
# `height` replaces the `size` keyword: seaborn renamed it in 0.9 and current
# releases reject `size` with a TypeError.
g = sns.PairGrid(
    data=train.query('UnitNumber < 10'),
    x_vars=["RUL"],
    y_vars=sensor_measure_columns_names + operational_settings_columns_names,
    hue="UnitNumber",
    height=3,
    aspect=2.5,
)
g = g.map(plt.scatter, alpha=0.5)
# Reversed x-limits: RUL counts down towards 0 from left to right.
g = g.set(xlim=(300, 0))
g = g.add_legend()

In [0]:
# Columns removed from both frames before further analysis.
# NOTE(review): presumably chosen because they looked flat in the RUL scatter
# plots — confirm the selection criterion.
columns_to_drop = [
    "OpSet3",
    "SensorMeasure1",
    "SensorMeasure5",
    "SensorMeasure10",
    "SensorMeasure16",
    "SensorMeasure18",
    "SensorMeasure19",
]
# One shared list keeps train and test in sync. errors="ignore" makes the cell
# re-runnable: the previous in-place drop raised KeyError on a second run
# because the columns were already gone.
train = train.drop(columns=columns_to_drop, errors="ignore")
test = test.drop(columns=columns_to_drop, errors="ignore")

In [0]:
# Histogram (50 bins) of every numeric column left in the training frame.
train.hist(figsize=(18, 16), bins=50)
plt.show()

In [0]:
# Highest recorded cycle of each unit, one row per unit; as_index=False keeps
# UnitNumber as a regular column.
train_cycles, test_cycles = (
    frame.groupby("UnitNumber", as_index=False)["Cycle"].max()
    for frame in (train, test)
)

In [0]:
def _plot_max_cycles(ax, cycles, color, title):
    """Draw one bar per unit showing its maximum cycle count on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        cycles: DataFrame with ``UnitNumber`` and ``Cycle`` columns.
        color: Bar colour.
        title: Panel title.
    """
    units = list(cycles['UnitNumber'])
    ax.bar(units, cycles['Cycle'], color=color)
    ax.set_ylim([0, 400])
    ax.set_xlabel('Units', fontsize=16)
    ax.set_ylabel('Max. Cycles', fontsize=16)
    ax.set_title(title, fontsize=16)
    # Ticks every 5 units, derived from this panel's own unit numbers. The stop
    # is one past the last unit so the final tick is not cut off (the previous
    # range stopped at max - 1 and reused the train labels for the test panel).
    ax.set_xticks(np.arange(min(units) - 1, max(units) + 1, 5.0))
    ax.tick_params(axis='both', labelsize=12)


# Side-by-side comparison of unit lifetimes in the train and test sets.
fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=(16, 12))
_plot_max_cycles(train_ax, train_cycles, 'red', 'Max. Cycles per unit in trainset')
_plot_max_cycles(test_ax, test_cycles, 'grey', 'Max. Cycles per unit in testset')
plt.show()

In [0]:
# Stacked line plots for unit 1: one panel per selected column, each titled
# with the column's name.
values = train[train.UnitNumber==1].values
groups = [5, 6, 7, 8, 9, 10, 11,12,13]  # positional column indices into train
plt.figure(figsize=(10,20))
for panel, group in enumerate(groups, start=1):
    plt.subplot(len(groups), 1, panel)
    plt.plot(values[:, group])
    plt.title(train.columns[group], y=0.5, loc='right')
plt.show()

In [0]:
# SensorMeasure7 against cycle for units 1-10, one line per unit.
# A loop replaces ten copy-pasted plot calls; the legend and title let the
# figure be read on its own.
plt.figure(figsize = (8, 8))
for unit in range(1, 11):
    unit_rows = train[train.UnitNumber == unit]
    plt.plot(unit_rows.Cycle, unit_rows.SensorMeasure7, label='Unit {}'.format(unit))
plt.xlabel('# Cycles')
plt.ylabel('Sensor measurements')
plt.title('SensorMeasure7 per cycle, units 1-10')
plt.legend()
plt.show()

In [0]:
# SensorMeasure2 against cycle for units 1-10, one line per unit.
# A loop replaces ten copy-pasted plot calls; the legend and title let the
# figure be read on its own.
plt.figure(figsize = (8, 8))
for unit in range(1, 11):
    unit_rows = train[train.UnitNumber == unit]
    plt.plot(unit_rows.Cycle, unit_rows.SensorMeasure2, label='Unit {}'.format(unit))
plt.xlabel('# Cycles')
plt.ylabel('Sensor measurements')
plt.title('SensorMeasure2 per cycle, units 1-10')
plt.legend()
plt.show()

In [0]:
# Per-unit column minima and maxima for the first ten units, stacked into one
# table: the minima rows come first, followed by the maxima rows.
minb = train.groupby('UnitNumber', as_index=False).min().head(10)
maxb = train.groupby('UnitNumber', as_index=False).max().head(10)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and produces the same renumbered 0..n-1 index.
mmtable = pd.concat([minb, maxb], ignore_index=True)

In [0]:
# Bare expression: the notebook renders the table with its rich HTML repr
# instead of the truncated plain-text dump produced by print().
mmtable

In [0]:
# SensorMeasure2 from mmtable as bars: the first ten bars are red, the next
# ten blue, matching the min-rows-then-max-rows layout of the table.
col = np.repeat(['red', 'blue'], 10)
bar_labels = mmtable['UnitNumber'].tolist()
x_pos = list(range(len(bar_labels)))

plt.figure(figsize=(12, 12))
bars = plt.bar(x_pos, mmtable['SensorMeasure2'], color=col)
# Narrow y-range so the small spread between the bars is visible.
plt.ylim([640, 645])
plt.xticks(x_pos, bar_labels, fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('Units', fontsize=14)
plt.ylabel('SensorMeasure2', fontsize=14)
plt.show()

In [0]:
# Summary statistics of the training frame in its current state, one row per column.
train.describe().T

In [0]:
# Work on a copy so the unscaled training frame stays available as `train`.
newtrain = train.copy()

In [0]:
from sklearn.preprocessing import MinMaxScaler

In [0]:
# Min-max scale every operational-setting and sensor column, fitting the
# scaler on the training data.
scaler = MinMaxScaler()
# Pick the feature columns by name rather than by the positional slice 2:19,
# which silently depended on how many columns earlier cells dropped or added.
feature_columns = [
    name for name in newtrain.columns
    if name.startswith(("OpSet", "SensorMeasure"))
]
# Assigning by column label replaces the columns outright, so integer-typed
# sensor columns become float without an in-place dtype coercion.
newtrain[feature_columns] = scaler.fit_transform(newtrain[feature_columns])

In [0]:
# Summary statistics of the scaled training frame, one row per column.
newtrain.describe().T

In [0]:
# Work on a copy so the unscaled test frame stays available as `test`.
newtest = test.copy()

In [0]:
# Apply the already-fitted scaler to the test set (transform only, no refit).
# Feature columns are picked by name rather than by the positional slice 2:19,
# which silently depended on the exact column layout at this point.
feature_columns = [
    name for name in newtest.columns
    if name.startswith(("OpSet", "SensorMeasure"))
]
# Label-based assignment replaces the columns, so integer-typed columns become
# float without an in-place dtype coercion.
newtest[feature_columns] = scaler.transform(newtest[feature_columns])

In [0]:
# Summary statistics of the scaled test frame, one row per column.
newtest.describe().T

In [0]:
# Summary statistics of the unscaled test frame, for comparison with the scaled one.
test.describe().T

In [0]:


# Unit 1, SensorMeasure2: unscaled values (left panel) next to the scaled
# values (right panel), train and test overlaid in each.
fig, (raw_ax, scaled_ax) = plt.subplots(1, 2, figsize=(8, 8))
# Shared legend placement: stretched along the top edge of each panel.
legend_above = dict(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0)

raw_ax.plot(train[train.UnitNumber==1].SensorMeasure2)
raw_ax.plot(test[test.UnitNumber==1].SensorMeasure2)
raw_ax.legend(['Train','Test'], **legend_above)
raw_ax.set_ylabel('Original unit')

scaled_ax.plot(newtrain[newtrain.UnitNumber==1].SensorMeasure2)
scaled_ax.plot(newtest[newtest.UnitNumber==1].SensorMeasure2)
scaled_ax.legend(['Scaled Train','Scaled Test'], **legend_above)
scaled_ax.set_ylabel('Scaled unit')

plt.show()



In [0]:


def fractionTTF(dat,q):
    """Return the RUL at label ``q`` rescaled to [0, 1] within ``dat``.

    Computes ``(RUL[q] - min(RUL)) / (max(RUL) - min(RUL))``.

    Args:
        dat: Object with a ``RUL`` Series (e.g. the rows of one unit).
        q: Index label of the row to rescale. Lookup is label-based, so with
            a default 0..n-1 index it equals the row position.

    Returns:
        The rescaled value as a float; 0.0 when every RUL in ``dat`` is the
        same (zero span), instead of the NaN a 0/0 division would give.
    """
    rul = dat.RUL
    lowest = rul.min()
    span = float(rul.max() - lowest)
    if span == 0:
        # Constant RUL (e.g. a single-row unit): avoid dividing by zero.
        return 0.0
    return (rul[q] - lowest) / span



In [0]:
# Fraction of remaining life per row: RUL min-max scaled within each unit
# (1.0 where the unit's RUL is largest, 0.0 where it is smallest).
# Vectorised with groupby/transform, which keeps the result aligned with
# train's rows. The previous nested loop recomputed the unit's min/max for
# every row (quadratic per unit) and assumed contiguous unit numbers and
# unit-sorted rows.
unit_rul = train["RUL"].groupby(train["UnitNumber"])
rul_floor = unit_rul.transform("min")
rul_span = (unit_rul.transform("max") - rul_floor).astype(float)
# A unit whose RUL never changes has zero span; report 0.0 rather than 0/0.
fTTF = (
    (train["RUL"] - rul_floor) / rul_span.where(rul_span != 0)
).fillna(0.0).tolist()
# newtrain is a copy of train, so the list lines up with its rows one to one.
newtrain['RUL'] = fTTF

In [0]:
# Sum of the max cycle of the first four units, used as the row window for both
# panels. Assumes each unit's cycles run 1..max so this equals their row count
# — TODO confirm.
mx = train_cycles.iloc[0:4,1].sum()

fig = plt.figure(figsize = (8, 8))
fig.add_subplot(1,2,1)
# Left panel is labelled "in cycles", so it reads the unscaled RUL from
# `train`; previously both panels plotted the scaled `newtrain.RUL`.
plt.plot(train.RUL[0:mx])
plt.legend(['Time to failure (in cycles)'], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0)
plt.ylabel('Original unit')
fig.add_subplot(1,2,2)
# Right panel: the per-unit fraction stored in newtrain.RUL.
plt.plot(newtrain.RUL[0:mx])
plt.legend(['Time to failure (fraction)'], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0)
plt.ylabel('Scaled unit')
plt.show()

In [0]:
# Summary statistics of the RUL column stored in newtrain.
newtrain["RUL"].describe()

In [0]:
# Persist the unscaled and scaled frames. to_csv is called with its defaults,
# so each file also carries the DataFrame index as its first column.
for frame, csv_name in (
    (train, 'train.csv'),
    (newtrain, "newtrain.csv"),
    (test, 'test.csv'),
    (newtest, 'newtest.csv'),
):
    frame.to_csv(csv_name)