In [53]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import predictionModels as pm
import dFandLB as dflb
import dataClean as dc

In [54]:
downtime_data = pd.read_excel('Raw Data/Equipment downtime data (202308).xlsx')
downtime_data.to_csv('Raw Data/Equipment downtime data (202308).csv', index=False)

hierarchy_data = pd.read_excel('Raw Data/Operation level of equipment.xlsx')
hierarchy_data.to_csv('Raw Data/Operation level of equipment.csv', index=False)

downtimeNew = pd.read_excel('Raw Data/Equipment downtime data (202310).xlsx')
downtimeNew.to_csv('Raw Data/Equipment downtime data (202310).csv', index=False)

In [55]:
downtime_data['FaultDate'] = pd.to_datetime(downtime_data['FaultDate'], errors='coerce').dt.date
downtimeNew['FaultDate'] = pd.to_datetime(downtimeNew['FaultDate'], errors='coerce').dt.date

downtime_data['FaultTime'] = downtime_data['FaultTime'].apply(lambda x: x.strftime('%H:%M:%S') if len(str(x)) > 8 else x)
downtimeNew['FaultTime'] = downtimeNew['FaultTime'].apply(lambda x: x.strftime('%H:%M:%S') if len(str(x)) > 8 else x)

downtimeNew = downtimeNew.dropna(subset=['FaultDate', 'FaultTime'])

pattern_data = downtime_data.iloc[:, :14]
pattern_data = pattern_data.drop(['ID', 'DutyOfficer', 'Manager email address'], axis=1)
pattern_data['FaultDateTime'] = pd.to_datetime(pattern_data['FaultDate'].astype(str) + ' ' + pattern_data['FaultTime'].astype(str))
dt2010 = pd.to_datetime('2010-01-01 00:00:00')
pattern_data.drop(['FaultDate', 'FaultTime'], axis=1, inplace=True)
pattern_data.sort_values(by=['FaultDateTime'], inplace=True)
pattern_data = pattern_data[pattern_data['FaultDateTime'] >= dt2010]
pattern_data = pattern_data.drop(['LogEntry', 'DutyOfficer comments', 'Managerscomments', 'FaultRepair', 'FaultDescription', 'Group', 'Downtime'], axis=1)

pattern_new = downtimeNew.iloc[:, :14]
pattern_new = pattern_new.drop(['ID', 'DutyOfficer', 'Manager email address'], axis=1)
pattern_new['FaultDateTime'] = pd.to_datetime(pattern_new['FaultDate'].astype(str) + ' ' + pattern_new['FaultTime'].astype(str))
dtnew = pd.to_datetime('2023-08-04 06:00:00') ## change month to 8
pattern_new.drop(['FaultDate', 'FaultTime'], axis=1, inplace=True)
pattern_new.sort_values(by=['FaultDateTime'], inplace=True)
pattern_new = pattern_new[pattern_new['FaultDateTime'] >= dtnew]
pattern_new = pattern_new.drop(['LogEntry', 'DutyOfficer comments', 'Managerscomments', 'FaultRepair', 'FaultDescription', 'Group', 'Downtime'], axis=1)

pattern_data['Equipment'] = pattern_data['Equipment'].str.lower()
pattern_data['Equipment'] = pattern_data['Equipment'].str.replace('[^\w\s]', '')

pattern_new['Equipment'] = pattern_new['Equipment'].str.lower()
pattern_new['Equipment'] = pattern_new['Equipment'].str.replace('[^\w\s]', '')

  pattern_data['Equipment'] = pattern_data['Equipment'].str.replace('[^\w\s]', '')
  pattern_new['Equipment'] = pattern_new['Equipment'].str.replace('[^\w\s]', '')
  pattern_data['Equipment'] = pattern_data['Equipment'].str.replace('[^\w\s]', '')
  pattern_new['Equipment'] = pattern_new['Equipment'].str.replace('[^\w\s]', '')


In [56]:
set1 = set(pattern_data['Equipment'].tolist())

In [57]:
lb = LabelEncoder()
pattern_data['Equipment'] = lb.fit_transform(pattern_data['Equipment'])
pattern_data['NumericDates'] = pattern_data['FaultDateTime'].astype(np.int64) // 10 ** 9
norm  = pattern_data['NumericDates'].min()
# pattern_data['NumericDates'] = pattern_data['NumericDates'] - norm

In [58]:
column_names = []
for i in range(20):
    column_names.append(f"Failed Piece {i+1}")
newTactic = pd.DataFrame(columns = column_names)
for column in newTactic:
    newTactic[column] = [[]]*pattern_data.shape[0]
newTacticLabel = [0]*pattern_data.shape[0]

In [59]:
twindow = pd.Timedelta(days=3)

for i in range(pattern_data.shape[0]):
    temp = 0
    for j in range(i, i+20):
        if j>=pattern_data.shape[0]:
            break
        inner_eq = pattern_data.iloc[j]['Equipment']
        dtnum = pattern_data.iloc[j]['NumericDates']
        newTactic[f"Failed Piece {j-i+1}"][i] = [inner_eq, dtnum]
        temp = j
    cur_dt = pattern_data.iloc[temp]['FaultDateTime']
    for j in range(temp, pattern_data.shape[0]):
        inner_eq = pattern_data.iloc[j]['Equipment']
        inner_dt = pattern_data.iloc[j]['FaultDateTime']
        if inner_eq == 'ion source':
            newTacticLabel[i] = 1
            break
        if (inner_dt - cur_dt) >= twindow:
            break

In [62]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the CNN model
model = models.Sequential()

# Convolutional layers
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(20, pattern_data.shape[0], 2)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((1, 1)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((1, 1)))

# Dense layers (fully connected layers)
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))  # Assuming 10 classes for classification

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use 'categorical_crossentropy' if you have one-hot encoded labels
              metrics=['accuracy'])

# Display the model summary
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 18, 24434, 32)     608       
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 9, 12217, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 7, 12215, 64)      18496     
                                                                 
 max_pooling2d_4 (MaxPoolin  (None, 7, 12215, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 5, 12213, 64)      36928     
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 5, 12213, 64)     

In [64]:
training_x, testing_x, training_y, testing_y = train_test_split(newTactic, newTacticLabel, test_size=0.2, shuffle=True)

In [67]:
newTactic

Unnamed: 0,Failed Piece 1,Failed Piece 2,Failed Piece 3,Failed Piece 4,Failed Piece 5,Failed Piece 6,Failed Piece 7,Failed Piece 8,Failed Piece 9,Failed Piece 10,Failed Piece 11,Failed Piece 12,Failed Piece 13,Failed Piece 14,Failed Piece 15,Failed Piece 16,Failed Piece 17,Failed Piece 18,Failed Piece 19,Failed Piece 20
0,"[109, 1266309000]","[103, 1266313200]","[88, 1266320400]","[90, 1266323280]","[51, 1266335100]","[51, 1266335940]","[90, 1266343020]","[88, 1266355020]","[88, 1266358020]","[55, 1266360900]","[31, 1266361680]","[90, 1266362400]","[88, 1266362580]","[88, 1266362640]","[88, 1266362940]","[88, 1266363600]","[88, 1266363720]","[55, 1266364440]","[88, 1266366240]","[88, 1266370560]"
1,"[103, 1266313200]","[88, 1266320400]","[90, 1266323280]","[51, 1266335100]","[51, 1266335940]","[90, 1266343020]","[88, 1266355020]","[88, 1266358020]","[55, 1266360900]","[31, 1266361680]","[90, 1266362400]","[88, 1266362580]","[88, 1266362640]","[88, 1266362940]","[88, 1266363600]","[88, 1266363720]","[55, 1266364440]","[88, 1266366240]","[88, 1266370560]","[90, 1266373980]"
2,"[88, 1266320400]","[90, 1266323280]","[51, 1266335100]","[51, 1266335940]","[90, 1266343020]","[88, 1266355020]","[88, 1266358020]","[55, 1266360900]","[31, 1266361680]","[90, 1266362400]","[88, 1266362580]","[88, 1266362640]","[88, 1266362940]","[88, 1266363600]","[88, 1266363720]","[55, 1266364440]","[88, 1266366240]","[88, 1266370560]","[90, 1266373980]","[88, 1266374760]"
3,"[90, 1266323280]","[51, 1266335100]","[51, 1266335940]","[90, 1266343020]","[88, 1266355020]","[88, 1266358020]","[55, 1266360900]","[31, 1266361680]","[90, 1266362400]","[88, 1266362580]","[88, 1266362640]","[88, 1266362940]","[88, 1266363600]","[88, 1266363720]","[55, 1266364440]","[88, 1266366240]","[88, 1266370560]","[90, 1266373980]","[88, 1266374760]","[23, 1266375720]"
4,"[51, 1266335100]","[51, 1266335940]","[90, 1266343020]","[88, 1266355020]","[88, 1266358020]","[55, 1266360900]","[31, 1266361680]","[90, 1266362400]","[88, 1266362580]","[88, 1266362640]","[88, 1266362940]","[88, 1266363600]","[88, 1266363720]","[55, 1266364440]","[88, 1266366240]","[88, 1266370560]","[90, 1266373980]","[88, 1266374760]","[23, 1266375720]","[88, 1266377400]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24431,"[71, 1691086929]","[64, 1691094861]","[71, 1691111449]","[71, 1691122651]","[76, 1691125642]",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
24432,"[64, 1691094861]","[71, 1691111449]","[71, 1691122651]","[76, 1691125642]",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
24433,"[71, 1691111449]","[71, 1691122651]","[76, 1691125642]",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]
24434,"[71, 1691122651]","[76, 1691125642]",[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]


In [65]:
# Assuming you have train_data and test_data
model.fit(training_x, training_y, epochs=10, validation_data=(testing_x, testing_y))


ValueError: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, (<class 'list'> containing values of types {"<class 'int'>"})

In [60]:
# import warnings
# from collections import Counter
# warnings.filterwarnings('ignore')
# for ur in set(pattern_data['User Run']):
#     ion_src = pattern_data[pattern_data['User Run'] == ur]
#     # for eq in set1:
#     ion_src1 = ion_src[ion_src['Equipment'] == 'ion source']
#     ion_src1['dateOnly'] = ion_src1['FaultDateTime'].dt.date
#     lst = ion_src1['dateOnly'].tolist()
#     ct = Counter(lst)
#     if len(ct) > 0:
#         dates, counts  = zip(*sorted(ct.items()))
#         plt.bar(dates, counts, label=eq)
#         plt.title(f'{ur} {eq}')
#         plt.xlabel('Date')
#         plt.ylabel('Frequency')
#         plt.xlim([min(dates), max(dates)])
#         plt.xticks(rotation=90)
#     plt.show()
# warnings.resetwarnings()