In [277]:
# Import dependencies
# Standard library
import os
import pickle  # To save transformer
from datetime import datetime as dt  # To transform data

# Third-party
import numpy as np
import pandas as pd
from sklearn import random_projection
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# To perform Hot Encoding
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense

In [6]:
# Load the cleaned and transformed dataset, then preview its first rows
df = pd.read_csv(filepath_or_buffer='Model_Data.csv')
df.head()


  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,IB Material - Key,Installed At - Key,RMA Create Date,Defective,Serial Number,Shipped Day,Lifespan(days)
0,0,81Y8237,100034495,3/20/2017,1,233G04B,10/12/2012,1620.0
1,1,81Y8349,101400565,1/31/2018,1,231S01X,7/18/2011,2389.0
2,2,95Y0510,101183390,4/12/2017,1,EM0711427457,10/29/2013,1261.0
3,3,95Y0510,101183390,4/27/2017,1,EM0711427505,12/13/2012,1596.0
4,4,95Y0510,101183390,4/27/2017,1,EM0711427426,12/13/2012,1596.0


In [297]:
# Number of rows per product ID, most frequent first
df.loc[:, 'IB Material - Key'].value_counts()

SRX300-JSB            56824
EX4300-48P            48426
QFX-JSL-EDGE-ADV1     43899
EX3300-48P            35290
EX2200-24P-4G         33918
EX2200-48P-4G         31952
SRX110H2-VA           27606
EX2200-24T-4G         26499
SRX340-JSB            23212
EX4300-48T            23158
EX3300-24P            21944
EX3400-48P            20813
EX3300-24T            20219
SRX320-JSB            20158
SRX220H2              18396
SRX100H2              18293
EX2200-C-12P-2G       18230
SRX300-JSB-L          18206
EX3300-48T            17488
EX2200-C-12T-2G       17101
EX2200-48T-4G         16587
MPC-3D-16XGE-SFPP     14356
QFX5100-48S-3AFO      14086
QFX5100-24Q-3AFO      12949
ACX2200-DC            12633
EX2300-24T            12238
EX4300-24T            11697
ACX5K-L-1X10GE-S      11142
SRX345-JSB            10855
EX2300-48P            10801
                      ...  
FE-8_I/O                  1
PWR-M120-DC-R             1
M7I-FEB-SVC-MS-E-S        1
MX2K-PDM-OP-DC-BB         1
BP3AM4CL-01         

In [7]:
# Remove the columns the model does not use, then discard rows without a lifespan
df = (
    df
    .drop(columns=['Serial Number', 'RMA Create Date', 'Shipped Day'])
    .dropna(subset=['Lifespan(days)'])
)
print("final dataframe shape: ", df.shape)


final dataframe shape:  (1408350, 5)


In [8]:
# Generate dummy columns for Product ID (IB Material - Key)
# One indicator column per distinct product ID, stored sparsely (sparse=True).
model_df = pd.get_dummies(df['IB Material - Key'], sparse=True)
# Lifespan is appended as one extra numeric column next to the indicators,
# so the model input is "product indicators + lifespan".
model_df['Lifespan(days)'] = df['Lifespan(days)']
print("dummy shape: ", model_df.shape)

dummy shape:  (1408350, 2353)


In [None]:
#############################################################################################################

In [12]:
# Create transformer to reduce dimensions of the dummy variables
# The projection matrix is drawn at random when the transformer is fitted; a
# fixed seed makes it reproducible across kernel restarts, so a transformer
# fitted in a later run matches one fitted (and pickled) in an earlier run on
# the same columns. The seed matches the one used for train_test_split.
# NOTE(review): artifacts saved before this seed was introduced were produced
# with a different random matrix and must keep being used together.
transformer = random_projection.GaussianRandomProjection(n_components=100, random_state=13)

In [13]:
# Set up the target (y) and the projected inputs (X) for train,test,split
y = df.loc[:, 'Defective']
X_new = transformer.fit_transform(model_df)

In [16]:
# Split projected inputs and labels into train and test partitions (seeded)
(X_train, X_test,
 y_train, y_test) = train_test_split(X_new, y, random_state=13)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [35]:
# Create neural net
# Sizes: inputs match the 100 projected components, outputs are the 2 classes.
number_inputs = 100
number_hidden_nodes = 4
number_classes = 2

# Layers are listed in order: a 4-unit relu layer fed by the inputs,
# a 100-unit relu layer, then a softmax over the classes.
model_2 = Sequential([
    Dense(units=number_hidden_nodes, activation='relu', input_dim=number_inputs),
    Dense(units=100, activation='relu'),
    Dense(units=number_classes, activation='softmax'),
])
model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 4)                 404       
_________________________________________________________________
dense_12 (Dense)             (None, 100)               500       
_________________________________________________________________
dense_13 (Dense)             (None, 2)                 202       
Total params: 1,106
Trainable params: 1,106
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Compile the model
# Sparse categorical cross-entropy takes the integer class labels directly.
model_2.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)

In [39]:
# Fit model with training data
# Training is expensive (the recorded log shows roughly half a minute per
# epoch for 60 epochs), so the fitted network is cached on disk: when the
# saved model exists it is loaded, otherwise the model is trained and saved.
# This replaces the commented-out "DO NOT RE-RUN" training call so the cell
# works both on a fresh checkout and on repeated runs.
# NOTE(review): a loaded model is only meaningful together with the
# transformer it was trained with - confirm both artifacts come from the
# same run.
MODEL_PATH = "RMA_Model.h5"
if os.path.exists(MODEL_PATH):
    model_2 = load_model(MODEL_PATH)
else:
    model_2.fit(X_train,
                y_train,
                epochs=60,
                shuffle=True,
                verbose=2)
    model_2.save(MODEL_PATH)

Epoch 1/60
 - 28s - loss: 0.1379 - acc: 0.9686
Epoch 2/60
 - 27s - loss: 0.1279 - acc: 0.9709
Epoch 3/60
 - 28s - loss: 0.1258 - acc: 0.9715
Epoch 4/60
 - 27s - loss: 0.1250 - acc: 0.9717
Epoch 5/60
 - 28s - loss: 0.1246 - acc: 0.9718
Epoch 6/60
 - 29s - loss: 0.1244 - acc: 0.9719
Epoch 7/60
 - 29s - loss: 0.1240 - acc: 0.9720
Epoch 8/60
 - 27s - loss: 0.1241 - acc: 0.9720
Epoch 9/60
 - 26s - loss: 0.1236 - acc: 0.9721
Epoch 10/60
 - 27s - loss: 0.1238 - acc: 0.9721
Epoch 11/60
 - 27s - loss: 0.1233 - acc: 0.9723
Epoch 12/60
 - 27s - loss: 0.1232 - acc: 0.9723
Epoch 13/60
 - 27s - loss: 0.1231 - acc: 0.9723
Epoch 14/60
 - 27s - loss: 0.1228 - acc: 0.9724
Epoch 15/60
 - 29s - loss: 0.1229 - acc: 0.9724
Epoch 16/60
 - 31s - loss: 0.1226 - acc: 0.9725
Epoch 17/60
 - 29s - loss: 0.1229 - acc: 0.9724
Epoch 18/60
 - 27s - loss: 0.1224 - acc: 0.9726
Epoch 19/60
 - 27s - loss: 0.1221 - acc: 0.9726
Epoch 20/60
 - 27s - loss: 0.1221 - acc: 0.9727
Epoch 21/60
 - 28s - loss: 0.1219 - acc: 0.9727
E

<keras.callbacks.History at 0x1a3f137160>

In [None]:
#############################################################################################################

In [42]:
# Score the network on the held-out test split and report loss / accuracy
scores = model_2.evaluate(x=X_test, y=y_test, verbose=2)
model_loss, model_accuracy = scores
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Deep Neural Network - Loss: 0.1196103119331254, Accuracy: 0.973316330008407


In [41]:
# SAVE MODEL
# model_2.save("RMA_Model.h5")

In [379]:
# Project one encoded row. Series.reshape is deprecated (and removed in later
# pandas), so the row is converted to a NumPy array first; reshape(1, -1)
# yields the single-row 2-D input without hard-coding the feature count.
transformer.transform(np.asarray(model_df.iloc[1]).reshape(1, -1))

  """Entry point for launching an IPython kernel.


array([[ 191.43507761, -265.18955089,  321.06125052,  241.80984996,
        -111.06554704, -247.92544752, -227.24340147,   -5.11837869,
          68.69715372, -300.28185887,  297.68120072,  102.84191308,
          15.85893387, -383.29242442,  324.4682318 ,  164.16241119,
        -133.35825953,  301.60477386,  252.90192545, -267.1446007 ,
        -169.37362553,   76.34088603,  190.7919311 , -221.75755794,
         221.4079253 , -470.97321756,  177.22670255, -189.29343091,
          95.54445196,  184.67260565, -316.82260231, -106.79002084,
         521.85437504, -116.71742302,  466.58655127, -239.56849454,
         304.56002927,  368.13102034, -103.91809792,   66.30089433,
         648.74497548,  282.37849399,   67.70469738, -155.97436314,
         251.23826382,   66.42547906, -126.75141107, -773.41516613,
         222.61799699,  322.2982339 ,  386.24675933,  392.5210591 ,
         176.31124931, -202.18751609, -111.31321334, -312.5339256 ,
        -149.6447263 ,   35.67062521,  144.82738

In [369]:
# Inspect the encoded feature row at position 1 of model_df
model_df.iloc[1]

81Y8237                  0.0
81Y8349                  1.0
95Y0510                  0.0
95Y0512                  0.0
ACX-MIC-16CHE1T1CE       0.0
ACX-MIC-6GE-CU-SFP       0.0
ACX-MIC4OC31OC12CE       0.0
ACX1000-DC               0.0
ACX1100-AC               0.0
ACX1100-DC               0.0
ACX2000-DC               0.0
ACX2100-AC               0.0
ACX2100-AC-BASE          0.0
ACX2100-DC               0.0
ACX2100-DC-BASE          0.0
ACX2200-AC               0.0
ACX2200-DC               0.0
ACX4000-2-6GE-AC         0.0
ACX4000-2-6GE-DC         0.0
ACX4000BASE-AC           0.0
ACX4000BASE-DC           0.0
ACX4K-FANTRAY-M-S        0.0
ACX500-AC                0.0
ACX500-DC                0.0
ACX500-O-AC              0.0
ACX500-O-DC              0.0
ACX500-O-POE-AC          0.0
ACX500-O-POE-DC          0.0
ACX5048-AC               0.0
ACX5048-DC               0.0
                       ...  
WLM-RMTS-100             0.0
WLM-RMTS-1000            0.0
WLM-RMTS-250             0.0
WLM-RMTS-50   

In [278]:
# SAVE TRANSFORMER
# with open('transformer.pickle', 'wb') as handle:
#     pickle.dump(transformer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [189]:
#Save empty dataframe for new points
# Build the all-zero template directly from the model's column names instead
# of copying one data row and zeroing its particular values: the result no
# longer depends on which row is picked, on that row's lifespan value, or on
# its index label. One "Encoding" entry exists per model input column
# (product indicators plus 'Lifespan(days)').
new_point = pd.DataFrame({"Encoding": 0.0}, index=model_df.columns)

# SAVE empty dataframe for new points
new_point.to_csv('New_Point.csv')

In [None]:
#############################################################################################################

In [376]:
### FUNCTION TO TRANSFORM INPUT AND PREDICT NEW POINTS
#Dependencies (Pandas, Keras.models (load_model), sklearn (random_projection))
def RMA_point(product_id, item_age):
    """Predict whether a product of the given age should be replaced.

    The encoded input row is built by ``empty_point`` and classified by
    ``transform_point``.

    Returns:
        A tuple ``(action, product_id, result)`` where ``action`` is
        "Replace defective" when ``result == 1`` and "No replacement"
        otherwise.
    """
    encoded_row = empty_point(product_id, item_age)
    result = transform_point(encoded_row)
    action = "Replace defective" if result == 1 else "No replacement"
    return action, product_id, result

# Loaded artifacts are kept here so repeated predictions (e.g. one call per
# row of a dataframe) do not re-read the pickle and the Keras model each time.
# Call _RMA_ARTIFACTS.clear() after re-saving either file to force a reload.
_RMA_ARTIFACTS = {}

def _load_rma_artifacts():
    """Return the (transformer, model) pair, reading both files only once."""
    if not _RMA_ARTIFACTS:
        # NOTE(security): pickle.load can execute arbitrary code; only load a
        # transformer.pickle that was produced by this notebook.
        with open('transformer.pickle', 'rb') as handle:
            xform = pickle.load(handle)
        model = load_model("RMA_Model.h5")
        # Store both only after both loads succeeded, so a failed load never
        # leaves a half-filled cache behind.
        _RMA_ARTIFACTS['transformer'] = xform
        _RMA_ARTIFACTS['model'] = model
    return _RMA_ARTIFACTS['transformer'], _RMA_ARTIFACTS['model']

def transform_point(input_point):
    """Project an encoded input row and return its predicted class.

    Args:
        input_point: encoded row to classify; assumed to be a 2-D array with
            one row, as returned by ``empty_point`` - TODO confirm for other
            callers.

    Returns:
        The predicted class of the first row.
    """
    xform, model = _load_rma_artifacts()
    transformed_point = xform.transform(input_point)
    probabilities = model.predict(transformed_point)
    # Equivalent of Sequential.predict_classes, which newer Keras versions no
    # longer provide: argmax for multi-unit outputs, 0.5 threshold otherwise.
    if probabilities.shape[-1] > 1:
        classes = probabilities.argmax(axis=-1)
    else:
        classes = (probabilities > 0.5).astype('int32')
    return classes[0]

def empty_point(product_id, item_age, horizon_days=90):
    """Build the encoded single-row model input for one product.

    Reads the all-zero template from 'New_Point.csv', sets the lifespan entry
    to ``item_age + horizon_days`` and switches on the indicator of
    ``product_id``.

    Args:
        product_id: product ID; must be one of the row labels of the template.
        item_age: current age of the item in days.
        horizon_days: number of days added to ``item_age`` before encoding
            (defaults to the previously hard-coded 90).

    Returns:
        A 2-D NumPy array with a single row holding the template values.

    Raises:
        ValueError: if ``product_id`` is not a product known to the template.
    """
    point = pd.read_csv('New_Point.csv', index_col=0)
    # Assigning to an unknown label with .loc would silently append a new row
    # and change the input width, so unknown products are rejected up front.
    if product_id == 'Lifespan(days)' or product_id not in point.index:
        raise ValueError("Unknown product_id: {!r}".format(product_id))
    point.loc['Lifespan(days)', "Encoding"] = item_age + horizon_days
    point.loc[product_id, "Encoding"] = 1
    # Series.reshape was removed from pandas; reshape the underlying array.
    # reshape(1, -1) gives one row without hard-coding the feature count.
    input_point = point['Encoding'].values.reshape(1, -1)
    return input_point



# MAKING PREDICTIONS HERE

In [377]:
#Making random predictions!!!!
RMA_point(product_id="XFP-10GE-SR", item_age=100)



('No replacement', 'XFP-10GE-SR', 0)

In [378]:
# Second spot check: an older item of a different product
RMA_point(product_id="RE-400-768-R", item_age=1235)



('Replace defective', 'RE-400-768-R', 1)

In [328]:
# Load the sample of items to score with RMA_point
proxy_df = pd.read_csv(filepath_or_buffer="proxy.csv")

In [332]:
#Analyzing proxy_df
defectives = []
status = []

# RMA_point returns (action, product_id, result). The numeric result is the
# status flag and the action text is the human-readable label; unpacking by
# name avoids picking the wrong tuple position. Rows are iterated directly so
# the loop does not depend on the index labels matching row positions.
for _, row in proxy_df.iterrows():
    action, _product, flag = RMA_point(row["IB Material - Key"], row['Lifespan(days)'])
    status.append(flag)
    defectives.append(action)

proxy_df['Status'] = status
proxy_df['Defectives'] = defectives

proxy_df



Unnamed: 0.1,Unnamed: 0,IB Material - Key,Lifespan(days),Status,Defectives
0,0,SRX110H2-VA,922.0,0,No replacement
1,1,EX3300-48P,138.0,0,No replacement
2,2,SRX100H2,890.0,0,No replacement
3,3,QFX5100-48S-3AFO,670.0,0,No replacement
4,4,BT8A78CFP1G,286.0,0,No replacement
5,5,EX3400-48T-AFI,488.0,0,No replacement
6,6,MPC-3D-16XGE-SFPP,366.0,0,No replacement
7,7,EX3400-48P,902.0,0,No replacement
8,8,BT7A81CA,231.0,0,No replacement
9,9,NFX250-S1,574.0,0,No replacement


In [359]:
#Define function to count defectives in query
proxy_list = []
def defective_count(frame=None):
    """Print and record every row whose 'Status' equals 1.

    Each matching row is appended to the module-level ``proxy_list`` as
    ``[product id, status]`` and printed with its lifespan.

    Args:
        frame: dataframe with 'IB Material - Key', 'Lifespan(days)' and
            'Status' columns. Defaults to the notebook-level ``proxy_df``.
    """
    if frame is None:
        frame = proxy_df
    # Filter once, then iterate the matching rows themselves; this works for
    # any index, not only the default 0..n-1 one.
    flagged = frame[frame['Status'] == 1]
    for _, row in flagged.iterrows():
        proxy_list.append([row['IB Material - Key'], row['Status']])
        print("Replace: ", row['Status'],
              " --- ", row['IB Material - Key'],
              "(", row['Lifespan(days)'], "days old )")


In [360]:
# Print (and record in proxy_list) the rows of proxy_df whose Status equals 1
defective_count()

Replace:  1  ---  QFX-JSL-EDGE-ADV1 ( 992.0 days old )


In [341]:
#Checking math
# Tally of each label in the Defectives column
proxy_df.loc[:, 'Defectives'].value_counts()

No replacement       99
Replace defective     1
Name: Defectives, dtype: int64

Unnamed: 0.1,Unnamed: 0,IB Material - Key,Lifespan(days),Status,Defectives
0,0,SRX110H2-VA,1102.0,0,No replacement
1,1,EX3300-48P,318.0,0,No replacement
2,2,SRX100H2,1070.0,0,No replacement
3,3,QFX5100-48S-3AFO,850.0,0,No replacement
4,4,BT8A78CFP1G,466.0,0,No replacement


# IMPORTANT LIMITATIONS

## Input data must be of a certain age
## product models with low representation in dataset should be assessed with caution

### Note:
- Need a way to use RMA_point() on full datasets or CSV files (for example, a for loop that calls RMA_point() on each row?).
- The dataset must have Product_ID and Item_Age columns.
- Item_Age = Today() - Ship_Date