### Checking for Requirements 

In [None]:
#!pip install scikit-learn
#!pip install tqdm
#!pip install seaborn
#!python -m pip install --upgrade pip
#!python -m pip install --upgrade setuptools wheel
#!pip install streamlit

### Importing required packages

In [None]:
import pandas as pd
import streamlit as st 
import pickle
import seaborn as sns
import warnings
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook, tqdm
from sklearn.model_selection import train_test_split as tts
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report,confusion_matrix, ConfusionMatrixDisplay

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import all_estimators

### Providing file locations

In [None]:
# Every input CSV sits in the same workshop data folder, so the folder is
# written once and each file location is derived from it.
DATA_DIR = "C:/Users/Rohan/Downloads/AMLWorkshop-master/AMLWorkshop-master/Data"

failuresloc = f"{DATA_DIR}/failures.csv"
telemetryloc = f"{DATA_DIR}/telemetry.csv"
errorsloc = f"{DATA_DIR}/errors.csv"
machinesloc = f"{DATA_DIR}/machines.csv"
telemetryfeatloc = f"{DATA_DIR}/telemetryfeat.csv"
errorfeatloc = f"{DATA_DIR}/errorfeat.csv"
featuresloc = f"{DATA_DIR}/features.csv"

### Loading Files

In [None]:
# Label -> CSV location for every input table. The label is also stored on
# the loaded DataFrame as `.name`, which the later cells print.
_csv_sources = {
    "FailureDF": failuresloc,
    "TelemetryDF": telemetryloc,
    "ErrorsDF": errorsloc,
    "MachinesDF": machinesloc,
    "TelemetryFeatDF": telemetryfeatloc,
    "ErrorFeatDF": errorfeatloc,
    "FeaturesDF": featuresloc,
}

# Read each file exactly once; the progress bar advances one step per file.
_frames = {}
for _label, _csv_path in tqdm_notebook(_csv_sources.items(), ncols=1000, desc="Loading"):
    _frame = pd.read_csv(_csv_path, low_memory=False)
    _frame.name = _label
    _frames[_label] = _frame
print("Data Loaded")

FailureDF = _frames["FailureDF"]
TelemetryDF = _frames["TelemetryDF"]
ErrorsDF = _frames["ErrorsDF"]
MachinesDF = _frames["MachinesDF"]
TelemetryFeatDF = _frames["TelemetryFeatDF"]
ErrorFeatDF = _frames["ErrorFeatDF"]
FeaturesDF = _frames["FeaturesDF"]

# Frames processed by the per-frame 'datetime' loops in the following cells.
# MachinesDF is deliberately not listed here.
# NOTE(review): presumably because machines.csv has no 'datetime' column — confirm.
headlis = [FailureDF, TelemetryDF, ErrorsDF, TelemetryFeatDF, ErrorFeatDF, FeaturesDF]

### Converting date time feature to a similar format

In [None]:
# Normalise the 'datetime' column of every frame in headlis to a
# 'yy-mm-dd' string so all frames share one textual date format.
for frame in headlis:
    print(f"{frame.name}")
    for _ in tqdm_notebook(range(1), ncols=1000, desc="Converting"):
        parsed = pd.to_datetime(frame['datetime'])
        frame['datetime'] = parsed.dt.strftime('%y-%m-%d')
        print(frame['datetime'].head(1))

### Sorting all data frames based on time stamps

In [None]:
# Sort every frame in headlis in place by its 'datetime' column, printing a
# 140-character rule before the first frame and after each processed frame.
rule = '_' * 140
print(rule)
for frame in headlis:
    if frame.name != "MachinesDF":
        print(f"{frame.name} :\n")
        cols = "datetime"
        for _ in tqdm_notebook(range(1), ncols=1000, desc="Sorting"):
            frame.sort_values(by=cols, axis=0, inplace=True)
        print(rule)

### Merging all DF's Together

In [None]:
# Join the telemetry features with the failure records on the shared
# 'datetime' column (default inner join).
# NOTE(review): FeaturesDF_ is rebound to FeaturesDF two cells below, so this
# merge result is not used by the rest of the notebook — confirm it is needed.
FeaturesDF_ = pd.merge(TelemetryFeatDF, FailureDF, on='datetime')

In [None]:
# Extend the merged frame with the error records, again matching rows on the
# 'datetime' column (default inner join).
# NOTE(review): FeaturesDF_ is rebound to FeaturesDF in the next cell, so this
# merge result is not used by the rest of the notebook — confirm it is needed.
FeaturesDF_ = pd.merge(FeaturesDF_, ErrorsDF, on='datetime')

In [None]:
# Point FeaturesDF_ at the FeaturesDF frame loaded from features.csv and
# display it.
# NOTE(review): this rebinding replaces the frame built by the two merge cells
# above, so their results are discarded — confirm that is intended.
FeaturesDF_ = FeaturesDF 
FeaturesDF

### Exploring the DataFrames

In [None]:
# Show the first row of every frame in headlis, each preceded by a rule.
divider = '_' * 100
for frame in headlis:
    first_row = frame.head(1)
    print(divider)
    print(f"\nHEAD ({frame.name}) :")
    print(first_row)

In [None]:
# Show the last row of every frame in headlis under a rule and its name.
divider = '_' * 100
for frame in headlis:
    last_row = frame.tail(1)
    print(divider, f'\n{frame.name}')
    print(f"\nTAIL ({frame.name}) :")
    print(last_row)

In [None]:
# List the column dtypes of every frame in headlis under a rule and its name.
divider = '_' * 100
for frame in headlis:
    column_types = frame.dtypes
    print(divider, f'\n{frame.name}')
    print(f"\nDTYPES ({frame.name}) :")
    print(column_types)

In [None]:
# Count the missing values per column for every frame in headlis.
divider = '_' * 100
for frame in headlis:
    missing_per_column = frame.isna().sum()
    print(divider, f'\n{frame.name}')
    print(f"\nMISSVAL ({frame.name}) :")
    print(missing_per_column)

### Finding maximum and minimum timestamp date common in all DF's

In [None]:
# Collect the earliest and latest 'datetime' value of every frame, then take
# the window shared by all of them: the latest start and the earliest end.
MinTS = []
MaxTS = []
for frame in headlis:
    if frame.name == "MachinesDF":
        continue
    # Read the column by name and use min()/max(), so the result depends
    # neither on 'datetime' being the first column nor on the frame having
    # been sorted beforehand.
    stamps = frame['datetime']
    MinTS.append(stamps.min())
    MaxTS.append(stamps.max())
print(MinTS)
print(MaxTS)
Minimum_Time_Stamp = max(MinTS)
Maximum_Time_Stamp = min(MaxTS)
print(f"Minimum Time Stamp = {Minimum_Time_Stamp}")
print(f"Maximum Time Stamp = {Maximum_Time_Stamp}")

### Trimming all DF's to value points between min and max time stamps

In [None]:
# Trim every frame in headlis to the rows whose 'datetime' lies inside
# [Minimum_Time_Stamp, Maximum_Time_Stamp], then show its first and last row.
for frame in headlis:
    print('_'*140)
    stamps = frame['datetime']
    in_window = (stamps >= Minimum_Time_Stamp) & (stamps <= Maximum_Time_Stamp)
    # Drop in place: rebinding the loop variable would leave the frames held
    # in headlis (and the module-level names pointing at them) untouched.
    frame.drop(index=frame.index[~in_window], inplace=True)
    print(frame.head(1))
    print(frame.tail(1))
    print('_'*140)

### Dropping date time column after merging

In [None]:
# Remove the 'datetime' column from FeaturesDF in place; the cells below work
# on the remaining columns only.
FeaturesDF.drop('datetime', axis='columns', inplace=True)

### Pairplots showing MEAN and SD overlap

In [None]:
# Pairwise scatter/distribution grid for the voltage mean and voltage SD columns.
sns.pairplot(data=FeaturesDF.loc[:, ['voltmean', 'voltsd']])

In [None]:
# Pairwise scatter/distribution grid for the rotation mean and rotation SD columns.
sns.pairplot(data=FeaturesDF.loc[:, ['rotatemean', 'rotatesd']])

In [None]:
# Pairwise scatter/distribution grid for the pressure mean and pressure SD columns.
sns.pairplot(data=FeaturesDF.loc[:, ['pressuremean', 'pressuresd']])

In [None]:
# Pairwise scatter/distribution grid for the vibration mean and vibration SD columns.
sns.pairplot(data=FeaturesDF.loc[:, ['vibrationmean', 'vibrationsd']])

### Age based trend on voltage, rotation, vibration, pressure

In [None]:
# Mean pressure plotted against machine age.
ax = sns.lineplot(data=FeaturesDF, x='age', y='pressuremean',
                  color='orange', label='Pressure-Mean')
ax.set_xlabel("AGE")
ax.set_ylabel("Pressure")
ax.set_title("Age based trend graph")
ax.legend()
plt.show()

In [None]:
# Mean voltage plotted against machine age.
ax = sns.lineplot(data=FeaturesDF, x='age', y='voltmean',
                  color='red', label='Volt-Mean')
ax.set_xlabel("AGE")
ax.set_ylabel("Voltage")
ax.set_title("Age based trend graph")
ax.legend()
plt.show()

In [None]:
# Mean rotation plotted against machine age.
ax = sns.lineplot(data=FeaturesDF, x='age', y='rotatemean',
                  color='green', label='Rotate-Mean')
ax.set_xlabel("AGE")
ax.set_ylabel("Rotation")
ax.set_title("Age based trend graph")
ax.legend()
plt.show()

In [None]:
# Mean vibration plotted against machine age.
ax = sns.lineplot(data=FeaturesDF, x='age', y='vibrationmean',
                  color='blue', label='Vibration-Mean')
ax.set_xlabel("AGE")
ax.set_ylabel("Vibration")
ax.set_title("Age based trend graph")
ax.legend()
plt.show()

### Encoding Data 

In [None]:
# Label-encode the two identifier columns. Each column gets its own encoder so
# that both fitted mappings stay available afterwards (a single encoder refit
# on 'model' would forget the 'machineID' classes).
MachineIDEncoder = LabelEncoder()
ModelEncoder = LabelEncoder()
FeaturesDF['machineID'] = MachineIDEncoder.fit_transform(FeaturesDF['machineID'])
FeaturesDF['model']     = ModelEncoder.fit_transform(FeaturesDF['model'])
# Keep the historical name bound to the encoder that was fitted last.
LE = ModelEncoder

# Turn textual 'True'/'False' flags into '1'/'0' so the column can be cast to
# int below.
FeaturesDF.loc[FeaturesDF['failure'] == 'True' ,'failure'] = '1'
FeaturesDF.loc[FeaturesDF['failure'] == 'False','failure'] = '0'
FeaturesDF = FeaturesDF.astype({'failure' : int})
FeaturesDF

### Ensuring that both the cases are equally provided in the dataset

In [None]:
# Split FeaturesDF by the failure flag and keep at most the first 5000
# failure rows (in their existing order) for the balanced sample.
failure_flag = FeaturesDF['failure']
XFalse = FeaturesDF.loc[failure_flag == 0]
XTrue = FeaturesDF.loc[failure_flag == 1].iloc[:5000]
XTrue

In [None]:
# Keep at most the first 5000 non-failure rows, in their existing order.
XFalse = XFalse.iloc[:5000]
XFalse

In [None]:
# Stack the failure rows on top of the non-failure rows and renumber the
# index from zero; the result replaces FeaturesDF.
XFinal = [XTrue, XFalse]
FeaturesDF = pd.concat(objs=XFinal, axis=0, ignore_index=True)
FeaturesDF

### Separating Dataset into X and Y

In [None]:
# Y is the 'failure' label column; X is every other column of FeaturesDF.
Y = FeaturesDF['failure']
X = FeaturesDF.drop('failure', axis='columns')

### Splitting Dataset into Training, Testing and Validation 

In [None]:
#X = X.iloc[::3][:]
#Y = Y.iloc[::3][:]

In [None]:
# Two-stage split with a fixed seed: 70% of the rows go to training, and the
# held-out 30% is split again 70/30 into the test and validation sets.
split_options = {'test_size': 0.30, 'random_state': 42}
x_train, x_val, y_train, y_val = tts(X, Y, **split_options)
x_test, x_validate, y_test, y_validate = tts(x_val, y_val, **split_options)

In [None]:
# Print the first row and the shape of the training features and labels.
banner = '-_' * 40
rule = '_' * 80
print('TRAINING DATA SET \n', banner)
print(
    x_train.head(1), '\n',
    x_train.shape, '\n',
    rule, '\n', '\n',
    y_train.head(1), y_train.shape,
)

In [None]:
# Print the first row and the shape of the test features and labels.
banner = '-_' * 40
rule = '_' * 80
print('TESTING DATA SET \n', banner)
print(
    x_test.head(1), '\n',
    x_test.shape, '\n',
    rule, '\n', '\n',
    y_test.head(1), y_test.shape,
)

In [None]:
# Print the first row and the shape of the validation features and labels.
banner = '-_' * 40
rule = '_' * 80
print('VALIDATION DATA SET \n', banner)
print(
    x_validate.head(1), '\n',
    x_validate.shape, '\n',
    rule, '\n', '\n',
    y_validate.head(1), y_validate.shape,
)

### Scaling Data

In [None]:
# Standardise the features. The scaler learns its mean and variance from the
# training set only; the test and validation sets are transformed with those
# same statistics so all three sets share one scale and no held-out
# information leaks into the preprocessing.
Scaler = StandardScaler()
x_train    = Scaler.fit_transform(x_train)
x_test     = Scaler.transform(x_test)
x_validate = Scaler.transform(x_validate)

### Creating Machine Learning Models and evaluating which one is the best 

In [None]:
# Fit every scikit-learn classifier with its default constructor on the
# training set, score it on the test set, and rank the ones that ran by
# accuracy. Classifiers that raise are reported as FAILED and skipped.
Classifiers = all_estimators(type_filter='classifier')

ACCResults = []
with tqdm(desc='Pls Wait', total=len(Classifiers)) as pbar:
    for name, Classifier_model in Classifiers:
        pbar.set_description(f"Running {name}")
        pbar.update(1)
        try:
            model = Classifier_model()
            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)

            accuracy_of_model = accuracy_score(y_test, y_pred)
            report_of_model = classification_report(y_test, y_pred, output_dict=True)

            # Only the weighted averages of the report are kept per model.
            weighted = report_of_model['weighted avg']
            row = {
                'Model': name,
                'Accuracy': accuracy_of_model,
                'Precision': weighted['precision'],
                'Recall': weighted['recall'],
                'F1-Score': weighted['f1-score'],
            }
            ACCResults.append(row)
            print(f"MODEL : {name} ->", "\033[32m PASSED\033[0m")
        except Exception as e:
            # Best-effort sweep: a failing estimator must not stop the others.
            print(
                f"MODEL : {name} ->",
                "\033[31mFAILED\033[0m",
                ":\033[33m Error : \033[0m",
                f"\033[48;2;106;0;0m{e}\033[0m",
            )

ACCDF = pd.DataFrame(ACCResults).sort_values(by='Accuracy', ascending=False)
separator = '-_' * 60
print(separator)
print(ACCDF)
print(separator)

### Running the selected model (HistGradientBoostingClassifier)

In [None]:
# Train the chosen classifier with default settings, then report its
# performance on the test and validation sets.
_selected_name = "HistGradientBoostingClassifier"

# Look the class up explicitly so a missing name fails loudly here instead of
# leaving ML unbound (or silently reusing a value from an earlier run).
ML = dict(Classifiers).get(_selected_name)
if ML is None:
    raise LookupError(f"{_selected_name} was not returned by all_estimators")

model = ML()
model.fit(x_train,y_train)
y_pred = model.predict(x_test)
y_valid= model.predict(x_validate)
print("Test Report\n",'-_'*60)
print(classification_report(y_test,y_pred))
print()
print("Validation Test Report\n",'-_'*60)
print(classification_report(y_validate,y_valid))

# One confusion matrix plot per evaluation set.
cm1 = confusion_matrix(y_test,y_pred)
cm2 = confusion_matrix(y_validate,y_valid)
CMD1 = ConfusionMatrixDisplay(confusion_matrix = cm1)
CMD1.plot()
CMD2 = ConfusionMatrixDisplay(confusion_matrix = cm2)
CMD2.plot()

In [None]:
# Draw one random non-failure row from FeaturesDF, show it, then strip the
# 'failure' label so only the feature columns remain.
non_failure_rows = FeaturesDF.loc[FeaturesDF['failure'] == 0]
ran = pd.DataFrame(non_failure_rows).sample(n=1)
print(ran)
ran = ran.drop('failure', axis='columns')
print(ran)

In [None]:
# The model was fitted on StandardScaler output, so the raw feature row must
# pass through the same scaler before it is handed to predict().
ran = model.predict(Scaler.transform(ran))
print(ran)

In [None]:
# Draw one random failure row from FeaturesDF, show it, then strip the
# 'failure' label so only the feature columns remain.
failure_rows = FeaturesDF.loc[FeaturesDF['failure'] == 1]
rand1 = pd.DataFrame(failure_rows).sample(n=1)
print(rand1)
rand1 = rand1.drop('failure', axis='columns')
print(rand1)

In [None]:
# The model was fitted on StandardScaler output, so the raw feature row must
# pass through the same scaler before it is handed to predict().
model.predict(Scaler.transform(rand1))

### Export ML Model as a pickle file for Streamlit

In [None]:
# Export the trained model as a pickle file and the training feature matrix
# as a CSV, advancing a two-step progress bar after each file.
# NOTE(review): the model was fitted on features transformed by Scaler, and
# Scaler itself is not exported here — any consumer of the pickle has to apply
# the same scaling before calling predict(); confirm how the app handles this.
XTRDF = pd.DataFrame(x_train)
with tqdm(range(2), desc = "Exporting Pickle File") as pbar:
    with open("D:/PMUI/MODEL_FILES/RFCV1.pkl", 'wb') as files:
        pickle.dump(model, files)
    # Report success only once the pickle file has been closed.
    pbar.set_description("Export Successfull")
    pbar.update(1)

    # Let pandas open the CSV itself: CSV is text, so it should not be
    # written through a handle opened in binary mode.
    XTRDF.to_csv("D:/PMUI/MODEL_FILES/xtr.csv")
    pbar.set_description("Export Successfull")
    pbar.update(1)

### Streamlit Deployment 

In [None]:
# import streamlit as st
# import pandas as pd
# import pickle
# import json
# import requests
# from streamlit_lottie import st_lottie
# from sklearn.preprocessing import LabelEncoder, StandardScaler

# fail_logo_path = "D:/PMUI/fail1.json"
# pass_logo_path = "D:/PMUI/pass.json"
# pickle_url = "https://github.com/RohanGJ/PMUMLAi/raw/refs/heads/master/RFCV1.pkl"

# def load_lottiefile(filepath: str):
#   with open(filepath, "r") as f:
#     return json.load(f)

# def load_model_from_github(url):
#   response = requests.get(url)
#   response.raise_for_status()
#   model = pickle.loads(response.content)
#   st.error(f"MODEL LOADED from  : {url}")
#   return model

# model = load_model_from_github(pickle_url)

# st.title('MACHINE LEARNING')
# st.info("Preventine Maintanance")

# E1C,E2C,E3C,E4C,E5C = 0,0,0,0,0

# Vmean,Rmean,Pmean,VBmean = 0.00,0.00,0.00,0.00

# Vsd,Rsd,Psd,VBsd = 0.00,0.00,0.00,0.00

# tab1,tab2,tab3,tab4,tab5,tab6 = st.tabs(["Error_count","Voltage","Rotations","Pressure","Vibration","MISC"])

# with tab1:
#   st.write('Provide Error Counts for each attributes')
#   E1C    = st.selectbox('Error1Count',[0,1,2])
#   E2C    = st.selectbox('Error2Count',[0,1,2])
#   E3C    = st.selectbox('Error3Count',[0,1,2])
#   E4C    = st.selectbox('Error4Count',[0,1,2])
#   E5C    = st.selectbox('Error5Count',[0,1,2])
  
# with tab2:
#   st.write('Provide Mean and SD of VTin')
#   Vmean  = st.slider('Mean Voltage',150.00,220.00,170.00)
#   Vsd    = st.slider('Standard Deviation (Voltage)',6.50,27.50,14.50)

# with tab3:
#   st.write('Provide Mean and SD of RTin')
#   Rmean  = st.slider('Mean Rotation',260.00,500.00,450.00)
#   Rsd    = st.slider('Standard Deviation (Rotation)',19.00,105.00,50.00)

# with tab4:
#   st.write('Provide Mean and SD of PRin')
#   Pmean  = st.slider('Mean Pressure',90.00,155.00,100.00)
#   Psd    = st.slider('Standard Deviation (Pressure)',4.00,29.00,9.50)

# with tab5:
#   st.write('Provide Mean and SD of VBin')
#   VBmean  = st.slider('Mean Vibration',35.00,65.00,40.00)
#   VBsd    = st.slider('Standard Deviation (Vibration)',2.00,14.00,5.00)
# with tab6:
#   MID    = st.slider('Machine ID',0,99,1)
#   AGE    = st.slider('Age',0,20,1)
#   modrang = list(range(0,100))
#   MODEL  = st.selectbox('Model', modrang)

# FinLis = {
#       'machineID'    : [MID],
#       'voltmean'     : [Vmean],
#       'rotatemean'   : [Rmean],
#       'pressuremean' : [Pmean],
#       'vibrationmean': [VBmean], 
#       'voltsd'       : [Vsd],
#       'rotatesd'     : [Rsd], 
#       'pressuresd'   : [Psd], 
#       'vibrationsd'  : [VBsd],
#       'error1count'  : [E1C],
#       'error2count'  : [E2C], 
#       'error3count'  : [E3C], 
#       'error4count'  : [E4C], 
#       'error5count'  : [E5C],
#       'model'        : [MODEL], 
#       'age'          : [AGE], 
#   }

# STDF = pd.DataFrame(FinLis)

# def color_df(val):
#   if val > 0 and val < 100:
#     color = 'blue'
#   elif val > 99:
#     color = 'green'
#   elif val == 0:
#     color = 'red'
#   else:
#     color = 'orange'
#   return f'background-color: {color}'

# st.info("USER INPUTS :")
# st.dataframe(STDF.iloc[:,[0,1,2,3,4,5,6,15]].style.applymap(color_df))
# st.dataframe(STDF.iloc[:,7:-1].style.applymap(color_df))
# #st.write(STDF[:][:8])

# pred = []
# STDF = STDF.values
# pred = list(model.predict(STDF))

# col1, col2 = st.columns(2, gap = "small")

# col1.info("MODEL Eval")
# col1.subheader("Prediction : ")
# col1.write(model.predict(STDF))

# col1.subheader("Prediction Probability :")
# col1.write(model.predict_proba(STDF))
  
# lottie_pass = load_lottiefile(pass_logo_path)
# lottie_fail = load_lottiefile(fail_logo_path)

# with col2:
#   st.info("STATUS")
#   if pred[0] == 0:
#     st_lottie(lottie_pass, speed = 0.75, loop = False)
#   elif pred[0] == 1:
#    st_lottie(lottie_fail, speed = 0.75, loop = True, reverse = True) 
#   else:
#     st.info("Press Run model after selecting inputs")