# FailureSense – NLP Basics (Maintenance Log Analysis)

Objective:
- Demonstrate basic NLP preprocessing techniques
- Apply NLP concepts to simulated maintenance log text
- Understand text representation for machine learning


In [1]:
import pandas as pd
import numpy as np

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer


Downloading NLTK resources

In [2]:
# Fetch the NLTK data packages used later in this notebook; they are stored
# under the local nltk_data directory.
nltk.download("punkt")  # tokenizer package (unzipped under tokenizers/)
nltk.download("stopwords")  # stopword lists (unzipped under corpora/)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ACSS\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ACSS\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

creating maintenance logs

In [3]:
# Simulated free-text maintenance entries: one short sentence per log record.
maintenance_logs = [
    "Machine stopped due to high temperature",
    "Tool wear exceeded safe limit",
    "Unexpected power failure during operation",
    "Overstrain detected in spindle motor",
    "Routine maintenance completed successfully",
    "Machine operating under normal conditions",
]

# Single-column frame; every later preprocessing stage adds a column to it.
df_logs = pd.DataFrame(maintenance_logs, columns=["log_text"])
df_logs


Unnamed: 0,log_text
0,Machine stopped due to high temperature
1,Tool wear exceeded safe limit
2,Unexpected power failure during operation
3,Overstrain detected in spindle motor
4,Routine maintenance completed successfully
5,Machine operating under normal conditions


Text preprocessing

In [4]:
# Lowercase every log so that differently-cased spellings of a word
# (e.g. "Machine" / "machine") map to the same term downstream.
df_logs["log_text_clean"] = df_logs["log_text"].str.lower()


tokenization

In [6]:
# NOTE(review): nltk is already imported in the first code cell, so this
# re-import is redundant; the execution count also jumps from In[4] to In[6],
# which suggests a removed cell — confirm the notebook passes Restart & Run All.
import nltk
# Extra tokenizer package fetched just before word_tokenize is first called;
# presumably required by word_tokenize on this NLTK version — TODO confirm.
nltk.download("punkt_tab")


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\ACSS\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [7]:
# Split each lowercased log into word tokens using NLTK's word_tokenize.
lowercased_logs = df_logs["log_text_clean"]
df_logs["tokens"] = lowercased_logs.map(word_tokenize)
df_logs


Unnamed: 0,log_text,log_text_clean,tokens
0,Machine stopped due to high temperature,machine stopped due to high temperature,"[machine, stopped, due, to, high, temperature]"
1,Tool wear exceeded safe limit,tool wear exceeded safe limit,"[tool, wear, exceeded, safe, limit]"
2,Unexpected power failure during operation,unexpected power failure during operation,"[unexpected, power, failure, during, operation]"
3,Overstrain detected in spindle motor,overstrain detected in spindle motor,"[overstrain, detected, in, spindle, motor]"
4,Routine maintenance completed successfully,routine maintenance completed successfully,"[routine, maintenance, completed, successfully]"
5,Machine operating under normal conditions,machine operating under normal conditions,"[machine, operating, under, normal, conditions]"


stopword removal

In [8]:
# English stopwords as a set for fast membership checks.
stop_words = set(stopwords.words("english"))

# Keep a token only if it is purely alphabetic and not an English stopword.
df_logs["tokens_filtered"] = [
    [token for token in tokens if token.isalpha() and token not in stop_words]
    for tokens in df_logs["tokens"]
]

df_logs


Unnamed: 0,log_text,log_text_clean,tokens,tokens_filtered
0,Machine stopped due to high temperature,machine stopped due to high temperature,"[machine, stopped, due, to, high, temperature]","[machine, stopped, due, high, temperature]"
1,Tool wear exceeded safe limit,tool wear exceeded safe limit,"[tool, wear, exceeded, safe, limit]","[tool, wear, exceeded, safe, limit]"
2,Unexpected power failure during operation,unexpected power failure during operation,"[unexpected, power, failure, during, operation]","[unexpected, power, failure, operation]"
3,Overstrain detected in spindle motor,overstrain detected in spindle motor,"[overstrain, detected, in, spindle, motor]","[overstrain, detected, spindle, motor]"
4,Routine maintenance completed successfully,routine maintenance completed successfully,"[routine, maintenance, completed, successfully]","[routine, maintenance, completed, successfully]"
5,Machine operating under normal conditions,machine operating under normal conditions,"[machine, operating, under, normal, conditions]","[machine, operating, normal, conditions]"


Text vectorization (TF-IDF)

In [9]:
# Learn the vocabulary from the lowercased logs and encode each log as one
# TF-IDF row.
# NOTE(review): this fits on "log_text_clean", not on "tokens_filtered", so the
# stopword removal above is not reflected here — the vocabulary shown further
# down still contains "to", "in", "during" and "under". Confirm whether that is
# intended.
vectorizer = TfidfVectorizer()
X_tfidf = vectorizer.fit_transform(df_logs["log_text_clean"])


In [10]:
# Show the TF-IDF matrix as a dense array: one row per log, one column per
# vocabulary term.
X_tfidf.toarray()


array([[0.        , 0.        , 0.        , 0.4198708 , 0.        ,
        0.        , 0.        , 0.4198708 , 0.        , 0.        ,
        0.34430007, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.4198708 , 0.        , 0.4198708 , 0.4198708 ,
        0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.4472136 , 0.        , 0.        , 0.        , 0.4472136 ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.4472136 ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.4472136 , 0.        , 0.        , 0.4472136 ],
       [0.        , 0.        , 0.        , 0.        , 0.4472136 ,
        0.        , 0.4472136 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
  

Inspect vocabulary

In [11]:
# List the terms the vectorizer learned; each name labels one column of X_tfidf.
vectorizer.get_feature_names_out()


array(['completed', 'conditions', 'detected', 'due', 'during', 'exceeded',
       'failure', 'high', 'in', 'limit', 'machine', 'maintenance',
       'motor', 'normal', 'operating', 'operation', 'overstrain', 'power',
       'routine', 'safe', 'spindle', 'stopped', 'successfully',
       'temperature', 'to', 'tool', 'under', 'unexpected', 'wear'],
      dtype=object)

### NLP Observations

- Text preprocessing converts unstructured maintenance logs into structured representations
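- TF-IDF weights each term by how often it occurs in a log, discounted by how many logs contain it, so terms shared across logs (e.g. "machine") score lower than terms unique to one log; note that the vectorizer above was fitted on `log_text_clean`, so stopwords such as "to" and "in" still appear in the vocabulary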
- NLP techniques can complement sensor-based systems for explainability
- This module is included for educational demonstration purposes
