### Prep Work

In [1]:
# Import general libraries
import pandas as pd
import numpy as np

import datetime
import warnings; warnings.simplefilter('ignore')

np.random.seed(42)

In [2]:
# Import NLP libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import spacy
from spacy import displacy
import en_core_web_sm

In [3]:
# Increase size of dataframe
pd.set_option('display.max_columns', 50000)
pd.set_option('display.max_rows', 100000)
pd.options.display.max_colwidth = 500

In [8]:
# Read processed data
data = pd.read_csv('./data/processed_data_new.csv', index_col=0)

### Predict action for a specific log

In [21]:
# new_problem_log = ['COFFEE MAKER 420 WILL NOT SHUTOFF.']
# new_problem_log = ['SEAT 30C NEEDS NEW SEAT CUSHION COVER']
# new_problem_log = ['22DEC BZN DF -Q : CREW REPORT: SEATBELT AT 34E IS COVERED WITH BABY VOMIT SAMC TEXT: W.O. 15122224']
# new_problem_log = ['AFT GALLEY COFFEE MAKER LEAKING.ENTERED INTO SCEPTRE FOR EMPLOYEE #0080038.']
# new_problem_log = ['REMOVE AND REINSTALL JACKSCREW RAIL AND SI DE RAILS REMOVED FOR DRAINMAST INSTALLATION. BY- 087562']
# new_problem_log = ['SEAT CUSHION 12B NEEDS TO BE REPLACED . LOG ENTERED BY G024193, SCEPTRE COMPUTER INDICATES EMPLOYEE NUMBER IS INVALID.	REPLACED CUSHION AND COVER']
# new_problem_log = ['SEAT 27C NEEDS NEW SEAT CUSHION COVER']
# new_problem_log = ['FIRST OFFICERS OXYGEN MASK HARNESS WILL NOT INFLATE']
new_problem_log = ['COFFEE MAKER 812 FLOWING CONSTANTLY']



#### Calculate cosine similarity from tfidf vectors, identify Nouns and Verbs

In [22]:
# Print new problem log
print(f"New Problem Log: {new_problem_log}")
print("-" * 60)

#Instantiate TFIDFVectorizer
tf_log = TfidfVectorizer(ngram_range=(2,8), analyzer='char_wb')

# Fit the model
tf_fit = tf_log.fit_transform(new_problem_log)
tf_logs = tf_log.transform(data['problem_log'])

# Create a Dataframe containing cosine similarities
matching_logs = pd.DataFrame(cosine_similarity(tf_fit, tf_logs),
                             columns=data['problem_log' ]).T
matching_logs.columns=['cos_similarity']

# Merge with the original dataframe
matching_logs = pd.merge(matching_logs,data[['problem_log','corrective_action']],on='problem_log', how='left')

# Sort matching logs to find highest cosine similarity
matching_logs.sort_values(by='cos_similarity', ascending=False, inplace=True)

# Reset index of matching_logs dataframe
matching_logs = matching_logs.reset_index(drop=True)

# cos_similarity = matching_logs['cos_similarity'].nlargest(2).iloc[-1]
print(f"Cosine Similarity: {matching_logs['cos_similarity'].nlargest(2).iloc[-1]}")
print("-" * 60)

# Find problem log for highest cosine similarity
matching_problem_log = matching_logs['problem_log'][1]
print(f"Matching Problem Log: {matching_problem_log}")
print("-" * 60)

# Find corrective action for highest cosine similarity
matching_corrective_action = matching_logs['corrective_action'][1]
print(f"Matching Corrective Action: {matching_corrective_action}")
print("-" * 60)

#### Use spacy to find nouns and verbs

nlp = en_core_web_sm.load()

def find_noun(text):
    doc = nlp(text)
    nouns = [chunk.text for chunk in doc.noun_chunks]
    print(f"Noun phrases:, {nouns}")
    return nouns

def find_verb(text):
    doc = nlp(text)
    verbs = [token.lemma_ for token in doc if token.pos_ == "VERB"]
    print(f"Verb phrases:, {verbs}")
    return verbs

# Find nouns for matching problem log
nouns = find_noun(matching_problem_log)

# Find nouns for matching corrective action
verbs = find_verb(matching_corrective_action)

predictive_action = verbs + nouns
print("-" * 60)
print(f"Predictive Corrective Action: {predictive_action}")

New Problem Log: ['COFFEE MAKER 812 FLOWING CONSTANTLY']
------------------------------------------------------------
Cosine Similarity: 0.8535481794960198
------------------------------------------------------------
Matching Problem Log: fwd coffee maker will not stop brewing water is constantly coming out
------------------------------------------------------------
Matching Corrective Action: removed and replaced coffeemaker at position ops check normalrotable parts change was indicated tt sn off coffeemaker in off on 
------------------------------------------------------------
Noun phrases:, ['fwd coffee maker', 'water']
Verb phrases:, ['remove', 'replace', 'check', 'be', 'indicate']
------------------------------------------------------------
Predictive Corrective Action: ['remove', 'replace', 'check', 'be', 'indicate', 'fwd coffee maker', 'water']


In [23]:
predictive_action

['remove', 'replace', 'check', 'be', 'indicate', 'fwd coffee maker', 'water']