In [1]:
import pandas as pd
import numpy as np 

# LOAD AND PERFORM BASIC CHECKS FOR DATASET

In [2]:
# Data Source: XML exports of past Maintenance Action Form (MAF)
# records extracted from the Decision Knowledge
# Programming for Logistics Analysis and Technical
# Evaluation (DECKPLATE) system, the NAVAIR data
# warehouse for aircraft maintenance, flight, and usage
# data.


# Load test, validation and train datasets
val_x = pd.read_csv("./Data/development-x.csv")
val_y = pd.read_csv("./Data/development-y.csv")
# Create a training dataset
train_x = pd.read_csv("./Data/training-x.csv")
train_y = pd.read_csv("./Data/training-y.csv")
# Create a testing dataset
test_x = pd.read_csv("./Data/test-x.csv")
test_y = pd.read_csv("./Data/test-y.csv")

In [3]:
# Rename first column
train_x.rename(columns={'Unnamed: 0':'Code'},inplace=True)
train_y.rename(columns={'Unnamed: 0':'Code'},inplace=True)
test_x.rename(columns={'Unnamed: 0':'Code'},inplace=True)
test_y.rename(columns={'Unnamed: 0':'Code'},inplace=True)
val_x.rename(columns={'Unnamed: 0':'Code'},inplace=True)
val_y.rename(columns={'Unnamed: 0':'Code'},inplace=True)

In [4]:
train_x.head()

Unnamed: 0,Code,DISCREPANCY
0,2266,rh nose landing gear tire has multiple cuts ex...
1,8993,mrh accumulator low
2,5059,hole in yellow tail rotor boot. mrc-300 card 1...
3,4936,ib hydraulic hose for lh mlg brake pressure ha...
4,3846,mrh fluid level is low


In [21]:
train_x.shape,train_y.shape

((22407, 2), (22407, 2))

In [5]:
train = train_x.merge(train_y,how='inner',on='Code')
test  = test_x.merge(test_y,how='inner',on='Code')
val   = val_x.merge(val_y,how='inner',on='Code')

In [6]:
print(val[val['Code']==1644]['DISCREPANCY'][0])
print(val[val['Code']==1644]['DISCREPANCY'][1])
print("**************************************************")
print(val[val['Code']==1644]['CORRECTIVE ACTION'][0])
print(val[val['Code']==1644]['CORRECTIVE ACTION'][1])
print("**************************************************")

mrh hyd level in the red.          
mrh hyd level in the red.          
**************************************************
serviced mrh accum to 1675 psi, high green, oat: 22*c iaw gai-000. current a/c hours: 1,316.200. inspected all dampers for leak. checks good area fod free and secure.  
removed and replaced lh nlg tire iaw sss 3240. ataf apaf aff.
**************************************************


In [12]:
train_x[train_x['Code']==5059]

Unnamed: 0,Code,DISCREPANCY
2,5059,hole in yellow tail rotor boot. mrc-300 card 1...
11832,5059,rh nose landing gear tire worn.


In [13]:
train_y[train_y['Code']==5059]

Unnamed: 0,Code,CORRECTIVE ACTION
2,5059,applied rtv to hole in yellow tail rotor boot ...
11832,5059,replaced rh nlg tire iaw sss 3240. used pema 6...


In [17]:
train_y['CORRECTIVE ACTION'].isnull().sum()

545

In [14]:
# print("Discrepancies corresponding to 'Unnamed: 0' 5059 are: ")
# print(train_x[train_x['Code']==5059]['DISCREPANCY'][2])
# print(train_x[train_x['Code']==5059]['DISCREPANCY'][11832])
# print("**************************************************")
# print("Corrective Actions corresponding to 'Unnamed: 0' 5059 are: ")
# print(train_y[train_y['Code']==5059]['CORRECTIVE ACTION'][2])
# print(train_y[train_y['Code']==5059]['CORRECTIVE ACTION'][11832])
# print("**************************************************")

In [19]:
train.shape[0]

27105

In [22]:
train[train['DISCREPANCY']=='red upper pcr bolt is worn out of limits.']

Unnamed: 0,Code,DISCREPANCY,CORRECTIVE ACTION
45,5002,red upper pcr bolt is worn out of limits.,
46,5002,red upper pcr bolt is worn out of limits.,replaced worn pcr bolt iaw: a1-h60ra-150-300. ...


In [23]:
print("For Corr Action Text: ")
print("Repeated Corr Act percentages train is: ", 100*(1-len(set(train['CORRECTIVE ACTION'].tolist()))/train.shape[0]))
print("Repeated Corr Act percentages test is: ", 100*(1-len(set(test['CORRECTIVE ACTION'].tolist()))/test.shape[0]))
print("Repeated Corr Act percentages val is: ", 100*(1-len(set(val['CORRECTIVE ACTION'].tolist()))/val.shape[0]))

For Corr Action Text: 
Repeated Corr Act percentages train is:  21.674967718133185
Repeated Corr Act percentages test is:  5.662580268534734
Repeated Corr Act percentages val is:  9.60742505957607


In [29]:
train["D_CA"] = train['DISCREPANCY']+"__"+train['CORRECTIVE ACTION']
test["D_CA"]  = test['DISCREPANCY']+"__" +test['CORRECTIVE ACTION']
val["D_CA"]   = val['DISCREPANCY']+"__"+val['CORRECTIVE ACTION']

# PROMPT CREATION EXISTING 

In [25]:
import csv, random

f_x = csv.reader(open("Data/training-x.csv"))
f_y = csv.reader(open("Data/training-y.csv"))

train_x = {}
train_y = {}

for idx, row in enumerate(f_x):
    train_x[idx] = row[1]

for idx, row in enumerate(f_y):
    train_y[idx] = row[1]

print(len(train_x), len(train_y))

pairs = []
for idx in train_x:
    if train_x[idx] != '' and train_y[idx] != '':
        pairs.append((train_x[idx], train_y[idx]))

22408 22408


In [26]:
pairs

[('DISCREPANCY', 'CORRECTIVE ACTION'),
 ('rh nose landing gear tire has multiple cuts exceeding the 4/32 cut limit          ',
  'removed and replaced rh nlg tire iaw sss 3240 pema 9964 ataf apaf aff.  '),
 ('mrh accumulator low',
  'serviced mrh accumulator to second green iaw gai-000 nitrogen pressor 1600 psi oat 29c. flight hours 3530.1 needs conditional. area clean secure and fod free. pema s/n 6cksa67650'),
 ('hole in yellow tail rotor boot. mrc-300 card 11.12',
  'applied rtv to hole in yellow tail rotor boot iaw srm-000. all checks good. area fod free and secure.'),
 ('mrh fluid level is low          ',
  'servicing not required iaw: gai-000. area clean and fod free. pema used: 6fksa75955  '),
 ('main rotor head low          ',
  'serviced mrh accumulator iaw: a1-h60ra-gai-000. hydraulics gauge high green and nitrogen gauge 1450 psi using hsu 229 and n2 bottle 017 at 22 degrees cel. aircraft flight hours 1227.9. all checks good area fod free and secure.  '),
 ('mrh hydraulic flu

In [27]:
f = open('Data/gpt3.txt', 'w')
subset = random.sample(pairs, 25)
for pair in subset:
    f.write('<DISCREPANCY> %s <ACTION> %s\n' % (pair[0].strip(), pair[1].strip()))