In [9]:
##########################SVM Model####################################
#import necessary libraries
import pandas as pd
import numpy as np
import warnings
import seaborn as sns
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer

#load the sample data from sample.xlsx file
data = pd.read_excel('final.xlsx')

#Dependent and independent features
x=data['Synopsis']
y=data['Risk level']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)

#Training and testing data
print("Training data SVM model\n\n",X_train,"\n\n",y_train)
print("")
print("Testing data for SVM model\n\n",X_test)

#pre-processing
#Initialize the TF-IDF vectorizer
tfidf = TfidfVectorizer(sublinear_tf=True, max_df=0.95, norm='l2', encoding='latin-1', ngram_range=(1, 2), stop_words='english')

#transform independent variable using TF-IDF vectorizer
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

#Build the SVM model
SVM_model = LinearSVC()

#Use calibrated classifier for calculate test probability
clf1 = CalibratedClassifierCV(SVM_model)

#Fit train and test into the model
clf1.fit(X_train_tfidf, y_train)

#Predict the result
y_pred1 = clf1.predict(X_test_tfidf)

#Evaluate the model# Creating the Confusion Matrix
#cm = confusion_matrix(y_test, y_pred1)

#print("\n","Confusion matrix SVM model\n")
#fig, ax = plt.subplots(figsize=(7,5))
#sns.heatmap(cm, annot=True, fmt='d')
#plt.show()
#print("\n")
#print("Classification report for SVM model\n")
#print(classification_report(y_test, y_pred1))
#print("\n")
#print()
#print("Accuracy score of SVM model",round(accuracy_score(y_test, y_pred1),2)*100)
#print("\n")

##########################Naive Bayes Model####################################
#import necessary libraries
import pandas as pd
import warnings
import seaborn as sns
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import MultinomialNB

#Build the classifier
clf2 = MultinomialNB()

#Fit train and test into the model
clf2.fit(X_train_tfidf, y_train)

#Predict the result
y_pred2 = clf2.predict(X_test_tfidf)

#Evaluate the model# Creating the Confusion Matrix
#from sklearn.metrics import confusion_matrix
#cm = confusion_matrix(y_test, y_pred2)

#print("\n","Confusion matrix Naive Bayes model\n")
#fig, ax = plt.subplots(figsize=(7,5))
#sns.heatmap(cm, annot=True, fmt='d')
#plt.show()
#print("\n")
#print("Classification report for Naive Bayes model\n")
#print(classification_report(y_test, y_pred2))
#print("\n")
#print()
#print("Accuracy score of Naive Bayes model",round(accuracy_score(y_test, y_pred2),2)*100)
#print("\n")

###################################MaxEnt Model###############################
#import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics

def formatop(x,y):
    ind1=x.index
    format1=[]
    fortup=()
    for i in ind1:
        textup=x[i].split()
        fortup=(textup,y[i])
        format1.append(fortup)
    return(format1)

def list_to_dict(words_list):
    return dict([(word, True) for word in words_list])

train_data=formatop(X_train,y_train)
test_data=formatop(X_test,y_test)
 
training_set_formatted = [(list_to_dict(element[0]), element[1]) for element in train_data]

import nltk
numIterations = 10
 
algorithm = nltk.classify.MaxentClassifier.ALGORITHMS[1]
classifier = nltk.MaxentClassifier.train(training_set_formatted, algorithm, max_iter=numIterations)
#classifier.show_most_informative_features(10)

test_set_formatted = [(list_to_dict(element[0]), element[1]) for element in test_data] 

y_pred3=[]
y_test=[]
for i in range(0,len(test_data)):
    y_test.append(test_set_formatted[i][1])
    text = test_set_formatted[i][0]
    y_pred3.append(classifier.classify(text))    
print("\n")
#Conf_Mat = metrics.confusion_matrix(y_test,y_pred3)
#print("The confusion matrix is\n\n",Conf_Mat)

#classification report
#print("Classification report\n",metrics.classification_report(y_test,y_pred3))
#print("Accuracy score : ",metrics.accuracy_score(y_test,y_pred3)*100)

#################################Ensemble model######################

#Training and testing data
print("Training data of Ensemble model\n\n",X_train,"\n\n",y_train)
print("\n")
print("Testing data of Ensemble model\n\n",X_test)
print("\n")

#store all predictions
frame = [y_pred1,y_pred2,y_pred3]
df3 = pd.DataFrame(frame)

#find prediction mode from all three classifiers
En_pred = df3.mode().loc[0]
print("Classification report of Ensemble model\n")
print(classification_report(y_test, En_pred))

print("Confusion matrix of ensemble model\n")
print(confusion_matrix(y_test, En_pred))
print("\n")

#test our model on the test data
print("Accuracy score of ensemble model : ",metrics.accuracy_score(y_test, En_pred)*100)
print("\n")

#plot precision recall graph
plt.rcParams["figure.figsize"] = [16,10]
import scikitplot as skplt
probas = clf1.predict_proba(X_test_tfidf)
skplt.metrics.plot_precision_recall(y_test, probas)
plt.title("Precision & Recall graph for Ensemble model")
plt.show()


############################RNN Model#################################
#import necessary libraries
import os
os.environ['KERAS_BACKEND'] = 'theano'
import time
import random
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from keras.layers import Flatten
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.utils import np_utils
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

#load the sample data from sample.xlsx file
data = pd.read_excel('\Users\abrar\OneDrive\سطح المكتب/final.xlsx')

#Make it as a data frame
df = pd.DataFrame(data)
#print(df.columns)

#Convert string into numbers
df1 = df.apply(lambda s: s.map({k:i for i,k in enumerate(s.unique())}))

co = df1.corr(method='pearson')
#print(co)

a = df1[['Date', 'Local Time Of Day','Crew Size','Aircraft Operator','Light','Operating Under FAR Part','Flight Plan', 'Mission', 'Flight Phase','Location Of Person','Human Factors','Anomaly', 'Detector','When Detected', 'Result', 'Contributing Factors / Situations','Primary Problem','Risk level']]

b = df['Risk level']

final = [a,b]

#Concatenate the independent and dependent variables
finaldf = pd.concat(final,axis=1)

#feature selection
X1 = finaldf.iloc[:,0:17]
y1 = finaldf.iloc[:,17]

#Split the data into train and testing
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(X1, y1, test_size=0.1, random_state=0)

#Print training data
print("Training data\n",X_train1,"\n",Y_train1)
print("\n\n")

#Print testing data
print("Testing data\n",X_test1)
print("\n\n")

#make dependent variable categorical
Y_train1 = np_utils.to_categorical(Y_train1,num_classes=3)
Y_test1 = np_utils.to_categorical(Y_test1,num_classes=3)

embed_dim = 128
lstm_out = 300
batch_size = 50

#Build Recurrent neural networks
model = Sequential()
model.add(Embedding(2500, embed_dim,input_length = X_train1.shape[1]))
model.add(LSTM(lstm_out))
model.add(Dense(3,activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())

#Here we train the Network.
start_time = time.time()
model.fit(X_train1, Y_train1, batch_size = batch_size, epochs = 5,  verbose = 2)
end_time = time.time()
elapsed_time = end_time - start_time
print("Time to train model: %.3f seconds" % elapsed_time)

start_time = time.time()
score,acc = model.evaluate(X_test1,Y_test1,verbose = 2,batch_size = batch_size)
end_time = time.time()
elapsed_time = end_time - start_time
print("Time to evaluate model: %.3f seconds" % elapsed_time)
print("\n")


#Predict the test results
prediction = model.predict(X_test1)
length = len(prediction)
y_label = np.argmax(Y_test1,axis=1)
predict_label = np.argmax(prediction,axis=1)

#classification report
print("Confusion Matrix of RNN\n",confusion_matrix(y_label,predict_label))
print("\n")
print("Classification Report of RNN\n",classification_report(y_label,predict_label))
print("\n")
print("Accuracy : ",(accuracy_score(y_label,predict_label)*100))
print("\n")

#Precision-recall graph
plt.rcParams["figure.figsize"] = [16,10]
import scikitplot as skplt
probas = model.predict_proba(X_test1)
skplt.metrics.plot_precision_recall(y_label, probas)
plt.title("Precision & Recall graph for RNN model")
plt.show()

#predict classes for test data
RNN_y_pred = model.predict_classes(X_test1)

##################################Final model########################
#store all predictions  
frame1 = [En_pred,RNN_y_pred]
df4 = pd.DataFrame(frame1)

#final prediction from Ensemble and RNN
fl_pred = df4.loc[0]

#Classification report
print("Classification report of combined model\n")
print(classification_report(y_test, fl_pred))

#Accuracy
a = (metrics.accuracy_score(y_test, fl_pred)*100)
print("Accuracy of the combined model : ",round(a))

#plot precision recall graph
plt.rcParams["figure.figsize"] = [16,10]
import scikitplot as skplt
probas = clf1.predict_proba(X_test_tfidf)
skplt.metrics.plot_precision_recall(y_test, probas)
plt.title("Precision & Recall graph for Combined model")
plt.show()

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-1: truncated \UXXXXXXXX escape (<ipython-input-9-7af0ec340793>, line 201)

In [8]:
f


NameError: name 'f' is not defined