In [158]:
''' 
    /*----------------------------- IMPORT_LIBRARIES -------------
'''
import re
import string
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder     
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier          
from sklearn.model_selection import train_test_split
from prettytable import PrettyTable   
from sklearn import preprocessing

import warnings
warnings.filterwarnings("ignore")

In [159]:
'''
    /*-------------------- LOAD_TRAINING_DATA ----------------
    | Function  : read_csv()
    | Purpose   : Load the training dataset from a CSV file
    | Arguments :
    |       filepath : Location of the CSV file
    |       encoding : Text encoding of the file (cp1252)
    | Return    :
    |       dataset  : Dataset in DataFrame Format
    *---------------------------------------------------------*/
'''

# Parse the training CSV into a DataFrame
training_csv_path = 'C:/Users/Saqib Ali/Desktop/Preprocessed Data/output_dataset_train.csv'
training_data = pd.DataFrame(pd.read_csv(training_csv_path, encoding='cp1252'))

# Lift the pandas display limits so the complete frame is printed
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

# Show the loaded training data under its heading
print("Training Data:", "=============\n", training_data, sep="\n")

Training Data:

                                                 tweets       label
0     My Token No. is KOL06442019200931 Sir Requesti...      Urgent
1     My father didn't tell me how to live. He lived...  Not-Urgent
2     My twitter people you need to Join the convers...      Urgent
3     mystery Package left CT on 16 with delivery in...  Not-Urgent
4     Nah. But it sounds rude. Phonecall should stri...      Urgent
5     Name any other job where this level of incompe...  Not-Urgent
6     National assembly renovation more urgent than ...      Urgent
7     My Fathers Day has begun by needing to take my...  Not-Urgent
8     Need a plumber in Greenville Wisconsin? -877-d...      Urgent
9     Need a plumber in Lower Waterford Vermont? -87...  Not-Urgent
10    Need a plumber in Ponca City Oklahoma? -877-dr...      Urgent
11    Nah don't bother buying one Saka and Martinell...  Not-Urgent
12    Need A+ O+ Narayana Hrudayalaya Hospital Hosur...      Urgent
13    My favorite countdown time

In [160]:
'''
    /*-------------------- LOAD_TESTING_DATA ----------------
    | Function  : read_csv()
    | Purpose   : Load the testing dataset from a CSV file
    | Arguments :
    |       filepath : Location of the CSV file
    | Return    :
    |       dataset  : Dataset in DataFrame Format
    *---------------------------------------------------------*/
'''

# Parse the testing CSV into a DataFrame
testing_csv_path = 'C:/Users/Saqib Ali/Desktop/Preprocessed Data/output_dataset-test.csv'
testing_data = pd.DataFrame(pd.read_csv(testing_csv_path))

# Lift the pandas display limits so the complete frame is printed
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

# Show the loaded testing data under its heading
print("Testing Data:", "============\n", testing_data, sep="\n")

Testing Data:

                                                tweets       label
0    its actually battery which is unwanted touchin...  Not-Urgent
1    trans rights are under threat in the uk under ...      Urgent
2    Sir me and my family are permanent residents o...  Not-Urgent
3    Even Southwark have acknowledged its a peak ho...  Not-Urgent
4    no batelec dont extend the brownout ur so sexe...  Not-Urgent
5    could we get an emergency loan cause matt mace...      Urgent
6    The two greatest of the in times of ...? ) a s...  Not-Urgent
7    Nancy Pelosi can go F herselfIn a letter to th...      Urgent
8    jesus even more reason to leave it at home & t...  Not-Urgent
9    urgent helptiny baby brittany slipped down nea...      Urgent
10   "To acknowledge others' requests as important ...  Not-Urgent
11   this lady is disabeld from legs and feet and r...      Urgent
12   advantages elements that keep the outer layer ...  Not-Urgent
13   ... It is tempting to say yes because ther

In [161]:
'''
    /*------------------------- DATA_UNDERSTANDING -----------------------
    |    • Column (attribute) names of the Training Data
    |    • Row (instance) count of the Training Data
    *--------------------------------------------------------------------*/
'''
# Column names of the training frame
training_attribute_names = training_data.columns
print(
    "\n\nAttributes Names in Training Data:",
    "==================================\n",
    training_attribute_names,
    sep="\n",
)

# Number of rows in the training frame
training_instance_count = training_data.shape[0]
print("\n\nTotal Number of Instances in Training Data:", training_instance_count)
print("==============================================\n")



Attributes Names in Training Data:

Index(['tweets', 'label'], dtype='object')


Total Number of Instances in Training Data: 4213



In [162]:
'''
    /*------------------------PRE-PROCESSING-TRAINING-DATA-------------
    | Function  : lower() & re.sub()
    | Purpose   : Perform following preprocessing on the 'tweets' column:
    |              • Lower case every whitespace-separated token
    |              • Collapse whitespace runs into a single space
    |              • Replace every run of non-word characters with one space
    | Arguments :
    |       text: Text to be pre-processed
    | Return    :
    |       text: Pre-processed text
    *-----------------------------------------------------------------*/
'''
# Build the pre-processed frame as a NEW DataFrame. Plain assignment
# (`preprocessed = training_data`) would only alias the raw frame, so the raw
# tweets would be overwritten and the cell would not be safely re-runnable.
preprocessed_training_data = training_data.assign(
    tweets=training_data['tweets'].map(
        lambda tweet: re.sub(r'\W+', ' ', " ".join(word.lower() for word in tweet.split()))
    )
)

# Save the Pre-processed Training Data into CSV File
preprocessed_training_data.to_csv(r'D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-preprocessed-training-data.csv', index = False, header=True)
pd.set_option("display.max_rows", None, "display.max_columns", None)

print("\nTraining Data After Pre-processing:")
print("====================================\n")
print(preprocessed_training_data)


Training Data After Pre-processing:

                                                 tweets       label
0     my token no is kol06442019200931 sir requestin...      Urgent
1     my father didn t tell me how to live he lived ...  Not-Urgent
2     my twitter people you need to join the convers...      Urgent
3     mystery package left ct on 16 with delivery in...  Not-Urgent
4     nah but it sounds rude phonecall should strict...      Urgent
5     name any other job where this level of incompe...  Not-Urgent
6     national assembly renovation more urgent than ...      Urgent
7     my fathers day has begun by needing to take my...  Not-Urgent
8     need a plumber in greenville wisconsin 877 drp...      Urgent
9     need a plumber in lower waterford vermont 877 ...  Not-Urgent
10    need a plumber in ponca city oklahoma 877 drpi...      Urgent
11    nah don t bother buying one saka and martinell...  Not-Urgent
12    need a o narayana hrudayalaya hospital hosur r...      Urgent
13    my f

In [163]:
'''
    /*------------------------- DATA_UNDERSTANDING -----------------------
    |    • Column (attribute) names of the Testing Data
    |    • Row (instance) count of the Testing Data
    *--------------------------------------------------------------------*/
'''
# Column names of the testing frame
testing_attribute_names = testing_data.columns
print(
    "\n\nAttributes Names in Testing Data:",
    "=================================\n",
    testing_attribute_names,
    sep="\n",
)

# Number of rows in the testing frame
testing_instance_count = testing_data.shape[0]
print("\n\nTotal Number of Instances in Testing Data:", testing_instance_count)
print("=============================================\n")



Attributes Names in Testing Data:

Index(['tweets', 'label'], dtype='object')


Total Number of Instances in Testing Data: 700



In [164]:
'''
    /*------------------------PRE-PROCESSING-TESTING-DATA -------------
    | Function  : lower() & re.sub()
    | Purpose   : Perform following preprocessing on the 'tweets' column:
    |              • Lower case every whitespace-separated token
    |              • Collapse whitespace runs into a single space
    |              • Replace every run of non-word characters with one space
    | Arguments :
    |       text: Text to be pre-processed
    | Return    :
    |       text: Pre-processed text
    *-----------------------------------------------------------------*/
'''
# Build the pre-processed frame as a NEW DataFrame. Plain assignment
# (`preprocessed = testing_data`) would only alias the raw frame, so the raw
# tweets would be overwritten and the cell would not be safely re-runnable.
preprocessed_testing_data = testing_data.assign(
    tweets=testing_data['tweets'].map(
        lambda tweet: re.sub(r'\W+', ' ', " ".join(word.lower() for word in tweet.split()))
    )
)

# Save the Pre-processed Testing Data into CSV File
preprocessed_testing_data.to_csv(r'D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-preprocessed-testing-data.csv', index = False, header=True)
pd.set_option("display.max_rows", None, "display.max_columns", None)

print("\nTesting Data After Pre-processing:")
print("==================================\n")
print(preprocessed_testing_data)


Testing Data After Pre-processing:

                                                tweets       label
0    its actually battery which is unwanted touchin...  Not-Urgent
1    trans rights are under threat in the uk under ...      Urgent
2    sir me and my family are permanent residents o...  Not-Urgent
3    even southwark have acknowledged its a peak ho...  Not-Urgent
4    no batelec dont extend the brownout ur so sexe...  Not-Urgent
5    could we get an emergency loan cause matt mace...      Urgent
6    the two greatest of the in times of a sense of...  Not-Urgent
7    nancy pelosi can go f herselfin a letter to th...      Urgent
8    jesus even more reason to leave it at home tak...  Not-Urgent
9    urgent helptiny baby brittany slipped down nea...      Urgent
10    to acknowledge others requests as important b...  Not-Urgent
11   this lady is disabeld from legs and feet and r...      Urgent
12   advantages elements that keep the outer layer ...  Not-Urgent
13    it is tempting to s

In [165]:
'''
/*---------------- LABEL_ENCODING ---------------
| Purpose : Declare the set of target classes that the
|           Label Encoder will be trained on
| Return  :
|   urgency : One-column DataFrame ("label") holding the
|             two class names
*-------------------------------------------------------*/
'''
# Target classes, one per row, in a single "label" column
urgency = pd.DataFrame(['Urgent', 'Not-Urgent'], columns=["label"])

In [166]:
# Initialize the Label Encoder
label_encode_urgency = LabelEncoder()
# Train the Label Encoder on the 1-D "label" column. Passing the whole
# DataFrame (2-D) only works through a conversion warning, which this
# notebook silences globally with warnings.filterwarnings("ignore").
label_encode_urgency.fit(urgency["label"])

LabelEncoder()

In [167]:
'''
/*---------------- TRANSFORM_OUTPUT_LABEL -----------
| Function : Transform()
| Purpose : Transform Output(Categorical) into Numerical
| Representation
| Arguments :
| Attribute: Target values
| Return : Attribute: Numerical Representation
*-------------------------------------------------------*/
'''
# Encode the label column of the Training Data into a NEW frame.
# The pre-processed frame keeps its text labels, so this cell can be re-run:
# overwriting the column in place makes a second run fail, because the encoder
# would then be asked to transform already-encoded integers.
encoded_preprocessed_training_data = preprocessed_training_data.assign(
    label=label_encode_urgency.transform(preprocessed_training_data["label"])
)
# Save the Training data with Encoded Output into CSV File
encoded_preprocessed_training_data.to_csv('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-training-data-encoded-output.csv', index = False,header=True)

pd.set_option("display.max_rows", None,"display.max_columns", None)
print("\n Training Data After Encoded Output:")
print("=================================\n")
print(encoded_preprocessed_training_data)


 Training Data After Encoded Output:

                                                 tweets  label
0     my token no is kol06442019200931 sir requestin...      1
1     my father didn t tell me how to live he lived ...      0
2     my twitter people you need to join the convers...      1
3     mystery package left ct on 16 with delivery in...      0
4     nah but it sounds rude phonecall should strict...      1
5     name any other job where this level of incompe...      0
6     national assembly renovation more urgent than ...      1
7     my fathers day has begun by needing to take my...      0
8     need a plumber in greenville wisconsin 877 drp...      1
9     need a plumber in lower waterford vermont 877 ...      0
10    need a plumber in ponca city oklahoma 877 drpi...      1
11    nah don t bother buying one saka and martinell...      0
12    need a o narayana hrudayalaya hospital hosur r...      1
13    my favorite countdown timer bar for is sales c...      0
14    need ab pl

In [168]:
'''
/*---------------- TRANSFORM_OUTPUT_LABEL -----------
| Function : Transform()
| Purpose : Transform Output(Categorical) into Numerical
| Representation
| Arguments :
| Attribute: Target values
| Return :
| Attribute: Numerical Representation
*-------------------------------------------------------*/
'''
# Encode the label column of the Testing Data into a NEW frame.
# The pre-processed frame keeps its text labels, so this cell can be re-run:
# overwriting the column in place makes a second run fail, because the encoder
# would then be asked to transform already-encoded integers.
encoded_preprocessed_testing_data = preprocessed_testing_data.assign(
    label=label_encode_urgency.transform(preprocessed_testing_data["label"])
)
# Save the Testing data with Encoded Output into CSV File
encoded_preprocessed_testing_data.to_csv('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-testing-data-encoded-output.csv', index = False, header=True)

pd.set_option("display.max_rows", None,"display.max_columns", None)
print("\n Testing Data After Encoded Output:")
print("=================================\n")
print(encoded_preprocessed_testing_data)


 Testing Data After Encoded Output:

                                                tweets  label
0    its actually battery which is unwanted touchin...      0
1    trans rights are under threat in the uk under ...      1
2    sir me and my family are permanent residents o...      0
3    even southwark have acknowledged its a peak ho...      0
4    no batelec dont extend the brownout ur so sexe...      0
5    could we get an emergency loan cause matt mace...      1
6    the two greatest of the in times of a sense of...      0
7    nancy pelosi can go f herselfin a letter to th...      1
8    jesus even more reason to leave it at home tak...      0
9    urgent helptiny baby brittany slipped down nea...      1
10    to acknowledge others requests as important b...      0
11   this lady is disabeld from legs and feet and r...      1
12   advantages elements that keep the outer layer ...      0
13    it is tempting to say yes because there is a ...      0
14   immediate impeachment of ba

In [180]:
'''
    /*----------------------------- FEATURE_EXTRACTION ----------------
    | Function  : CountVectorizer()
    | Purpose   : Transform Input (Text) into Numerical Representation
    |             (word-unigram counts over a 10-term vocabulary)
    | Arguments :
    |       Text: Input Text
    | Return    :
    |   Features: Numerical Representation
    *-----------------------------------------------------------------*/
'''

# Initialize the Count Vectorizer
count_vectorizer = CountVectorizer(
        stop_words = None,          # Stopwords are NOT removed
        lowercase = True,           # Convert text to lower case before tokenizing
        analyzer = 'word',          # Word-level n-grams
        token_pattern = r'\w{3,}',  # A token is a run of 3 or more word characters
        ngram_range = (1,1),        # Unigrams only
        max_features = 10)          # Vocabulary is capped at the 10 most frequent terms
# NOTE(review): with max_features = 10 the model only ever sees ten word counts
# per tweet; confirm this cap is intentional rather than a leftover debug value.
train_text = preprocessed_training_data['tweets']
# Fit the Count Vectorizer on Input Text of Training Data
count_vectorizer = count_vectorizer.fit(train_text)

 

In [181]:
'''
    /*----------------- SAVE_THE_TRAINED_COUNT_VECTORIZER -------------------
    | Function  : dump()
    | Purpose   : Save the Trained Vectorizer on your Hard Disk
    | Arguments :
    |    Model   : Fitted vectorizer object
    | Return    :
    |    File    : Trained Vectorizer will be Saved on Hard Disk
    *-----------------------------------------------------------------------*/
'''

# Save the Trained Count Vectorizer in Pkl File.
# The context manager closes (and therefore flushes) the file before any later
# cell reads it back; a bare open() leaves the handle to the garbage collector.
with open('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency_vectorizer_word_unigram.pkl', 'wb') as vectorizer_file:
    pickle.dump(count_vectorizer, vectorizer_file)

In [182]:
# Transform the Input Text of Training Data using Trained Count Vectorizer.
# .toarray() yields a plain ndarray (.todense() returns the legacy np.matrix type).
train_feature_vectors = count_vectorizer.transform(train_text).toarray()

# Get the name of Features (Feature Set). Recent scikit-learn releases only
# provide get_feature_names_out(); fall back to get_feature_names() on old ones.
if hasattr(count_vectorizer, "get_feature_names_out"):
    training_feature_names = count_vectorizer.get_feature_names_out()
else:
    training_feature_names = count_vectorizer.get_feature_names()

# Create a DataFrame of Input Features (one row per tweet, one column per term)
input_training_features = pd.DataFrame(train_feature_vectors, columns = training_feature_names)

In [183]:
# Persist the training document-feature matrix as a CSV file
training_features_csv = r'D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-training-datafeatures.csv'
input_training_features.to_csv(training_features_csv, index=False, header=True)

# Lift the pandas display limits so the complete matrix is printed
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

# Show the document-feature matrix of the training data under its heading
print(
    "\nDocument Features Matrix of Training Data :",
    "============================================\n",
    input_training_features,
    sep="\n",
)



Document Features Matrix of Training Data :

      and  emergency  for  have  immediate  that  the  this  urgent  you
0       0          0    0     0          0     0    1     0       1    1
1       1          0    0     0          0     0    0     0       1    0
2       0          0    0     0          0     0    3     0       1    1
3       1          0    0     1          0     0    1     0       1    0
4       0          0    1     0          0     0    0     0       1    0
5       2          0    1     0          1     0    0     1       0    0
6       0          0    0     0          0     0    0     1       1    0
7       1          0    1     0          0     0    0     0       1    0
8       0          1    0     0          0     0    0     0       0    0
9       0          1    0     0          0     0    0     0       0    0
10      0          1    0     0          0     0    0     0       0    0
11      1          0    0     1          0     0    0     0       1    0
12   

In [184]:
# Input of Testing Data
test_text = encoded_preprocessed_testing_data['tweets']
# Transform the Input Text of Testing Data using Trained Count Vectorizer.
# .toarray() yields a plain ndarray (.todense() returns the legacy np.matrix type).
test_feature_vectors = count_vectorizer.transform(test_text).toarray()
# Get the name of Features (Feature Set). Recent scikit-learn releases only
# provide get_feature_names_out(); fall back to get_feature_names() on old ones.
if hasattr(count_vectorizer, "get_feature_names_out"):
    testing_feature_names = count_vectorizer.get_feature_names_out()
else:
    testing_feature_names = count_vectorizer.get_feature_names()
# Create a DataFrame of Input Features (one row per tweet, one column per term)
input_testing_features = pd.DataFrame(test_feature_vectors, columns = testing_feature_names)

# Save the Unigram Features of Testing Data into CSV File
input_testing_features.to_csv('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-testing-datafeatures.csv', index = False, header=True)
pd.set_option("display.max_rows", None,"display.max_columns", None)
# Display the Document Feature Matrix of Testing Data
print("\nDocument Features Matrix of Testing Data :")
print("==========================================\n")
print(input_testing_features)



Document Features Matrix of Testing Data :

     and  emergency  for  have  immediate  that  the  this  urgent  you
0      0          0    0     0          1     0    1     0       0    0
1      1          0    0     0          0     0    3     0       1    2
2      1          0    0     0          1     1    0     0       0    0
3      0          0    1     1          1     0    3     1       0    1
4      0          0    0     0          0     0    1     0       0    0
5      0          1    0     0          0     0    0     0       0    0
6      0          0    0     0          0     0    3     0       0    0
7      2          0    0     0          1     0    3     0       0    0
8      0          1    0     0          0     0    0     0       0    1
9      0          0    0     0          0     0    0     0       1    0
10     0          0    0     0          0     1    0     0       2    0
11     2          1    0     0          0     0    0     1       0    1
12     2          0

In [185]:
'''
/*------------- LOAD_TRAINING_DATA_FEATURES ----------
| Function : read_csv()
| Purpose : Read the saved training feature matrix back
|           from its CSV file
| Arguments :
| path: Location of the features CSV file
| Return :
| Features: Feature matrix in DataFrame Format
*-------------------------------------------------------*/
'''
# Parse the training features CSV into a DataFrame
training_features_path = 'D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-training-datafeatures.csv'
training_data_features = pd.DataFrame(pd.read_csv(training_features_path))

# Lift the pandas display limits for the cells that print this frame
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

In [186]:
# Print the training feature matrix under its heading
print(
    "Training Data in Numerical Representation:",
    "=========================================\n",
    training_data_features,
    sep="\n",
)


Training Data in Numerical Representation:

      and  emergency  for  have  immediate  that  the  this  urgent  you
0       0          0    0     0          0     0    1     0       1    1
1       1          0    0     0          0     0    0     0       1    0
2       0          0    0     0          0     0    3     0       1    1
3       1          0    0     1          0     0    1     0       1    0
4       0          0    1     0          0     0    0     0       1    0
5       2          0    1     0          1     0    0     1       0    0
6       0          0    0     0          0     0    0     1       1    0
7       1          0    1     0          0     0    0     0       1    0
8       0          1    0     0          0     0    0     0       0    0
9       0          1    0     0          0     0    0     0       0    0
10      0          1    0     0          0     0    0     0       0    0
11      1          0    0     1          0     0    0     0       1    0
12     

In [187]:
'''
/*----- SPLITTING_TRAINING_DATA_OUTPUT/LABELS ------------*/
'''

# Pull the encoded label column out of the training data as a one-column DataFrame
training_data_output = encoded_preprocessed_training_data["label"].to_frame()

# Show the training labels under their heading
print("\nOutput of Training Data:", "========================\n", training_data_output, sep="\n")



Output of Training Data:

      label
0         1
1         0
2         1
3         0
4         1
5         0
6         1
7         0
8         1
9         0
10        1
11        0
12        1
13        0
14        1
15        0
16        1
17        0
18        1
19        0
20        1
21        0
22        1
23        0
24        1
25        0
26        1
27        0
28        1
29        0
30        1
31        0
32        1
33        0
34        1
35        0
36        1
37        0
38        1
39        0
40        1
41        0
42        1
43        0
44        1
45        0
46        1
47        0
48        1
49        0
50        1
51        0
52        1
53        0
54        1
55        0
56        1
57        0
58        1
59        0
60        1
61        0
62        1
63        0
64        1
65        0
66        1
67        0
68        1
69        0
70        1
71        0
72        1
73        0
74        1
75        0
76        1
77        0
78        1
79        0
8

In [188]:
'''
/*--------------- TRAIN_MACHINE_LEARNING_MODEL-----------
| Function : RandomForestClassifier()
| Purpose : Train the Algorithm on Training Data
| Arguments :
| Training Data: Feature matrix and encoded labels
| Return :
| Model: The fitted classifier (fit() returns the estimator)
*-------------------------------------------------------*/
'''
# A fixed random_state makes the forest, and therefore the reported accuracy,
# reproducible across kernel restarts; without it every run trains a different model.
random_forest = RandomForestClassifier(random_state=42)
# np.ravel flattens the one-column label frame into the 1-D target fit() expects
random_forest_classifier = random_forest.fit(training_data_features, np.ravel(training_data_output))
print("Parameters and their values:")
print("============================\n")
print(random_forest_classifier)


Parameters and their values:

RandomForestClassifier()


In [189]:
'''
/*----------------- SAVE_THE_TRAINED_MODEL ---------------
| Function : dump()
| Purpose : Save the Trained Model on your Hard Disk
| Arguments :
| Model : Model Objects
| Return :
| File : Trained Model will be Saved on Hard Disk
*-------------------------------------------------------*/
'''
# Save the Trained Model.
# The context manager closes (and therefore flushes) the file before the
# load cell further down reads it back.
with open('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/rf_urgency_trained_model.pkl', 'wb') as model_file:
    pickle.dump(random_forest, model_file)



In [190]:
'''
/*---------------- LOAD_TESTING_DATA_FEATURES ------------
| Function : read_csv()
| Purpose : Read the saved testing feature matrix back
|           from its CSV file
| Arguments :
| path : Location of the features CSV file
| Return :
| Features: Feature matrix in DataFrame Format
*-------------------------------------------------------*/
'''
# Parse the testing features CSV into a DataFrame
testing_features_path = 'D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency-testing-datafeatures.csv'
testing_data_features = pd.DataFrame(pd.read_csv(testing_features_path))

# Print the testing feature matrix under its heading
print(
    "Testing Data in Numerical Representation:",
    "=========================================\n",
    testing_data_features,
    sep="\n",
)

Testing Data in Numerical Representation:

     and  emergency  for  have  immediate  that  the  this  urgent  you
0      0          0    0     0          1     0    1     0       0    0
1      1          0    0     0          0     0    3     0       1    2
2      1          0    0     0          1     1    0     0       0    0
3      0          0    1     1          1     0    3     1       0    1
4      0          0    0     0          0     0    1     0       0    0
5      0          1    0     0          0     0    0     0       0    0
6      0          0    0     0          0     0    3     0       0    0
7      2          0    0     0          1     0    3     0       0    0
8      0          1    0     0          0     0    0     0       0    1
9      0          0    0     0          0     0    0     0       1    0
10     0          0    0     0          0     1    0     0       2    0
11     2          1    0     0          0     0    0     1       0    1
12     2          0  

In [191]:
'''
/*---------------------- LOAD_SAVED_MODEL ---------------
| Function : load()
| Purpose : Method to load previously saved model
| Arguments :
| File : Pickle file written by the save cell
| Return :
| Model : Saved model will be loaded in memory
*------------------------------------------------------*/
'''
# Load the Saved Model; the context manager guarantees the handle is closed.
# NOTE: pickle.load executes code from the file - only load pickles this
# notebook (or another trusted source) produced.
with open('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/rf_urgency_trained_model.pkl', 'rb') as model_file:
    trained_model = pickle.load(model_file)

In [192]:
'''
/*--------- SPLITTING_TESTING_DATA_OUTPUTS/LABELS ---
'''
# Pull the encoded label column out of the testing data as a one-column DataFrame
testing_data_output = encoded_preprocessed_testing_data["label"].to_frame()

# Show the testing labels under their heading
print("\nOutput of Testing Data:", "=======================\n", testing_data_output, sep="\n")



Output of Testing Data:

     label
0        0
1        1
2        0
3        0
4        0
5        1
6        0
7        1
8        0
9        1
10       0
11       1
12       0
13       0
14       1
15       1
16       1
17       1
18       0
19       1
20       1
21       1
22       1
23       0
24       0
25       1
26       1
27       1
28       1
29       1
30       1
31       0
32       1
33       1
34       0
35       0
36       0
37       1
38       1
39       1
40       1
41       1
42       0
43       1
44       1
45       1
46       1
47       1
48       1
49       1
50       1
51       1
52       0
53       1
54       1
55       1
56       0
57       1
58       0
59       0
60       1
61       1
62       1
63       1
64       1
65       0
66       1
67       0
68       1
69       1
70       0
71       1
72       1
73       1
74       1
75       0
76       1
77       0
78       0
79       1
80       1
81       0
82       1
83       1
84       1
85       0
86       1
87    

In [193]:
'''
/*-------------------- EVALUATE_MACHINE_LEARNING_MODEL----
| Function : Predict()
| Purpose : Make a prediction using Algorithm on Test
| Data
| Arguments :
| Testing Data: Provide Test Data to the Trained Model
| Return :
| Predictions: Table of input features, actual label and
|              predicted label
*-------------------------------------------------------*/
'''
# Make Predictions on Test Data. testing_data_features is deliberately left
# untouched: overwriting it with the joined table makes a second run of this
# cell feed the label columns to predict() and fail.
predicted_labels = trained_model.predict(testing_data_features)
# Create a DataFrame of input Feature vectors
input_testing_features = pd.DataFrame(testing_data_features)
# Create a DataFrame of Output Label
testing_data_output = pd.DataFrame(encoded_preprocessed_testing_data["label"])
# Combine the Input Features, the actual label and the predicted label
# (rows are aligned on the index by join)
model_predictions = input_testing_features.join(testing_data_output).assign(
    **{"Predicted Label": predicted_labels}
)
# Print the Predictions
print("\nPrediction using Random Forest Model:")
print("=====================================\n")
print(model_predictions)



Prediction using Random Forest Model:

     and  emergency  for  have  immediate  that  the  this  urgent  you  \
0      0          0    0     0          1     0    1     0       0    0   
1      1          0    0     0          0     0    3     0       1    2   
2      1          0    0     0          1     1    0     0       0    0   
3      0          0    1     1          1     0    3     1       0    1   
4      0          0    0     0          0     0    1     0       0    0   
5      0          1    0     0          0     0    0     0       0    0   
6      0          0    0     0          0     0    3     0       0    0   
7      2          0    0     0          1     0    3     0       0    0   
8      0          1    0     0          0     0    0     0       0    1   
9      0          0    0     0          0     0    0     0       1    0   
10     0          0    0     0          0     1    0     0       2    0   
11     2          1    0     0          0     0    0     1  

In [194]:
'''
/*------------------- CALCULATE_ACCURACY_SCORE -----------
| Function : Score()
| Purpose : Evaluate the algorithm on Testing data
| Arguments :
| features : Input features of the Testing data
| label : Actual values
| Return :
| Accuracy: Accuracy Score rounded to 2 decimal places
*-------------------------------------------------------*/
'''
# Mean accuracy of the forest on the testing features, rounded to two decimals
raw_accuracy = random_forest.score(input_testing_features, testing_data_output)
model_accuracy_score = round(raw_accuracy, 2)

# Show the score under its heading
print(
    "\n\nAccuracy Score of Random Forest Classifier:",
    "============================================\n",
    model_accuracy_score,
    sep="\n",
)



Accuracy Score of Random Forest Classifier:

0.53


In [213]:
'''
/*-------------------------- TAKE_USER_INPUT -------------
'''
# Ask the user for the text to classify
unseen_user_input = input("Please enter the text :")

# Wrap the single input string in a one-row DataFrame with a "tweets" column
unseen_data = pd.DataFrame({"tweets": [unseen_user_input]})

print("\nUser input:", "============\n", sep="\n")
unseen_data

Please enter the text :this is needed to be done on urgent basis

User input:



Unnamed: 0,tweets
0,this is needed to be done on urgent basis


In [214]:
'''
/*------------------ LOAD_Vectorizer----------------------
| Function : load()
| Purpose : Method to load previously saved Vectorizer
| Arguments :
| File : Pickle file written by the vectorizer save cell
| Return :
| Vectorizer : Saved Vectorizer will be loaded in memory
*-------------------------------------------------------*/
'''
# Load the Count Vectorizer saved previously; the context manager guarantees
# the handle is closed.
# NOTE: pickle.load executes code from the file - only load pickles this
# notebook (or another trusted source) produced.
with open('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/urgency_vectorizer_word_unigram.pkl', 'rb') as vectorizer_file:
    vectorizer_word_unigram = pickle.load(vectorizer_file)


In [215]:
'''
/*------------- TRANSFORM_UNSEEN_INPUT_FEATURES ---------
| Function : Transform()
| Purpose : Transform Unseen Input (Text) into
| Feature Vector
| Arguments :
| Data: Unseen Data
| Return :
| Feature Vector: Feature Vector of Unseen Input Data
*-------------------------------------------------------*/
'''
# Take the text column under its own name. Rebinding `unseen_data` from a
# DataFrame to a Series makes a second run of this cell fail on ["tweets"].
unseen_text = unseen_data["tweets"]
# Transform the Unseen Data using the Loaded Vectorizer.
# .toarray() yields a plain ndarray (.todense() returns the legacy np.matrix type).
transform_unseen_data = vectorizer_word_unigram.transform(unseen_text).toarray()

# Recent scikit-learn releases only provide get_feature_names_out();
# fall back to get_feature_names() on old ones.
if hasattr(vectorizer_word_unigram, "get_feature_names_out"):
    word_unigram_features = vectorizer_word_unigram.get_feature_names_out()
else:
    word_unigram_features = vectorizer_word_unigram.get_feature_names()
unseen_data_features = pd.DataFrame(transform_unseen_data,
columns = word_unigram_features)
print("\nFeature Vector of Unseen Data :")
print("===============================\n")
print(unseen_data_features)


Feature Vector of Unseen Data :

   and  emergency  for  have  immediate  that  the  this  urgent  you
0    0          0    0     0          0     0    0     1       1    0


In [216]:
'''
/*----------------------------- LOAD_SAVED_MODEL ---------
| Function : load()
| Purpose : Method to load previously saved model
| Arguments :
| File : Pickle file written by the model save cell
| Return :
| Model : Saved model will be loaded in memory
*-------------------------------------------------------*/
'''
# Load the Saved Model; the context manager guarantees the handle is closed.
# NOTE: pickle.load executes code from the file - only load pickles this
# notebook (or another trusted source) produced.
with open('D:/FYP/saqib-fyp-app/fyp/fyp_app/urgency model/rf_urgency_trained_model.pkl', 'rb') as model_file:
    trained_model = pickle.load(model_file)


In [217]:
'''
/*--------------- MODEL_PREDICTION ----------------------
| Function : predict()
| Purpose : Use the loaded model to predict the label of
| the unseen instance
| Arguments :
| User Data: Feature Vector of Unseen Instance
| Return :
| Label: Encoded label predicted for the unseen instance
*-------------------------------------------------------*/
'''
# Run the loaded model on the feature vector of the unseen input
predicted_Label = trained_model.predict(
    unseen_data_features
)

In [218]:
# Map the encoded prediction back to its class name.
# Encoding used by this notebook: 0 -> "Not-Urgent", 1 -> "Urgent".
# An explicit lookup replaces two independent `if`s on a NumPy array, which
# left `Prediction` undefined (NameError) for any unexpected label.
urgency_label_names = {0: "Not-Urgent", 1: "Urgent"}
predicted_code = int(np.ravel(predicted_Label)[0])  # single unseen instance -> first element
if predicted_code not in urgency_label_names:
    raise ValueError("Unexpected predicted label: %r" % (predicted_code,))
Prediction = urgency_label_names[predicted_code]
# Add the Prediction in a Pretty Table
pretty_table_prediction = PrettyTable([' ** Prediction ** '])
pretty_table_prediction.add_row([Prediction])
print("\n\nModel Prediction:")
print("===================\n")
print(pretty_table_prediction)



Model Prediction:

+--------------------+
|  ** Prediction **  |
+--------------------+
|       Urgent       |
+--------------------+
