In [25]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report


In [26]:
df = pd.read_csv("sih dataset .csv")
df.head(5)


Unnamed: 0,QUESTIONS,RESULT
0,Which is the best place to go shopping in your...,irrelevant
1,What is the price of OnePlus Nord,irrelevant
2,When does the sun rise,irrelevant
3,Which place do you like to travel to,irrelevant
4,Which is your favorite animal,irrelevant


In [27]:
df['RESULT'].value_counts()


irrelevant    267
relevant      265
Name: RESULT, dtype: int64

In [28]:
df['Relevant']=df['RESULT'].apply(lambda x: 1 if x=='relevant' else 0)
df.sample(5)

Unnamed: 0,QUESTIONS,RESULT,Relevant
334,Whether MCA/ MBA Department of Arts and Scienc...,relevant,1
473,What are the different documents to be uploade...,relevant,1
399,What is National Academic Depository How to apply,relevant,1
159,What is last date to forward monthly attendance,relevant,1
133,I am opting for PG course for second time am ...,relevant,1


In [7]:
del df['relevant']

KeyError: 'relevant'

In [29]:
df

Unnamed: 0,QUESTIONS,RESULT,Relevant
0,Which is the best place to go shopping in your...,irrelevant,0
1,What is the price of OnePlus Nord,irrelevant,0
2,When does the sun rise,irrelevant,0
3,Which place do you like to travel to,irrelevant,0
4,Which is your favorite animal,irrelevant,0
...,...,...,...
527,Will the scholarship transferred in case the a...,relevant,1
528,Will the scholarship be renewed if the awardee...,relevant,1
529,Are the students who have enrolled in some co...,relevant,1
530,Can the awardee claim any other scholarship (S...,relevant,1


In [30]:
X_train, X_test, y_train, y_test = train_test_split(df['QUESTIONS'],df['Relevant'], stratify=df['Relevant'])


In [31]:
X_train.head(4)

445                   time period of SWANATH Scholarship
153    How to check the Aadhar Active status of bank ...
103    Whether a student who is recipient of Scholars...
157    When will I receive my scholarship after my ba...
Name: QUESTIONS, dtype: object

In [32]:
#BERT 
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3")

In [33]:
# Bert layers
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Neural network layers
l = tf.keras.layers.Dropout(0.1, name="dropout")(outputs['pooled_output'])
l = tf.keras.layers.Dense(1, activation='sigmoid', name="output")(l)

# Use inputs and outputs to construct a final model
model = tf.keras.Model(inputs=[text_input], outputs = [l])

In [34]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer_2 (KerasLayer)     {'input_type_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_word_ids':                                                
                                (None, 128)}                                                

In [40]:
METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall')
]

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=METRICS)

In [41]:
model.fit(X_train, y_train, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x18007097948>

In [42]:
model.evaluate(X_test, y_test)




[0.2956489026546478,
 0.8947368264198303,
 0.9482758641242981,
 0.8333333134651184]

In [43]:
y_predicted = model.predict(X_test)
y_predicted = y_predicted.flatten()



In [44]:
import numpy as np

y_predicted = np.where(y_predicted > 0.5, 1, 0)
y_predicted

array([0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
       1])

In [46]:
questions = [
    "what is your name",
    "what is date today",
    "what is admission proccess?",
    "what is MOOC",
    "where are you from",
    "where did i get form?",
    "how will i get scholarship?"
]
model.predict(questions)



array([[0.14889316],
       [0.34507388],
       [0.4985399 ],
       [0.64395475],
       [0.24871436],
       [0.5548014 ],
       [0.60208017]], dtype=float32)

In [51]:
model.save('model1',save_format='h5')