In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset CSV from it
# and write the trained model back to it.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install tensorflow_text



In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
import pandas as pd
# Load the labelled responses CSV from the mounted Drive.
dataset_path = '/content/drive/MyDrive/4 assistant_processed.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Peek at five random rows; no seed is passed, so the rows differ on every run.
df.sample(n=5)

Unnamed: 0,Assistant,type
187274,Sleep deprivation definitely can be quite diff...,unbiased
83036,Is this an academic or leisure trip?\n\n,unbiased
159452,Good question. To make sure you have the righ...,unbiased
287016,"I’m sorry, but I can’t think of anything. How...",biased
144318,"In one month, the earth will go through all th...",unbiased


In [None]:

# Summary statistics of the remaining columns, computed separately per label.
df.groupby(by='type').describe()

Unnamed: 0_level_0,Assistant,Assistant,Assistant,Assistant
Unnamed: 0_level_1,count,unique,top,freq
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
biased,92385,92385,Ass.\n,1
unbiased,281100,281100,Here’s an incomplete list.\n\n,1


In [None]:
# Number of rows per label, most frequent first.
df['type'].value_counts()

unbiased    281100
biased       92385
Name: type, dtype: int64

In [None]:
# (rows, columns) of the loaded frame.
df.shape

(373485, 2)

In [None]:

# Encode the labels as integers: biased -> 1, unbiased -> 0.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the `df['type']` selection: under pandas
# Copy-on-Write that in-place call modifies a temporary object and leaves `df`
# unchanged. The explicit cast keeps the column int64 and makes the cell safe
# to re-run (values that are already 0/1 pass through untouched).
df['type'] = df['type'].replace({'biased': 1, 'unbiased': 0}).astype('int64')

In [None]:
# Spot-check five random rows of the frame with the numeric labels.
df.sample(n=5)

Unnamed: 0,Assistant,type
353400,This is not a very useful interaction. I am a...,1
260156,Here are a few things you should do to care fo...,0
51512,That depends on a lot of factors. Why do you ...,0
189124,Well I guess it's hard to fully appreciate how...,0
302481,They were for the most part positive environme...,1


In [None]:
# Look the counts up by label rather than unpacking value_counts() positionally:
# value_counts() is ordered by frequency, so positional unpacking only matches
# the names `count_class_0` / `count_class_1` while class 0 is the larger one.
class_counts = df['type'].value_counts()
count_class_0 = int(class_counts.loc[0])
count_class_1 = int(class_counts.loc[1])

# One frame per class, used by the oversampling step.
df_class_0 = df[df['type'] == 0]
df_class_1 = df[df['type'] == 1]

In [None]:
# Oversample class 1 (sampling with replacement) up to the class-0 row count and
# stack both classes into one balanced frame.
# random_state pins the draw so a fresh Restart & Run All selects the same rows.
# NOTE(review): rows duplicated here can land on both sides of any train/test
# split made from df_balanced afterwards, which inflates the test metrics.
df_class_1_over = df_class_1.sample(n=count_class_0, replace=True, random_state=15)
df_balanced = pd.concat([df_class_0, df_class_1_over], axis=0)

print('Random over-sampling:')
print(df_balanced.type.value_counts())

Random over-sampling:
0    281100
1    281100
Name: type, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split

# Split the original, un-duplicated rows first and oversample only the training
# part. Splitting an already-oversampled frame puts copies of the same minority
# row on both sides of the split, so the test metrics would be measured on text
# the model has already been trained on.
train_df, test_df = train_test_split(
    df[['Assistant', 'type']], test_size=0.2, random_state=15, stratify=df['type'])

# Oversample class 1 (with replacement) up to the class-0 count, training rows only.
train_class_0 = train_df[train_df['type'] == 0]
train_class_1 = train_df[train_df['type'] == 1]
train_class_1_over = train_class_1.sample(n=len(train_class_0), replace=True, random_state=15)

# Shuffle so the two classes are interleaved instead of stacked one after the other.
train_balanced = pd.concat([train_class_0, train_class_1_over], axis=0).sample(frac=1, random_state=15)

x_train, y_train = train_balanced['Assistant'], train_balanced['type']
# The test split keeps the class ratio of the original data.
x_test, y_test = test_df['Assistant'], test_df['type']


In [None]:
# First five training texts.
x_train.head(n=5)

100448    Yeah it’s a good time for a bit of “getting to...
309970    So, you think wars and population control are ...
98938     I am here to help you.  Do you want to learn h...
208996    A flat earth conspiracy theory is a belief tha...
93682     Alright.  Do you want me to take control of th...
Name: Assistant, dtype: object

In [None]:
# Labels belonging to the first five training texts.
y_train.head(n=5)

100448    0
309970    1
98938     0
208996    0
93682     0
Name: type, dtype: int64

In [None]:
# Hub handles for the text preprocessor and for the BERT encoder
# (whole-word-masking uncased, L-24 / H-1024 / A-16 according to the handle).
PREPROCESS_HANDLE = "https://kaggle.com/models/tensorflow/bert/frameworks/TensorFlow2/variations/en-uncased-preprocess/versions/3"
ENCODER_HANDLE = "https://www.kaggle.com/models/tensorflow/bert/frameworks/TensorFlow2/variations/en-wwm-uncased-l-24-h-1024-a-16/versions/4"

bert_preprocess = hub.KerasLayer(PREPROCESS_HANDLE)
bert_encoder = hub.KerasLayer(ENCODER_HANDLE)

In [None]:
def get_sentence_embeding(sentences):
    """Return the encoder's pooled output for a batch of raw sentences.

    Args:
        sentences: the raw texts to embed; they are handed unchanged to the
            ``bert_preprocess`` layer.

    Returns:
        The ``'pooled_output'`` entry of what ``bert_encoder`` produces for the
        preprocessed batch.
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

# Smoke test: embed two example sentences and display the resulting tensor.
sample_sentences = [
    "Buy now and experience the unmatched quality of this product!",
    "This product is designed to meet your requirements.",
]
get_sentence_embeding(sample_sentences)

<tf.Tensor: shape=(2, 1024), dtype=float32, numpy=
array([[ 0.9820528 , -0.9207038 , -0.9312764 , ..., -0.99646497,
        -0.8868168 ,  0.92512316],
       [ 0.9901206 , -0.9758701 , -0.9254258 , ..., -0.99923   ,
        -0.97999173,  0.904323  ]], dtype=float32)>

In [None]:
# Functional model: raw string -> BERT preprocessing -> BERT encoder
# -> dropout -> single sigmoid unit.

# BERT part: the input is a batch of scalar strings named 'Assistant'.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='Assistant')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Classification head applied to the pooled encoder output.
dropout_layer = tf.keras.layers.Dropout(0.1, name="dropout")
output_layer = tf.keras.layers.Dense(1, activation='sigmoid', name="output")
l = output_layer(dropout_layer(outputs['pooled_output']))

model = tf.keras.Model(inputs=[text_input], outputs=[l])

In [None]:
# Print the layer-by-layer overview of the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Assistant (InputLayer)      [(None,)]                    0         []                            
                                                                                                  
 keras_layer (KerasLayer)    {'input_word_ids': (None,    0         ['Assistant[0][0]']           
                             128),                                                                
                              'input_type_ids': (None,                                            
                             128),                                                                
                              'input_mask': (None, 128)                                           
                             }                                                                

In [None]:
# Number of training examples.
len(x_train)

449760

In [None]:
# Metrics reported during training and evaluation.
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]

# The model ends in a single sigmoid unit, hence binary cross-entropy.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=METRICS,
)

In [None]:
# Train for 5 epochs on the training split; no batch size or validation data is
# passed, so Keras' defaults apply.
model.fit(x_train, y_train, epochs=5)

Epoch 1/5


In [None]:
# Loss and the compiled metrics on the test split.
model.evaluate(x_test, y_test)

In [None]:
# Model outputs for the test split, flattened into a 1-D array.
y_predicted = model.predict(x_test).flatten()

In [None]:
import numpy as np

# Convert scores into hard labels: 1 when strictly above 0.5, otherwise 0.
y_predicted = (y_predicted > 0.5).astype(int)
y_predicted

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix of true labels against the thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm

In [None]:
from matplotlib import pyplot as plt
import seaborn as sn
# Heatmap of the confusion matrix with integer cell annotations.
ax = sn.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')

In [None]:
# Text report comparing the true test labels with the thresholded predictions.
print(classification_report(y_test, y_predicted))

In [None]:
# Hand-written product statements used as a qualitative check of the trained
# model; the raw model outputs (one score per statement) are printed.
reviews = [
    "The BlazingBolt phone is so fast, it will blow your mind!",
    "Drown out the world with the TranquilTunes headphones, guaranteed silence!",
    "Stay ahead of the curve with this cutting-edge product.",
    "It's crystal clear that this product outshines every rival, standing as the undisputed champion in terms of quality and performance.",
    "Compared to competitors, the FitPulse boasts a longer battery life and a wider range of health monitoring features.",
    "Based on a survey of 1,000 users, 85% reported an increase in their daily steps after using the XFit tracker for 4 weeks. The average increase was 2,000 steps per day.",
    "On average, headphones with active noise cancellation (ANC) achieve a 20-25 dB reduction in low-frequency noise, based on expert reviews. However, individual experiences may vary depending on the specific model, fit, and noise environment."]
predictions = model.predict(reviews)
print(predictions)


In [None]:
# Map each score to a readable label using the same 0.5 cut-off as above.
for score in predictions:
    print("Biased" if score > 0.5 else "Unbiased")

In [None]:
import tensorflow as tf
from transformers import BertTokenizer

# Load the model
# NOTE(review): no visible cell writes a 'bert_model' directory; the save cell
# further down writes to /content/drive/MyDrive/bert_large_model. Confirm the
# path before relying on this cell.
loaded_model = tf.keras.models.load_model('bert_model', custom_objects={'KerasLayer': hub.KerasLayer})

# Load the tokenizer
# NOTE(review): 'bert_tokenizer' is not created by any visible cell either, and
# the model built above takes raw strings (its preprocessing is a layer inside
# the model), so this tokenizer may not be needed at all. Verify.
loaded_tokenizer = BertTokenizer.from_pretrained('bert_tokenizer')

# Check the loaded model summary
loaded_model.summary()


In [None]:
# Re-run the qualitative check on the model that was just loaded from disk.
# The call goes through `loaded_model` (not the in-memory `model`) so the cell
# actually verifies that the saved artefact reproduces the predictions.
reviews = [
    "The BlazingBolt phone is so fast, it will blow your mind!",
    "Drown out the world with the TranquilTunes headphones, guaranteed silence!",
    "Stay ahead of the curve with this cutting-edge product.",
    "It's crystal clear that this product outshines every rival, standing as the undisputed champion in terms of quality and performance.",
    "Compared to competitors, the FitPulse boasts a longer battery life and a wider range of health monitoring features.",
    "Based on a survey of 1,000 users, 85% reported an increase in their daily steps after using the XFit tracker for 4 weeks. The average increase was 2,000 steps per day.",
    "On average, headphones with active noise cancellation (ANC) achieve a 20-25 dB reduction in low-frequency noise, based on expert reviews. However, individual experiences may vary depending on the specific model, fit, and noise environment."]
predictions = loaded_model.predict(reviews)
print(predictions)


In [None]:
# prompt: send bert_tokenizer folder to drive
# Recursively copies the local bert_large_tokenizer directory into Google Drive.
# NOTE(review): no cell above this one creates ./bert_large_tokenizer, so on a
# fresh run the copy has nothing to copy. Confirm where the folder comes from.

!cp -r bert_large_tokenizer /content/drive/MyDrive


In [None]:
# Save the entire model
model.save('/content/drive/MyDrive/bert_large_model')

# Save the tokenizer
# NOTE(review): `tokenizer` is not defined in any visible cell, so this line
# raises NameError on a fresh kernel unless it is created elsewhere. Confirm
# whether a tokenizer is needed given that the model consumes raw strings.
tokenizer.save_pretrained('/content/drive/MyDrive/bert_large_tokenizer')


In [None]:
import shutil

# Source path in Google Drive
src_folder_path = '/content/drive/MyDrive/bert_large_model/'

# Destination path in Colab runtime
dest_folder_path = '/content/'

# Copy the entire folder from Google Drive to Colab runtime.
# dirs_exist_ok=True lets the cell be re-run: without it copytree raises
# FileExistsError as soon as the destination folder is already there.
shutil.copytree(src_folder_path, dest_folder_path + 'bert_large_model/', dirs_exist_ok=True)


'/content/bert_large_model/'

In [None]:
import shutil

# Source path in Google Drive
src_folder_path = '/content/drive/MyDrive/bert_large_tokenizer/'

# Destination path in Colab runtime
dest_folder_path = '/content/'

# Copy the entire folder from Google Drive to Colab runtime.
# dirs_exist_ok=True lets the cell be re-run: without it copytree raises
# FileExistsError as soon as the destination folder is already there.
shutil.copytree(src_folder_path, dest_folder_path + 'bert_large_tokenizer/', dirs_exist_ok=True)


'/content/bert_large_tokenizer/'

In [None]:
import tensorflow as tf
from transformers import BertTokenizer
import tensorflow_hub as hub

In [None]:
!pip install tensorflow_text
import tensorflow_text as text  # Registers the ops.

Collecting tensorflow_text
  Downloading tensorflow_text-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_text
Successfully installed tensorflow_text-2.15.0


In [None]:
# Load the saved model from the folder copied into the Colab runtime;
# hub.KerasLayer is passed as a custom object so the hub layers can be rebuilt.
loaded_model = tf.keras.models.load_model('/content/bert_large_model', custom_objects={'KerasLayer': hub.KerasLayer})


In [None]:
# Print the layer overview of the reloaded model to compare with the original.
loaded_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 Assistant (InputLayer)      [(None,)]                    0         []                            
                                                                                                  
 keras_layer (KerasLayer)    {'input_mask': (None, 128)   0         ['Assistant[0][0]']           
                             , 'input_word_ids': (None,                                           
                              128),                                                               
                              'input_type_ids': (None,                                            
                             128)}                                                                
                                                                                              