# Training Models

# Model 1 (1.89 )

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
import tensorflow as tf

# Load data
data = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using BERT tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
maxlen = 100

X_title = data['Title_tokens'].apply(lambda x: eval(x)).values
X_body = data['Body_tokens'].apply(lambda x: eval(x)).values
X_combined = [' '.join(title + body) for title, body in zip(X_title, X_body)]

X_train, X_test, y_train, y_test = train_test_split(X_combined, data['Score'].values, test_size=0.2, random_state=42)

train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=maxlen)
test_encodings = tokenizer(X_test, truncation=True, padding=True, max_length=maxlen)

# Convert lists to TensorFlow Dataset
train_dataset = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings.items()},
    y_train
)).shuffle(len(X_train)).batch(16)

test_dataset = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings.items()},
    y_test
)).batch(16)

# Load pre-trained DistilBERT model for sequence classification
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=1)

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss = tf.keras.losses.MeanSquaredError()
model.compile(optimizer=optimizer, loss=loss, metrics=['mean_squared_error'])

# Fine-tune the model
model.fit(train_dataset, epochs=3, validation_data=test_dataset)

# Evaluate the model
loss, mse = model.evaluate(test_dataset)
print("Mean Squared Error:", mse)


  from .autonotebook import tqdm as notebook_tqdm






Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

Epoch 1/3
Cause: for/else statement not yet supported
Cause: for/else statement not yet supported



Epoch 2/3
Epoch 3/3
Mean Squared Error: 1.8997981548309326


In [3]:
import numpy as np
from transformers import DistilBertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input = input("Enter your question: ")

# Preprocess input text
input_ids = preprocess_input(user_input, tokenizer, maxlen)

# Make prediction
predicted_score = model.predict(input_ids)
predicted_score = int(np.round(predicted_score[0][0]))

# Display predicted score
print("Predicted Score:", predicted_score[0][0])


Predicted Score: [1.0394537]


# Model 2 ( 1.99 )

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
import tensorflow as tf

# Load data
data2 = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using BERT tokenizer
tokenizer2 = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
maxlen2 = 100

X_title2 = data2['Title_tokens'].apply(lambda x: eval(x)).values
X_body2 = data2['Body_tokens'].apply(lambda x: eval(x)).values
X_combined2 = [' '.join(title + body) for title, body in zip(X_title2, X_body2)]

X_train2, X_test2, y_train2, y_test2 = train_test_split(X_combined2, data2['Score'].values, test_size=0.2, random_state=42)

train_encodings2 = tokenizer2(X_train2, truncation=True, padding=True, max_length=maxlen2)
test_encodings2 = tokenizer2(X_test2, truncation=True, padding=True, max_length=maxlen2)

# Convert lists to TensorFlow Dataset
train_dataset2 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings2.items()},
    y_train2
)).shuffle(len(X_train2)).batch(16)

test_dataset2 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings2.items()},
    y_test2
)).batch(16)

# Load pre-trained DistilBERT model for sequence classification
model2 = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=1)

# Modify the model for regression
model2.layers[-1].activation = tf.keras.activations.linear

# Compile the model
optimizer2 = 'adam'  # Use string identifier
loss2 = tf.keras.losses.MeanSquaredError()
model2.compile(optimizer=optimizer2, loss=loss2, metrics=['mean_squared_error'])

# Fine-tune the model
model2.fit(train_dataset2, epochs=3, validation_data=test_dataset2)

# Evaluate the model
loss2, mse2 = model2.evaluate(test_dataset2)
print("Mean Squared Error 2:", mse2)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

Epoch 1/3
Epoch 2/3
Epoch 3/3
Mean Squared Error 2: 1.9955143928527832


In [2]:
import numpy as np
from transformers import DistilBertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer2 = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input2 = input("Enter your question: ")

# Preprocess input text
input_ids2 = preprocess_input(user_input2, tokenizer2, maxlen)

# Make prediction
predicted_score2 = model2.predict(input_ids2)

# Display predicted score
print("Predicted Score:", predicted_score2[0][0])


NameError: name 'model2' is not defined

# Model 3 ( 1.04 )

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
import tensorflow as tf

# Load data
data3 = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using BERT tokenizer
tokenizer3 = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
maxlen3 = 100

X_title3 = data3['Title_tokens'].apply(lambda x: eval(x)).values
X_body3 = data3['Body_tokens'].apply(lambda x: eval(x)).values
X_combined3 = [' '.join(title + body) for title, body in zip(X_title3, X_body3)]

X_train3, X_test3, y_train3, y_test3 = train_test_split(X_combined3, data3['Score'].values, test_size=0.2, random_state=42)

train_encodings3 = tokenizer3(X_train3, truncation=True, padding=True, max_length=maxlen3)
test_encodings3 = tokenizer3(X_test3, truncation=True, padding=True, max_length=maxlen3)

# Convert lists to TensorFlow Dataset
train_dataset3 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings3.items()},
    y_train3
)).shuffle(len(X_train3)).batch(16)

test_dataset3 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings3.items()},
    y_test3
)).batch(16)

# Load pre-trained DistilBERT model for sequence classification
model3 = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=1)

# Modify the model for regression
model3.layers[-1].activation = tf.keras.activations.linear

# Compile the model with Mean Absolute Error (MAE) as the loss function and evaluation metric
optimizer3 = 'adam'  # Use string identifier
loss3 = tf.keras.losses.MeanAbsoluteError()  # Change to MAE
model3.compile(optimizer=optimizer3, loss=loss3, metrics=['mean_absolute_error'])  # Using MAE as a metric for monitoring

# Fine-tune the model
model3.fit(train_dataset3, epochs=5, validation_data=test_dataset3)  # Increased epochs

# Evaluate the model
loss3, mae3 = model3.evaluate(test_dataset3)
print("Mean Absolute Error 3:", mae3)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Mean Absolute Error 3: 1.0452467203140259


In [10]:
import numpy as np
from transformers import DistilBertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input = input("Enter your question: ")

# Preprocess input text
input_ids = preprocess_input(user_input, tokenizer, maxlen)
attention_mask = np.ones_like(input_ids)

# Convert input to a dictionary to match the model input format
input_dict = {
    'input_ids': input_ids,
    'attention_mask': attention_mask
}

# Make prediction
predicted_score = model3.predict(input_dict)

# Convert the predicted score to a natural number
predicted_score = int(np.round(predicted_score[0][0]))

# Display predicted score
print("Predicted Score:", predicted_score)


NameError: name 'model3' is not defined

# Model 4 ( 1.05 )

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, TFRobertaModel
import tensorflow as tf

# Load data
data4 = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using RoBERTa tokenizer
tokenizer4 = RobertaTokenizer.from_pretrained('roberta-base')
maxlen4 = 100

X_title4 = data4['Title_tokens'].apply(lambda x: eval(x)).values
X_body4 = data4['Body_tokens'].apply(lambda x: eval(x)).values
X_combined4 = [' '.join(title + body) for title, body in zip(X_title4, X_body4)]

X_train4, X_test4, y_train4, y_test4 = train_test_split(X_combined4, data4['Score'].values, test_size=0.2, random_state=42)

train_encodings4 = tokenizer4(X_train4, truncation=True, padding=True, max_length=maxlen4)
test_encodings4 = tokenizer4(X_test4, truncation=True, padding=True, max_length=maxlen4)

# Convert lists to TensorFlow Dataset
train_dataset4 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings4.items()},
    y_train4
)).shuffle(len(X_train4)).batch(16)

test_dataset4 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings4.items()},
    y_test4
)).batch(16)

# Load pre-trained RoBERTa model
roberta_model4 = TFRobertaModel.from_pretrained('roberta-base')

# Define input layers
input_ids4 = tf.keras.layers.Input(shape=(maxlen4,), dtype=tf.int32, name='input_ids')
attention_mask4 = tf.keras.layers.Input(shape=(maxlen4,), dtype=tf.int32, name='attention_mask')

# Call RoBERTa model with input ids and attention masks
roberta_outputs4 = roberta_model4(input_ids4, attention_mask=attention_mask4)[0]

# Take the CLS token representation
cls_token4 = roberta_outputs4[:, 0, :]

# Define regression head
outputs4 = tf.keras.layers.Dense(1, activation='linear')(cls_token4)

# Define model
model4 = tf.keras.Model(inputs=[input_ids4, attention_mask4], outputs=outputs4)

# Compile the model with Mean Absolute Error loss
optimizer4 = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss4 = tf.keras.losses.MeanAbsoluteError()
model4.compile(optimizer=optimizer4, loss=loss4, metrics=['mean_absolute_error'])

# Fine-tune the model
model4.fit(train_dataset4, epochs=3, validation_data=test_dataset4)

# Evaluate the model
mae4 = model4.evaluate(test_dataset4)[1]
print("Mean Absolute Error (RoBERTa):", mae4)


  from .autonotebook import tqdm as notebook_tqdm






Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['roberta.embeddings.position_ids', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaModel were not initialized from the PyTorch model and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infe


Epoch 1/3


Epoch 2/3
Epoch 3/3
Mean Absolute Error (RoBERTa): 1.0556235313415527


In [9]:
import numpy as np
from transformers import RobertaTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input = input("Enter your question: ")

# Preprocess input text
input_ids = preprocess_input(user_input, tokenizer, maxlen)
attention_mask = np.ones_like(input_ids)

# Convert input to a dictionary to match the model input format
input_dict = {
    'input_ids': input_ids,
    'attention_mask': attention_mask
}

# Make prediction
predicted_score = model4.predict(input_dict)

# Convert the predicted score to a natural number
predicted_score = int(np.round(predicted_score[0][0]))

# Display predicted score
print("Predicted Score:", predicted_score)


Predicted Score: 1


# Model 5 ( 1.05 )

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import AlbertTokenizer, TFAlbertModel
import tensorflow as tf

# Load data
data5 = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using ALBERT tokenizer
tokenizer5 = AlbertTokenizer.from_pretrained('albert-base-v2')
maxlen5 = 100

X_title5 = data5['Title_tokens'].apply(lambda x: eval(x)).values
X_body5 = data5['Body_tokens'].apply(lambda x: eval(x)).values
X_combined5 = [' '.join(title + body) for title, body in zip(X_title5, X_body5)]

X_train5, X_test5, y_train5, y_test5 = train_test_split(X_combined5, data5['Score'].values, test_size=0.2, random_state=42)

train_encodings5 = tokenizer5(X_train5, truncation=True, padding=True, max_length=maxlen5)
test_encodings5 = tokenizer5(X_test5, truncation=True, padding=True, max_length=maxlen5)

# Convert lists to TensorFlow Dataset
train_dataset5 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings5.items()},
    y_train5
)).shuffle(len(X_train5)).batch(16)

test_dataset5 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings5.items()},
    y_test5
)).batch(16)

# Load pre-trained ALBERT model
albert_model5 = TFAlbertModel.from_pretrained('albert-base-v2')

# Define input layers
input_ids5 = tf.keras.layers.Input(shape=(maxlen5,), dtype=tf.int32, name='input_ids')
attention_mask5 = tf.keras.layers.Input(shape=(maxlen5,), dtype=tf.int32, name='attention_mask')

# Call ALBERT model with input ids and attention masks
albert_outputs5 = albert_model5(input_ids5, attention_mask=attention_mask5)[0]

# Take the CLS token representation
cls_token5 = albert_outputs5[:, 0, :]

# Define regression head
outputs5 = tf.keras.layers.Dense(1, activation='linear')(cls_token5)

# Define model
model5 = tf.keras.Model(inputs=[input_ids5, attention_mask5], outputs=outputs5)

# Compile the model with Mean Absolute Error loss
optimizer5 = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss5 = tf.keras.losses.MeanAbsoluteError()
model5.compile(optimizer=optimizer5, loss=loss5, metrics=['mean_absolute_error'])

# Fine-tune the model
model5.fit(train_dataset5, epochs=3, validation_data=test_dataset5)

# Evaluate the model
mae5 = model5.evaluate(test_dataset5)[1]
print("Mean Absolute Error (ALBERT):", mae5)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.bias', 'predictions.dense.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.bias', 'predictions.dense.weight']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.


Epoch 1/3




Epoch 2/3
Epoch 3/3
Mean Absolute Error (ALBERT): 1.0562832355499268


In [6]:
import numpy as np
from transformers import AlbertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input = input("Enter your question: ")

# Preprocess input text
input_ids = preprocess_input(user_input, tokenizer, maxlen)
attention_mask = np.ones_like(input_ids)

# Convert input to a dictionary to match the model input format
input_dict = {
    'input_ids': input_ids,
    'attention_mask': attention_mask
}

# Make prediction
predicted_score = model5.predict(input_dict)

# Convert the predicted score to a natural number
predicted_score = int(np.round(predicted_score[0][0]))

# Display predicted score
print("Predicted Score:", predicted_score)


NameError: name 'model5' is not defined

# Model 6 ( 1.06 )

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import XLNetTokenizer, TFXLNetModel
import tensorflow as tf

# Load data
data6 = pd.read_csv("training_dataset.csv")

# Tokenize and pad sequences using XLNet tokenizer
tokenizer6 = XLNetTokenizer.from_pretrained('xlnet-base-cased')
maxlen6 = 100

X_title6 = data6['Title_tokens'].apply(lambda x: eval(x)).values
X_body6 = data6['Body_tokens'].apply(lambda x: eval(x)).values
X_combined6 = [' '.join(title + body) for title, body in zip(X_title6, X_body6)]

X_train6, X_test6, y_train6, y_test6 = train_test_split(X_combined6, data6['Score'].values, test_size=0.2, random_state=42)

train_encodings6 = tokenizer6(X_train6, truncation=True, padding=True, max_length=maxlen6)
test_encodings6 = tokenizer6(X_test6, truncation=True, padding=True, max_length=maxlen6)

# Convert lists to TensorFlow Dataset
train_dataset6 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in train_encodings6.items()},
    y_train6
)).shuffle(len(X_train6)).batch(16)

test_dataset6 = tf.data.Dataset.from_tensor_slices((
    {key: np.array(value) for key, value in test_encodings6.items()},
    y_test6
)).batch(16)

# Load pre-trained XLNet model
xlnet_model6 = TFXLNetModel.from_pretrained('xlnet-base-cased')

# Define input layers
input_ids6 = tf.keras.layers.Input(shape=(maxlen6,), dtype=tf.int32, name='input_ids')
attention_mask6 = tf.keras.layers.Input(shape=(maxlen6,), dtype=tf.int32, name='attention_mask')

# Call XLNet model with input ids and attention masks
xlnet_outputs6 = xlnet_model6(input_ids6, attention_mask=attention_mask6)[0]

# Take the CLS token representation
cls_token6 = xlnet_outputs6[:, 0, :]

# Define regression head
outputs6 = tf.keras.layers.Dense(1, activation='linear')(cls_token6)

# Define model
model6 = tf.keras.Model(inputs=[input_ids6, attention_mask6], outputs=outputs6)

# Compile the model with Mean Absolute Error loss
optimizer6 = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss6 = tf.keras.losses.MeanAbsoluteError()
model6.compile(optimizer=optimizer6, loss=loss6, metrics=['mean_absolute_error'])

# Fine-tune the model
model6.fit(train_dataset6, epochs=3, validation_data=test_dataset6)

# Evaluate the model
mae6 = model6.evaluate(test_dataset6)[1]
print("Mean Absolute Error (XLNet):", mae6)


  from .autonotebook import tqdm as notebook_tqdm





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





Some layers from the model checkpoint at xlnet-base-cased were not used when initializing TFXLNetModel: ['lm_loss']
- This IS expected if you are initializing TFXLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFXLNetModel were initialized from the model checkpoint at xlnet-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFXLNetModel for predictions without further training.



Epoch 1/3






Epoch 2/3
Epoch 3/3
Mean Absolute Error (XLNet): 1.0639591217041016


In [8]:
import numpy as np
from transformers import XLNetTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess input text
def preprocess_input(text, tokenizer, maxlen):
    # Tokenize input text
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=maxlen)
    # Pad sequences
    input_ids = pad_sequences([input_ids], maxlen=maxlen, dtype="long", value=0, truncating="post", padding="post")
    return np.array(input_ids)

# Load tokenizer
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

# Maximum sequence length
maxlen = 100

# Prompt user for input text
user_input = input("Enter your question: ")

# Preprocess input text
input_ids = preprocess_input(user_input, tokenizer, maxlen)
attention_mask = np.ones_like(input_ids)

# Convert input to a dictionary to match the model input format
input_dict = {
    'input_ids': input_ids,
    'attention_mask': attention_mask
}

# Make prediction
predicted_score = model6.predict(input_dict)

# Convert the predicted score to a natural number
predicted_score = int(np.round(predicted_score[0][0]))

# Display predicted score
print("Predicted Score:", predicted_score)


NameError: name 'model6' is not defined