In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the first dataset to get the column labels
first_df = pd.read_excel('Labeled_Data/scraped_articles_business_2_with_sentiment.xlsx')
columns = first_df.columns  # Extract column names

# Load all sheets and concatenate using the columns from the first dataset
sheets = ['Labeled_Data/scraped_articles_business_2_with_sentiment.xlsx', 'Labeled_Data/scraped_articles_business1_with_sentiment.xlsx'
          , 'Labeled_Data/scraped_articles_tech_with_sentiment.xlsx', 'Labeled_Data/scraped_articles_with_sentiment.xlsx']
dfs = [pd.read_excel(sheet, names=columns) for sheet in sheets]  # Use the same column names
df = pd.concat(dfs, ignore_index=True)
print(df.tail())

                                                  Title  \
1006  NZ will be ready to go from first ball: Daryl ...   
1007  India shouldn't send team for CWG: Ex-coach Vi...   
1008  Perth Scorchers sign pacer from Indonesia for ...   
1009  Netflix shuts down AAA game development studio...   
1010  Pep Guardiola comments on Kevin de Bruyne's re...   

                                                   Data   class sentiment  
1006  New Zealand all-rounder Daryl Mitchell said th...  sports  positive  
1007  Ex-India badminton coach Vimal Kumar criticise...  sports  negative  
1008  Perth Scorchers have signed Indonesia seamer N...  sports  positive  
1009  AAA game development studio, Team Blue by Netf...  sports  positive  
1010  Head coach Pep Guardiola spoke at the pre-matc...  sports  positive  


In [3]:
from sklearn.utils import resample
import pandas as pd

positive = df[df['sentiment'] == 'positive']
negative = df[df['sentiment'] == 'negative']
neutral = df[df['sentiment'] == 'neutral']

negative_upsampled = resample(negative, replace=True, n_samples=len(positive), random_state=42)
neutral_upsampled = resample(neutral, replace=True, n_samples=len(positive), random_state=42)

df_upsampled = pd.concat([positive, negative_upsampled, neutral_upsampled])

print(df_upsampled['sentiment'].value_counts())


sentiment
positive    629
negative    629
neutral     629
Name: count, dtype: int64


In [34]:
df_upsampled.head(10)

Unnamed: 0,Title,Data,class,sentiment,combined_text
1,Companies’ earnings reports increase volatilit...,Share prices swing as high valuations and unce...,stock business,positive,Companies’ earnings reports increase volatilit...
2,Nasdaq hits record high as tech stocks rebound...,Sharp turnaround from 15% slide as investors m...,stock business,positive,Nasdaq hits record high as tech stocks rebound...
8,The problem with the Trump trade,Speculative hedge funds are placing bets but o...,stock business,positive,The problem with the Trump trade: Speculative ...
12,Karma comes for Boeing’s shareholders,Nearly a fifth of total shares will soon be ow...,stock business,positive,Karma comes for Boeing’s shareholders: Nearly ...
13,Bond market braced for rise in UK debt issuanc...,Investment banks pencil in second-biggest annu...,stock business,positive,Bond market braced for rise in UK debt issuanc...
18,Crypto exchanges turn to derivatives to lure c...,New market entrants switch focus to offering l...,stock business,positive,Crypto exchanges turn to derivatives to lure c...
20,Georgia’s disputed election,Pivotal election widely seen as a choice betwe...,stock business,positive,Georgia’s disputed election: Pivotal election ...
28,Meet the salad hawkers that are valued like te...,But rally for Sweetgreen and Cava may not stay...,stock business,positive,Meet the salad hawkers that are valued like te...
30,Crocs doubled down on ugly. It is paying off,Maker of foam clogs has gone from laughing sto...,stock business,positive,Crocs doubled down on ugly. It is paying off: ...
32,Top US regulator warns of potential crisis if ...,Finance industry steps up effort to further un...,stock business,positive,Top US regulator warns of potential crisis if ...


In [5]:
df_upsampled['combined_text'] = df_upsampled['Title'] + ': ' + df_upsampled['Data']

In [6]:
df_upsampled.tail(10)

Unnamed: 0,Title,Data,class,sentiment,combined_text
57,Eni to sell 25% stake in biofuel unit to KKR,Italian group will use investment to help fund...,stock business,neutral,Eni to sell 25% stake in biofuel unit to KKR: ...
449,SpaceX wins $733 million launch contract from ...,Elon Musk-led SpaceX has won a $733 million co...,technology,neutral,SpaceX wins $733 million launch contract from ...
483,Elon Musk donates $75 million to pro-Trump gro...,Billionaire Elon Musk donated around $75 milli...,technology,neutral,Elon Musk donates $75 million to pro-Trump gro...
465,Netflix sees 35% QoQ jump in ads membership; Q...,Netflix has posted 35% quarter-on-quarter jump...,technology,neutral,Netflix sees 35% QoQ jump in ads membership; Q...
668,Production at Tata's iPhone plant in Tamil Nad...,Tata Electronics has indefinitely suspended pr...,technology,neutral,Production at Tata's iPhone plant in Tamil Nad...
889,No more Bazball: Rizwan teases Brook for playi...,Pakistan wicketkeeper Muhammad Rizwan teased E...,sports,neutral,No more Bazball: Rizwan teases Brook for playi...
112,Emerging markets are having a moment,US interest rate cuts spur reassessment of ass...,stock business,neutral,Emerging markets are having a moment: US inter...
10,Politics is distorting economic data,Partisanship continues to pollute results of i...,stock business,neutral,Politics is distorting economic data: Partisan...
423,Jellysmack laying off employees amid reorganis...,"Jellysmack, a SoftBank-backed creator-economy ...",technology,neutral,Jellysmack laying off employees amid reorganis...
685,"Batted at 8 in IPL 2024 to give Jadeja, Dube c...",Discussing his decision of batting at eight in...,sports,neutral,"Batted at 8 in IPL 2024 to give Jadeja, Dube c..."


In [7]:
! pip install transformers tensorflow datasets

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m


In [8]:
from datasets import Dataset

data = {
    'combined_text': df_upsampled['combined_text'],  # Use your combined text column
    'label': df_upsampled['class']  # Replace with your actual labels column
}

label_mapping = {
    'sports': 0,
    'technology': 1,
    'stock business': 2
    # Add more mappings if you have more classes
}

# Convert the labels
data['label'] = data['label'].map(label_mapping)


# Convert to Hugging Face dataset
dataset = Dataset.from_pandas(pd.DataFrame(data))
dataset = dataset.train_test_split(test_size=0.2)



In [28]:
train_dataset = dataset['train']
test_dataset = dataset['test']

In [12]:
! pip install transformers peft

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip[0m


In [37]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
"""
model_name_2 = "dstefa/roberta-base_topic_classification_nyt_news"
tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
model_2 = TFAutoModelForSequenceClassification.from_pretrained(model_name_2)

model_name_3 = "dima806/news-category-classifier-distilbert"
tokenizer_3 = AutoTokenizer.from_pretrained(model_name_3)
model_3 = TFAutoModelForSequenceClassification.from_pretrained(model_name_3)"""

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

In [38]:
def tokenize_function(examples):
    return tokenizer(examples["combined_text"], padding="max_length", truncation=True)

# Apply tokenization
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

# Set the format for TensorFlow
tokenized_train_dataset.set_format(type='tensorflow', columns=['input_ids', 'attention_mask', 'label'])
tokenized_test_dataset.set_format(type='tensorflow', columns=['input_ids', 'attention_mask', 'label'])


train_tf_dataset = tokenized_train_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],
    label_cols=['label'],
    shuffle=True,
    batch_size=8
)

test_tf_dataset = tokenized_test_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],
    label_cols=['label'],
    shuffle=False,
    batch_size=8
)

Map:   0%|          | 0/1509 [00:00<?, ? examples/s]

Map:   0%|          | 0/378 [00:00<?, ? examples/s]

In [39]:
import tensorflow as tf

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])




In [41]:
model.fit(train_tf_dataset, epochs=3, validation_data=test_tf_dataset)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x2f6a58110>

In [42]:
# Evaluate the model
"""loss, accuracy = model.evaluate(test_tf_dataset)
print(f"Test Accuracy: {accuracy}")
"""
# Get predictions
predictions = model.predict(test_tf_dataset)



Test Accuracy: 0.9523809552192688


NameError: name 'np' is not defined

In [43]:
import numpy as np
predicted_labels = np.argmax(predictions.logits, axis=1)

In [47]:
from sklearn.metrics import classification_report, confusion_matrix
# Get true labels from the test dataset
true_labels = np.concatenate([y for _, y in test_tf_dataset], axis=0)

# Print classification report
print(classification_report(true_labels, predicted_labels, target_names=['sports', 'technology', 'stock business']))
print(confusion_matrix(true_labels, predicted_labels))


                precision    recall  f1-score   support

        sports       1.00      1.00      1.00       103
    technology       0.94      0.94      0.94       150
stock business       0.93      0.93      0.93       125

      accuracy                           0.95       378
     macro avg       0.96      0.96      0.96       378
  weighted avg       0.95      0.95      0.95       378

[[103   0   0]
 [  0 141   9]
 [  0   9 116]]


In [53]:
# Save the model and tokenizer
model.save_pretrained('ArticleTag')
tokenizer.save_pretrained('ArticleTag')


('ArticleTag/tokenizer_config.json',
 'ArticleTag/special_tokens_map.json',
 'ArticleTag/vocab.txt',
 'ArticleTag/added_tokens.json',
 'ArticleTag/tokenizer.json')

In [1]:
!pwd

/Users/reetvikchatterjee/Desktop/ArticleTag


In [65]:
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# Load the model using the Hugging Face library
model = TFAutoModelForSequenceClassification.from_pretrained("../ArticleTagModel1", from_pt=False)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("../ArticleTagModel1")

# Prediction function
def predict_text(input_text):
    # Tokenize the input
    inputs = tokenizer(input_text, return_tensors="tf", truncation=True, padding=True)

    # Perform prediction
    logits = model(inputs["input_ids"]).logits
    predicted_label = tf.argmax(logits, axis=1).numpy()[0]
    return predicted_label

# Example usage
new_text = "I love being a cricket."
prediction = predict_text(new_text)
print(f"Predicted Label: {prediction}")

Some layers from the model checkpoint at ../ArticleTagModel1 were not used when initializing TFDistilBertForSequenceClassification: ['dropout_79']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at ../ArticleTagModel1 and are newly initialized: ['dropout_912']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted Label: 0


In [9]:
! pip install tf-keras



Defaulting to user installation because normal site-packages is not writeable
Collecting tf-keras
  Obtaining dependency information for tf-keras from https://files.pythonhosted.org/packages/8a/ed/e08afca471299b04a34cd548e64e89d0153eda0e6cf9b715356777e24774/tf_keras-2.18.0-py3-none-any.whl.metadata
  Downloading tf_keras-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Collecting keras>=3.5.0 (from tensorflow<2.19,>=2.18->tf-keras)
  Obtaining dependency information for keras>=3.5.0 from https://files.pythonhosted.org/packages/c2/88/eef50051a772dcb4433d1f3e4c1d6576ba450fe83e89d028d7e8b85a2122/keras-3.6.0-py3-none-any.whl.metadata
  Using cached keras-3.6.0-py3-none-any.whl.metadata (5.8 kB)
Downloading tf_keras-2.18.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached keras-3.6.0-py3-none-any.whl (1.2 MB)
Installing collected packages: keras, tf-keras
  Attempting u

In [9]:
df_upsampled.head()

Unnamed: 0,Title,Data,class,sentiment,combined_text
1,Companies’ earnings reports increase volatilit...,Share prices swing as high valuations and unce...,stock business,positive,Companies’ earnings reports increase volatilit...
2,Nasdaq hits record high as tech stocks rebound...,Sharp turnaround from 15% slide as investors m...,stock business,positive,Nasdaq hits record high as tech stocks rebound...
8,The problem with the Trump trade,Speculative hedge funds are placing bets but o...,stock business,positive,The problem with the Trump trade: Speculative ...
12,Karma comes for Boeing’s shareholders,Nearly a fifth of total shares will soon be ow...,stock business,positive,Karma comes for Boeing’s shareholders: Nearly ...
13,Bond market braced for rise in UK debt issuanc...,Investment banks pencil in second-biggest annu...,stock business,positive,Bond market braced for rise in UK debt issuanc...


In [10]:
from datasets import Dataset

data = {
    'combined_text': df_upsampled['combined_text'],  # Use your combined text column
    'label': df_upsampled['sentiment']  # Replace with your actual labels column
}

label_mapping = {
    'positive': 0,
    'negative': 1,
    'neutral': 2
    # Add more mappings if you have more classes
}

# Convert the labels
data['label'] = data['label'].map(label_mapping)


# Convert to Hugging Face dataset
dataset = Dataset.from_pandas(pd.DataFrame(data))
dataset = dataset.train_test_split(test_size=0.2)

In [11]:
train_dataset = dataset['train']
test_dataset = dataset['test']

In [14]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

model_name = "shashanksrinath/News_Sentiment_Analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3, from_pt=True)


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.


In [22]:
def tokenize_function(examples):
    # Ensure tokenization includes padding and truncation for consistency
    return tokenizer(
        examples["combined_text"],
        padding="max_length",
        truncation=True,
        max_length=128  # You can adjust this based on your dataset
    )

# Apply tokenization to the datasets
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

# Set the format for TensorFlow compatibility
tokenized_train_dataset.set_format(type='tensorflow', columns=['input_ids', 'attention_mask', 'label'])
tokenized_test_dataset.set_format(type='tensorflow', columns=['input_ids', 'attention_mask', 'label'])

# Convert datasets to TensorFlow format using a data collator for padding
from transformers import DataCollatorWithPadding

# Instantiate the data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

# Create TensorFlow datasets
train_tf_dataset = tokenized_train_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],  # Features
    label_cols='label',                       # Labels
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator
)

test_tf_dataset = tokenized_test_dataset.to_tf_dataset(
    columns=['input_ids', 'attention_mask'],  # Features
    label_cols='label',                       # Labels
    shuffle=False,
    batch_size=8,
    collate_fn=data_collator
)


Map:   0%|          | 0/1509 [00:00<?, ? examples/s]

Map:   0%|          | 0/378 [00:00<?, ? examples/s]

In [23]:
import tensorflow as tf

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])



In [24]:
model.fit(train_tf_dataset, epochs=3, validation_data=test_tf_dataset)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x3493a2b50>

In [25]:
predictions = model.predict(test_tf_dataset)




In [26]:
import numpy as np
predicted_labels = np.argmax(predictions.logits, axis=1)

In [27]:
from sklearn.metrics import classification_report, confusion_matrix
# Get true labels from the test dataset
true_labels = np.concatenate([y for _, y in test_tf_dataset], axis=0)

# Print classification report
print(classification_report(true_labels, predicted_labels, target_names=['positive', 'negative', 'neutral']))
print(confusion_matrix(true_labels, predicted_labels))

              precision    recall  f1-score   support

    positive       0.86      0.78      0.82       131
    negative       0.87      0.93      0.90       114
     neutral       0.89      0.92      0.91       133

    accuracy                           0.88       378
   macro avg       0.87      0.88      0.88       378
weighted avg       0.88      0.88      0.87       378

[[102  16  13]
 [  6 106   2]
 [ 10   0 123]]


In [37]:
for batch in test_tf_dataset:
    input_ids = batch[0]['input_ids'].numpy()
    true_labels = batch[1].numpy()

    # Perform predictions
    outputs = model(batch[0])  # Get the model output
    predictions = outputs.logits  # Extract logits for classification

    # Ensure predictions are in the correct format
    if predictions.ndim == 2:  # Predictions are probabilities for each class
        predicted_labels = np.argmax(predictions, axis=1)
    else:  # Handle unexpected shapes
        raise ValueError("Unexpected shape for predictions")

    for i in range(len(input_ids)):
        text = tokenizer.decode(input_ids[i], skip_special_tokens=True)  # Decode input IDs to text
        true_label = label_map[true_labels[i]]
        predicted_label = label_map[predicted_labels[i]]
        results.append((text, true_label, predicted_label))

# Print the results
for i, (text, true_label, predicted_label) in enumerate(results[:10]):  # Limit to first 10 for readability
    print(f"Sample {i + 1}")
    print(f"Text: {text}")
    print(f"True Label: {true_label}")
    print(f"Predicted Label: {predicted_label}")
    print("-" * 50)


AttributeError: 'TFBaseModelOutputWithPoolingAndCrossAttentions' object has no attribute 'logits'

In [28]:
model.save_pretrained('ArticleTag')
tokenizer.save_pretrained('ArticleTag')

('ArticleTag/tokenizer_config.json',
 'ArticleTag/special_tokens_map.json',
 'ArticleTag/vocab.json',
 'ArticleTag/merges.txt',
 'ArticleTag/added_tokens.json',
 'ArticleTag/tokenizer.json')

In [57]:
model.save('New')

INFO:tensorflow:Assets written to: New/assets


INFO:tensorflow:Assets written to: New/assets


In [38]:
model.summary()

Model: "tf_roberta_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFRobertaMainLaye  multiple                  124645632 
 r)                                                              
                                                                 
Total params: 124645632 (475.49 MB)
Trainable params: 124645632 (475.49 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [64]:
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

# Load the model using the Hugging Face library
model = TFAutoModelForSequenceClassification.from_pretrained("./ArticleTag", from_pt=False)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("./ArticleTag")

# Prediction function
def predict_text(input_text):
    # Tokenize the input
    inputs = tokenizer(input_text, return_tensors="tf", truncation=True, padding=True)

    # Perform prediction
    logits = model(inputs["input_ids"]).logits
    predicted_label = tf.argmax(logits, axis=1).numpy()[0]
    return predicted_label

# Example usage
new_text = "I love being a cricket."
prediction = predict_text(new_text)
print(f"Predicted Label: {prediction}")


All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

All the layers of TFRobertaForSequenceClassification were initialized from the model checkpoint at ./ArticleTag.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForSequenceClassification for predictions without further training.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Predicted Label: 0


In [50]:
outputs = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
print(outputs.last_hidden_state)


tf.Tensor(
[[[-0.31165504  0.6304067  -0.27551326 ...  0.35038495  0.533657
    0.61381775]
  [-0.01692375  0.2869165  -0.44122618 ...  0.44938803  0.27577737
    0.22304904]
  [-0.18907458  0.41051    -0.26473066 ...  0.8877572   0.06143521
    0.28098065]
  ...
  [ 0.2692316  -0.33955    -0.1720831  ... -0.37169123  0.2138595
    0.6617572 ]
  [-0.44324592 -0.2816388  -0.42656094 ... -0.8702068  -0.02141165
    0.6386123 ]
  [-0.3116879   0.6304521  -0.27553228 ...  0.35032886  0.53365624
    0.6137781 ]]], shape=(1, 7, 768), dtype=float32)
