Libraries

In [7]:
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, HfFolder
import spacy
#!python -m spacy download en_core_web_sm
import nltk
#nltk.download('punkt')
from nltk.tokenize import sent_tokenize
import transformers
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, TrainingArguments, Trainer
import torch
from torch.utils.data import DataLoader, Dataset
from torch import nn
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from collections import defaultdict
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

Moving computation to the GPU

In [8]:
# Check if CUDA is available and tell PyTorch to use the GPU if it is
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available! Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

CUDA is available! Using GPU.


Logging into Hugging face

In [9]:
# let us first install relavant libraries from HF
# make sure that we are using the latest libraries which support logging-in via tokens
# install or simple upgrade to latest version (upgrade needed on kaggle notebook)
# for kaggle notebook, you may need to restart runtime to load the upgraded libraries correctly
#%pip install --upgrade huggingface-hub
#%pip install --upgrade transformers

# get your account token from https://huggingface.co/settings/tokens
token = 'your_token_here'

# Save the token to the HfFolder
folder = HfFolder()
folder.save_token(token)

# Now you can use the HfApi with your token
api = HfApi()

Importing reviews

In [10]:
csv_file_path = 'data/reviews_postgres.csv'

reviews_postgres = pd.read_csv(csv_file_path)

Some info for me

In [11]:
reviews_postgres.head()

Unnamed: 0,review_id,business_id,stars,useful,funny,cool,text
0,i8y_LwbJgC8TjzK02RwqAA,-0TffRSXXIlBYVbb5AwfTg,5,1,0,0,Loved this place. Intimate atmosphere. Very c...
1,Z76s64vGwM0Ga0wkT9tQJQ,-0TffRSXXIlBYVbb5AwfTg,5,0,0,0,"If you love Indian food, you won't be disappoi..."
2,f8ZSTu-qTSyNlWyrrrF28Q,-0TffRSXXIlBYVbb5AwfTg,2,2,3,0,"When is Indian food not Indian. When it's ""fu..."
3,qaeKTVEGgjgAORzAqhN_0w,-0TffRSXXIlBYVbb5AwfTg,5,2,0,0,Lived in London and Scotland for the better pa...
4,5ZaMnOTb9KN_DXj79qtTQA,-0TffRSXXIlBYVbb5AwfTg,4,1,0,0,Walked in last night to find an excellent Indi...


In [12]:
print("Column Names:")
print(reviews_postgres.columns)

print("\nDataFrame Information:")
print(reviews_postgres.info())

Column Names:
Index(['review_id', 'business_id', 'stars', 'useful', 'funny', 'cool', 'text'], dtype='object')

DataFrame Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333265 entries, 0 to 333264
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   review_id    333265 non-null  object
 1   business_id  333265 non-null  object
 2   stars        333265 non-null  int64 
 3   useful       333265 non-null  int64 
 4   funny        333265 non-null  int64 
 5   cool         333265 non-null  int64 
 6   text         333265 non-null  object
dtypes: int64(4), object(3)
memory usage: 17.8+ MB
None


In [13]:
# Using the 'business_id' column to get unique business IDs
unique_business_ids = reviews_postgres['business_id'].unique()

# Setting the seed for reproducibility
seed = 42
# Randomly selecting 200 unique business IDs
random_business_ids = pd.Series(unique_business_ids).sample(200, random_state=seed)

# Filtering the dataframe based on the selected business IDs
filtered_df = reviews_postgres[reviews_postgres['business_id'].isin(random_business_ids)]

In [14]:
print("Column Names:")
print(filtered_df.columns)
print("\nDataFrame Information:")
print(filtered_df.info())
filtered_df.head()

Column Names:
Index(['review_id', 'business_id', 'stars', 'useful', 'funny', 'cool', 'text'], dtype='object')

DataFrame Information:
<class 'pandas.core.frame.DataFrame'>
Index: 24900 entries, 6812 to 333161
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   review_id    24900 non-null  object
 1   business_id  24900 non-null  object
 2   stars        24900 non-null  int64 
 3   useful       24900 non-null  int64 
 4   funny        24900 non-null  int64 
 5   cool         24900 non-null  int64 
 6   text         24900 non-null  object
dtypes: int64(4), object(3)
memory usage: 1.5+ MB
None


Unnamed: 0,review_id,business_id,stars,useful,funny,cool,text
6812,fWQDhkZUMBGe7Cs_kJ8Oew,05ev984NYfimRN0UiFrxaA,2,1,1,1,"Really good happy hour deal. Goes from 5-7, se..."
6813,9GwpUcZDph5jlDaN-TCejQ,05ev984NYfimRN0UiFrxaA,3,0,0,0,"Good food, great atmosphere. I was not blown a..."
6814,EjVS4HDezpGM_rz2EaAEEg,05ev984NYfimRN0UiFrxaA,2,0,0,0,Someone in the kitchen has a heavy hand with t...
6815,ULh5wm-QYENgD_pfH7JOUA,05ev984NYfimRN0UiFrxaA,5,1,0,0,Osteria isn't a Michelin 3-star restaurant. I...
6816,J61OiUEWwjZf9clQkq1kTw,05ev984NYfimRN0UiFrxaA,4,1,0,1,"I like this place, but I wish it could relax a..."


## 1. Preprocessing
Tokenization? Removal of stopwords? Lemmatization or stemming?


In the end we only used lowercasing and tokenization into sentences and kept the 'review_id', 'business_id', 'sentences' columns. Then we expanded the dataframe so there was a row for every sentence to prepare it for labeling and prediction.

In [15]:
# Loading the spaCy English model
nlp = spacy.load("en_core_web_sm")

In [16]:
# Lowercasing and stopwords removal.
filtered_df['text_lowercased'] = filtered_df['text'].apply(lambda x: x.lower())

def remove_stopwords(text):
    doc = nlp(text)
    tokens = [token.text for token in doc if token.is_alpha and not token.is_stop]
    return ' '.join(tokens)

filtered_df['text_removed_stopwords'] = filtered_df['text'].apply(remove_stopwords)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['text_lowercased'] = filtered_df['text'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['text_removed_stopwords'] = filtered_df['text'].apply(remove_stopwords)


In [17]:
filtered_df.head()

Unnamed: 0,review_id,business_id,stars,useful,funny,cool,text,text_lowercased,text_removed_stopwords
6812,fWQDhkZUMBGe7Cs_kJ8Oew,05ev984NYfimRN0UiFrxaA,2,1,1,1,"Really good happy hour deal. Goes from 5-7, se...","really good happy hour deal. goes from 5-7, se...",good happy hour deal Goes select wines spritze...
6813,9GwpUcZDph5jlDaN-TCejQ,05ev984NYfimRN0UiFrxaA,3,0,0,0,"Good food, great atmosphere. I was not blown a...","good food, great atmosphere. i was not blown a...",Good food great atmosphere blown away good bet...
6814,EjVS4HDezpGM_rz2EaAEEg,05ev984NYfimRN0UiFrxaA,2,0,0,0,Someone in the kitchen has a heavy hand with t...,someone in the kitchen has a heavy hand with t...,kitchen heavy hand vinegar Little Gem Salad Ve...
6815,ULh5wm-QYENgD_pfH7JOUA,05ev984NYfimRN0UiFrxaA,5,1,0,0,Osteria isn't a Michelin 3-star restaurant. I...,osteria isn't a michelin 3-star restaurant. i...,Osteria Michelin star restaurant haute cuisine...
6816,J61OiUEWwjZf9clQkq1kTw,05ev984NYfimRN0UiFrxaA,4,1,0,1,"I like this place, but I wish it could relax a...","i like this place, but i wish it could relax a...",like place wish relax osteria food good visit ...


In [18]:
# Applying sentence tokenization to the "text_lowercased" column
filtered_df['sentences'] = filtered_df['text_lowercased'].apply(sent_tokenize)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['sentences'] = filtered_df['text_lowercased'].apply(sent_tokenize)


In [19]:
# Keeping only columns review_id, business_id and sentences
filtered_df = filtered_df[['review_id', 'business_id', 'sentences']]

In [20]:
# Expanding the DataFrame to have one row per sentence
filtered_df = filtered_df.explode('sentences').reset_index(drop=True)
filtered_df.rename(columns={'sentences': 'sentence'}, inplace=True)

## 2. Review features.


1. **Client loyalty & Recommendations**
2. **Restaurant’s General Opinions**
3. **Critical (dangerous to the brand) opinions**
4. **Food Taste & Quality Opinions**
5. **Opinions about specific dishes**
6. **Food Variety & Selection Opinions**
7. **Service Opinions**
8. **Addons Opinions**
9. **Price/Payment Opinions**
10. **Vibe Opinions**

##  3. Fewshot training

We used gpt 3.5 turbo generated synthetic data - synthetic reviews to create our labeled dataset of 100 observations for every of the 10 labels.

In [6]:
# Load the CSV file
fewshot_training_dataset = pd.read_csv('./categories/fewshot_training_dataset.csv')

# Display the first few rows of the DataFrame
print(fewshot_training_dataset.head())

                                                text  label
0  The exceptional service and delicious food ens...      0
1  I can't wait to recommend this place to friend...      0
2  The warm and inviting atmosphere makes it perf...      0
3  I'm definitely coming back; the staff made our...      0
4  From the friendly staff to the tasty dishes, I...      0


In [5]:
class TextClassificationDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        inputs = self.tokenizer(text, padding='max_length', truncation=True, max_length=512, return_tensors='pt')
        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()
        return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': torch.tensor(label)}


In [None]:
# Loading pre-trained model tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Loading pre-trained model
num_labels = 10  # Replace with the number of unique labels in your dataset
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

# Loading the dataset
texts = fewshot_training_dataset['text'].tolist()
labels = fewshot_training_dataset['label'].tolist()  # Convert labels to list

# Creating a Dataset
dataset = TextClassificationDataset(texts, labels, tokenizer)

# Defining the training arguments
training_args = TrainingArguments(
    output_dir="fewshot_model",  # The output directory
    num_train_epochs=3,  # The number of training epochs
    per_device_train_batch_size=32,  # The batch size for training
    push_to_hub=True,  # Whether to push to the hub
    hub_model_id="vitax10/fewshot_model",  # The model ID to push to the hub
)

# Creating a Trainer
trainer = Trainer(
    model=model,  # The model to train
    args=training_args,  # The training arguments
    train_dataset=dataset,  # The training dataset
)

In [None]:
# Training the model
trainer.train()

In [None]:
trainer.save_model("fewshot_model")

trainer.push_to_hub("https://huggingface.co/vitax10/fewshot_model")

## 4. Label / Category prediction

Labeling:

Labeling our data into different categories can be valuable, especially if we want to perform search or analysis based on specific aspects of the reviews (e.g., service, food quality, price).
Labeling can guide the semantic search engine to focus on certain dimensions or categories when retrieving relevant results.
Accurate labeling is crucial. If the labels accurately represent the content of the reviews, it can enhance the search engine's ability to provide relevant and meaningful results.

Loading the model and tokenizer from private Hugging Face repo.

In [155]:
# Loading the model
model = AutoModelForSequenceClassification.from_pretrained("vitax10/fewshot_model")

In [156]:
# Loading the tokenizer
tokenizer = AutoTokenizer.from_pretrained("vitax10/fewshot_model")

Prediction function

In [161]:
def predict_label(sentence):
    # Prepare the sentence for the model
    inputs = tokenizer(sentence, padding='max_length', truncation=True, max_length=512, return_tensors='pt')
    input_ids = inputs['input_ids'].squeeze().unsqueeze(0)
    attention_mask = inputs['attention_mask'].squeeze().unsqueeze(0)

    # Move inputs to the GPU
    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)

    # Move the model to the GPU
    model.to(device)

    # Make the prediction
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    
    # Get the predicted class
    predicted_class = outputs.logits.argmax(dim=-1).item()

    return predicted_class

Predicting

In [165]:
# Runs for 4233 min on CPU 
# Number of rows in filtered_df: 24900 - 1 Hour 41 mins with Cuda

# Apply the function to the "sentence" column to create the "label" column
tqdm.pandas(desc="Predicting")
filtered_df['label'] = filtered_df['sentence'].progress_apply(predict_label)

Predicting: 100%|██████████| 189102/189102 [10:10:43<00:00,  5.16it/s] 


In [166]:
print(filtered_df.head())

                review_id             business_id  \
0  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
1  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
2  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
3  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
4  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   

                                                                            sentence  \
0                                                       really good happy hour deal.   
1  goes from 5-7, select wines, spritzers, and cocktails for $5-6 and peroni for $4!   
2   also, can't go wrong with a margarita pizza (crispy and delicious crust) for $6!   
3                             the other dishes on the menu all looked great as well.   
4                the bartenders are pretty flustered, overwhelmed, and kind of mean.   

   label  
0      0  
1      5  
2      0  
3      5  
4      5  


In [167]:
label_counts = filtered_df['label'].value_counts()
print(label_counts)

label
0    73335
5    34532
4    31983
2    12483
8    11068
6     7799
1     7015
3     3753
9     3685
7     3449
Name: count, dtype: int64


In [168]:
filtered_df.to_csv('labeled_full_df.csv', index=False)

## 5. Embedding preparation

http://unicornnlp.com/?Semantic-Analysis-for-Restaurant-Reviews

Category 1: Client loyalty & Recommendations

Category 2: Restaurant’s General Opinions

Category 3: Critical (dangerous to the brand) opinions

Category 4: Food Taste & Quality Opinions

Category 5: Opinions about specific dishes

Category 6: Food Variety & Selection Opinions

Category 7: Service Opinions

Category 8: Addons Opinions

Category 9: Price/Payment Opinions

Category 10: Vibe Opinions


In [169]:
filtered_df_changed = filtered_df.copy()

In [170]:
# Changing the labels to their corresponding category names
filtered_df_changed['label'] = filtered_df_changed['label'].map({
    0: 'Client loyalty & Recommendations',
    1: 'Restaurant"s General Opinions',
    2: 'Critical (dangerous to the brand) opinions',
    3: 'Food Taste & Quality Opinions',
    4: 'Opinions about specific dishes',
    5: 'Food Variety & Selection Opinions',
    6: 'Service Opinions',
    7: 'Addons Opinions',
    8: 'Price/Payment Opinions',
    9: 'Vibe Opinions'
})

                review_id             business_id  \
0  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
1  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
2  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
3  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   
4  fWQDhkZUMBGe7Cs_kJ8Oew  05ev984NYfimRN0UiFrxaA   

                                                                            sentence  \
0                                                       really good happy hour deal.   
1  goes from 5-7, select wines, spritzers, and cocktails for $5-6 and peroni for $4!   
2   also, can't go wrong with a margarita pizza (crispy and delicious crust) for $6!   
3                             the other dishes on the menu all looked great as well.   
4                the bartenders are pretty flustered, overwhelmed, and kind of mean.   

                               label  
0   Client loyalty & Recommendations  
1  Food Variety & Selection Opinions  
2   Client loyalty & Recommendation

In [None]:
# Printing the updated DataFrame
print(filtered_df_changed.head())

## 6. Embedding representation




In [172]:
# Preparing Data
model_name = 'all-mpnet-base-v2'
model = SentenceTransformer(model_name)

business_embeddings = []

for business_id in filtered_df_changed['business_id'].unique():  # Using filtered_df_changed
    business_sentences = filtered_df_changed[filtered_df_changed['business_id'] == business_id]  # Using filtered_df_changed

    # Dictionary to store sentences for each category
    category_sentences = defaultdict(list)
    for index, row in business_sentences.iterrows():
        category_name = row['label']
        sentence = row['sentence']
        category_sentences[category_name].append(sentence)

    # List of strings representing each category and its sentences
    category_texts = [f"({category}): ({', '.join(sentences)})" for category, sentences in category_sentences.items()]

    # Concatenating all category texts into one, with a newline character after each category
    all_category_text = '\n'.join(category_texts)

    # Embedding the concatenated text
    business_embedding = model.encode(all_category_text, show_progress_bar=True)

    business_embeddings.append({
        'business_id': business_id,
        'np_embeddings': business_embedding.squeeze().tolist()  # Convert numpy array to list
    })

# Resulting DataFrame
result_df = pd.DataFrame(business_embeddings)

# Saving DataFrame to CSV
result_df.to_csv('embeddings_businesses_df_real.csv', index=False)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
print(all_category_text)

In [None]:
print(result_df.head())