In [1]:
# !pip install transformers
# !pip install scikit-learn
# !pip install pandas


In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [3]:
train = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/train.parquet')
test = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/test.parquet')

print("Train shape:", train.shape, "| Test shape:", test.shape)
print(train.columns)
train.head(3)


Train shape: (48439, 8) | Test shape: (3, 5)
Index(['id', 'prompt', 'response_a', 'response_b', 'winner', 'model_a',
       'model_b', 'language'],
      dtype='object')


Unnamed: 0,id,prompt,response_a,response_b,winner,model_a,model_b,language
0,00007cff95d7f7974642a785aca248b0f26e60d3312fac...,vieš po Slovensky?,"Áno, hovorím po slovensky. Ako vám môžem pomôcť?","Áno, veď som tu! Môžem ti pomôcť s otázkami al...",model_a,o1-preview,reka-core-20240904,Slovak
1,00010ed04b536f56ebe43eef1100c13906abea12bf9855...,You will be given a piece of news. Analyze it ...,Let's break down the news and analyze it accor...,"```json\n{\n ""contains_orgs"": true,\n ""orgs""...",model_a,gemma-2-27b-it,gemini-1.5-flash-002,Russian
2,0003800d510e38803efba5ceaec122bc66408fe367b0be...,"Dört basamaklı, rakamları birbirinden ve sıfır...","Bu soruyu çözmek için, verilen koşulları adım ...",Bu problemi adım adım çözelim:\n\n1) ABCD - DC...,model_a,gpt-4-0125-preview,claude-3-5-sonnet-20240620,Turkish


In [4]:
train['label'] = (train['winner'] == 'model_a').astype(int)
# label = 1 means model_a, 0 means model_b

train['label'].value_counts(dropna=False)


label
0    24481
1    23958
Name: count, dtype: int64

In [5]:
def combine_text(row):
    # You could also add special tokens or separators
    return f"PROMPT: {row['prompt']} RESP_A: {row['response_a']} RESP_B: {row['response_b']}"

train['combined_text'] = train.apply(combine_text, axis=1)
test['combined_text']  = test.apply(combine_text, axis=1)


In [6]:
# Create the TF-IDF vectorizer
tfidf = TfidfVectorizer(
    max_features=20000,   # limit features for speed
    ngram_range=(1,2),    # unigrams + bigrams
    analyzer='word', 
    stop_words=None       # or consider language-specific stopwords
)

# Fit on train set, transform both train and test
X_train_tfidf = tfidf.fit_transform(train['combined_text'])
X_test_tfidf  = tfidf.transform(test['combined_text'])

y_train = train['label'].values


In [7]:
clf = LogisticRegression(max_iter=200)
clf.fit(X_train_tfidf, y_train)


In [8]:
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_tfidf, y_train, test_size=0.2, random_state=42
)

temp_clf = LogisticRegression(max_iter=200)
temp_clf.fit(X_tr, y_tr)
val_preds = temp_clf.predict(X_val)
print("Validation Accuracy:", accuracy_score(y_val, val_preds))


Validation Accuracy: 0.5016515276630884


In [9]:
test_preds = clf.predict(X_test_tfidf)
pred_winner = ["model_a" if p == 1 else "model_b" for p in test_preds]

submission = pd.DataFrame({
    "id": test["id"],
    "winner": pred_winner
})
submission.head()


Unnamed: 0,id,winner
0,327228,model_b
1,1139415,model_b
2,1235630,model_b


In [10]:
submission.to_csv("submission.csv", index=False)


In [11]:
import pandas as pd
import numpy as np
import torch

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM,
    pipeline
)


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [13]:
# Provided
train = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/train.parquet')
test = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/test.parquet')

print(train.shape, test.shape)
train.head()


(48439, 8) (3, 5)


Unnamed: 0,id,prompt,response_a,response_b,winner,model_a,model_b,language
0,00007cff95d7f7974642a785aca248b0f26e60d3312fac...,vieš po Slovensky?,"Áno, hovorím po slovensky. Ako vám môžem pomôcť?","Áno, veď som tu! Môžem ti pomôcť s otázkami al...",model_a,o1-preview,reka-core-20240904,Slovak
1,00010ed04b536f56ebe43eef1100c13906abea12bf9855...,You will be given a piece of news. Analyze it ...,Let's break down the news and analyze it accor...,"```json\n{\n ""contains_orgs"": true,\n ""orgs""...",model_a,gemma-2-27b-it,gemini-1.5-flash-002,Russian
2,0003800d510e38803efba5ceaec122bc66408fe367b0be...,"Dört basamaklı, rakamları birbirinden ve sıfır...","Bu soruyu çözmek için, verilen koşulları adım ...",Bu problemi adım adım çözelim:\n\n1) ABCD - DC...,model_a,gpt-4-0125-preview,claude-3-5-sonnet-20240620,Turkish
3,00072026c68f5418ef2da238394e418ce72a534b9b22d5...,"현재 추천된 탑 3 종목인 Cabaletta Bio (CABA), Rocket Ph...","죄송하지만 저는 금융 조언을 제공할 수 없습니다. 저는 AI 모델이며, 투자 결정에...",현재 추천된 탑 3 종목에 순위를 매기기 위해서는 여러 가지 요소들을 고려해야 합니...,model_b,gemma-2-2b-it,llama-3.1-nemotron-70b-instruct,English
4,0007ce7cf6bc1b5a8f8a4669b854fb12030863c970d9dc...,Please be boring,"Alright, I'll be as boring as possible.\n\nTod...","Understood. Here is a straightforward, unadorn...",model_a,reka-flash-20240722,grok-2-2024-08-13,English


In [14]:
summarization_model_path = "/kaggle/input/bart-large-cnn/transformers/default/1/bart-large-cnn"
summarizer_tokenizer = AutoTokenizer.from_pretrained(summarization_model_path)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_path).to(device)

# Optionally create a pipeline for easy summarization
summarizer = pipeline(
    task="summarization", 
    model=summarizer_model, 
    tokenizer=summarizer_tokenizer,
    device=0 if device.type == "cuda" else -1
)


Device set to use cuda:0


In [15]:
summarizer.model.to("cpu")
# Optionally summarizer.tokenizer is still fine on CPU


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
    

In [16]:
def maybe_summarize(text, tokenizer, summarizer, max_token_len=2024, max_summary_len=1028):
    # Tokenize to check length
    tokens = tokenizer.encode(text, truncation=False, add_special_tokens=False)
    if len(tokens) > max_token_len:
        # Summarize if the text is too long
        summary = summarizer(
            text,
            max_length=max_summary_len,
            min_length=30,
            do_sample=False
        )
        return summary[0]['summary_text']
    else:
        return text


In [17]:
train['prompt'] = train['prompt'].apply(
    lambda x: maybe_summarize(str(x), summarizer_tokenizer, summarizer)
)
train['response_a'] = train['response_a'].apply(
    lambda x: maybe_summarize(str(x), summarizer_tokenizer, summarizer)
)
train['response_b'] = train['response_b'].apply(
    lambda x: maybe_summarize(str(x), summarizer_tokenizer, summarizer)
)


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [None]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
accumulation_steps = 4
for step, batch in enumerate(train_loader):
    # Forward pass
    outputs = model(**batch)
    loss = outputs.loss / accumulation_steps
    loss.backward()
    
    # Update weights only every `accumulation_steps` steps
    if step % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
# Check tokenized input lengths
def check_token_lengths(dataset, tokenizer):
    for i in range(len(dataset)):
        row = dataset.iloc[i]
        tokens = tokenizer.encode(
            row['prompt'] + " [SEP] " + row['response_a'] + " [SEP] " + row['response_b'], 
            truncation=False
        )
        if len(tokens) > 512:
            print(f"Row {i} exceeds max length: {len(tokens)} tokens")
            return False
    return True

if not check_token_lengths(train, tokenizer):
    print("Some rows exceed the max token length.")

def truncate_text(text, max_length=1024):
    # Truncate text if it exceeds the max allowable length
    return text[:max_length]

train['prompt'] = train['prompt'].apply(lambda x: truncate_text(x))
train['response_a'] = train['response_a'].apply(lambda x: truncate_text(x))
train['response_b'] = train['response_b'].apply(lambda x: truncate_text(x))



In [None]:
#!pip install transformers datasets scikit-learn pandas

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Path to the uploaded model in Kaggle
model_path = "/kaggle/input/xlm-roberta-base/transformers/default/1/xlm-roberta-base"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Move the model to GPU if available
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


In [None]:
import pandas as pd

# Load the train and test Parquet files
train = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/train.parquet')
test = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/test.parquet')


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from torch.utils.data import Dataset, DataLoader
import torch

# Load the train and test Parquet files
train = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/train.parquet')
test = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/test.parquet')

# Split the training data into training and validation sets
train_data, val_data = train_test_split(train, test_size=0.2, random_state=42)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("/kaggle/input/xlm-roberta-base/transformers/default/1/xlm-roberta-base")

# Tokenize combined input
class ChatbotDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=512):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        inputs = self.tokenizer(
            row['prompt'] + " [SEP] " + row['response_a'] + " [SEP] " + row['response_b'],
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        label = torch.tensor(0 if row['winner'] == "model_a" else 1)
        return {
            'input_ids': inputs['input_ids'].squeeze(),
            'attention_mask': inputs['attention_mask'].squeeze(),
            'labels': label
        }

# Prepare datasets
train_dataset = ChatbotDataset(train_data, tokenizer, max_length=512)
val_dataset = ChatbotDataset(val_data, tokenizer, max_length=512)
test_dataset = ChatbotDataset(test, tokenizer, max_length=512)

# Prepare dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)


In [None]:
# Get one batch of data from the train_loader
batch = next(iter(train_loader))

# Extract input_ids, attention_mask, and labels
input_ids = batch['input_ids']
attention_mask = batch['attention_mask']
labels = batch['labels']

# Print the batch details
print("Input IDs Shape:", input_ids.shape)
print("Attention Mask Shape:", attention_mask.shape)
print("Labels Shape:", labels.shape)

# # Print a sample input and its corresponding label
# print("\nSample Input IDs:", input_ids[0])
# print("Sample Attention Mask:", attention_mask[0])
# print("Sample Label:", labels[0])


In [None]:
# Analyze token lengths
token_lengths = [len(input_id) for batch in train_loader for input_id in batch['input_ids']]
print("\nToken Lengths Statistics:")
print(f"Mean Length: {sum(token_lengths)/len(token_lengths):.2f}")
print(f"Max Length: {max(token_lengths)}")
print(f"Min Length: {min(token_lengths)}")


In [None]:
import numpy as np

In [None]:
original_lengths = [len(tokenizer.encode(row['prompt'] + " [SEP] " + row['response_a'] + " [SEP] " + row['response_b'])) for _, row in train.iterrows()]
print(f"Original Token Lengths - Mean: {np.mean(original_lengths):.2f}, Max: {np.max(original_lengths)}, Min: {np.min(original_lengths)}")


In [None]:
import pandas as pd

# Load the train and test Parquet files
train = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/train.parquet')
test = pd.read_parquet('/kaggle/input/datasets1/wsdm-cup-multilingual-chatbot-arena (1)/test.parquet')

# # # Display the data
# print(train.head())
# print(train.info())
# print(test.head())


In [None]:
import matplotlib.pyplot as plt

# Language distribution
#train['language'].value_counts().plot(kind='bar', title='Language Distribution')
#plt.show()

# Winner distribution
#train['winner'].value_counts().plot(kind='bar', title='Winner Distribution')
#plt.show()


In [None]:
import json

model_path = '/kaggle/input/all-minilm-l6-v2/all-MiniLM-L6-v2'

# Check if the config.json file exists
config_file_path = f"{model_path}/config.json"
print("Config file exists:", os.path.exists(config_file_path))

# Load and print the content of config.json
with open(config_file_path, "r") as f:
    config = json.load(f)
print("Config file content:", config)


In [None]:
from transformers import AutoModel, AutoTokenizer

# Path to the model files
model_path = '/kaggle/input/all-minilm-l6-v2/all-MiniLM-L6-v2'

# Load the model and tokenizer directly from the local files
model = AutoModel.from_pretrained(model_path, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

# Test the model with a sample sentence
inputs = tokenizer("This is a test sentence.", return_tensors="pt")
outputs = model(**inputs)

#print("Model outputs:", outputs)


In [None]:
from transformers import AutoModel, AutoTokenizer
import torch

# Define the model path
model_path = '/kaggle/input/all-minilm-l6-v2/all-MiniLM-L6-v2'

# Load the model and tokenizer
model = AutoModel.from_pretrained(model_path, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

# Combine responses for embedding
train['combined_responses'] = train['response_a'] + " " + train['response_b']
test['combined_responses'] = test['response_a'] + " " + test['response_b']

# Function to encode responses using the transformers model
def encode_responses(responses, model, tokenizer, device='cpu', batch_size=32):
    """Encodes responses using the provided model and tokenizer."""
    embeddings = []
    model.to(device)
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():
        for i in range(0, len(responses), batch_size):
            batch = responses[i:i + batch_size]
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
            outputs = model(**inputs)
            # Use `pooler_output` for sentence embeddings
            embeddings.append(outputs.pooler_output.cpu())

    return torch.cat(embeddings)

# Encode training and testing data
device = 'cuda' if torch.cuda.is_available() else 'cpu'
X_train_bert = encode_responses(train['combined_responses'].tolist(), model, tokenizer, device=device)
X_test_bert = encode_responses(test['combined_responses'].tolist(), model, tokenizer, device=device)

# Check shapes of the embeddings
print("Train embeddings shape:", X_train_bert.shape)
print("Test embeddings shape:", X_test_bert.shape)


In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the 'winner' column as numerical labels
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train['winner'])  # 0: model_a, 1: model_b

# Check the mapping
print("Label mapping:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))


In [None]:
from sklearn.model_selection import train_test_split

X_train_split, X_val, y_train_split, y_val = train_test_split(
    X_train_bert, y_train, test_size=0.2, random_state=42
)


In [None]:
X_train_split.shape, X_val.shape, y_train_split.shape, y_val.shape

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define a feedforward neural network
class PreferenceModel(nn.Module):
    def __init__(self, input_size):
        super(PreferenceModel, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),  # Hidden layer 1
            nn.ReLU(),
            nn.Dropout(0.3),            # Dropout for regularization
            nn.Linear(128, 64),         # Hidden layer 2
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)            # Output layer (binary classification)
        )

    def forward(self, x):
        return self.network(x)


In [None]:
import torch

# Assuming y_train is a NumPy array or list of labels
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Ensure labels are of type Long

# Use the embeddings directly if they are already PyTorch tensors
X_train_tensor = X_train_bert.float()  # Convert to float if not already


In [None]:
from collections import Counter

# Assuming y_train is a list or numpy array of labels
class_counts = Counter(y_train)
print("Class Distribution:", class_counts)
import matplotlib.pyplot as plt

# For Counter object or pandas Series
plt.bar(class_counts.keys(), class_counts.values())
plt.title("Class Distribution")
plt.xlabel("Class Labels")
plt.ylabel("Number of Samples")
plt.show()


In [None]:
print("y_train Sample:", y_train[:10])
print("Class Distribution:", Counter(y_train))


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define the improved classifier
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleClassifier, self).__init__()
        self.fc = nn.Linear(input_size, num_classes)
    
    def forward(self, x):
        return self.fc(x)



# Model setup
input_size = 384  # Embedding size
num_classes = 2   # Binary classification (0 or 1)



# Define a simple linear model
model = SimpleClassifier(input_size=384, num_classes=2)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# Training loop
num_epochs = 10
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        # Forward pass
        outputs = model(X_batch)
        # During training

        loss = criterion(outputs, y_batch)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss:.4f}")

print("Training complete!")


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from torch.utils.data import DataLoader, Dataset
import torch
import pandas as pd

# Custom Dataset Class
class ChatbotDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        text = f"{row['response_a']} [SEP] {row['response_b']}"
        inputs = self.tokenizer(text, max_length=self.max_len, padding="max_length", truncation=True, return_tensors="pt")
        label = 0 if row['winner'] == 'model_a' else 1
        return {**inputs, "label": torch.tensor(label)}

# Load Dataset
train = pd.read_csv("train.csv")  # Replace with actual file path
test = pd.read_csv("test.csv")

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
train_dataset = ChatbotDataset(train, tokenizer, max_len=512)
test_dataset = ChatbotDataset(test, tokenizer, max_len=512)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

# Model Setup
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2)
model.to(device)

# Optimizer and Loss
optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()

# Training Loop
epochs = 3
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch["input_ids"].squeeze(1).to(device)
        attention_mask = batch["attention_mask"].squeeze(1).to(device)
        labels = batch["label"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for batch in test_loader:
        input_ids = batch["input_ids"].squeeze(1).to(device)
        attention_mask = batch["attention_mask"].squeeze(1).to(device)
        labels = batch["label"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=1)
        
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

# Classification Report
from sklearn.metrics import classification_report
print(classification_report(true_labels, predictions, target_names=["model_a", "model_b"]))


In [None]:
from sklearn.metrics import classification_report

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    X_test_bert = X_test_bert.to(device)
    test_outputs = model(X_test_bert)
    predictions = torch.argmax(test_outputs, dim=1).cpu()

# Generate a classification report
print(classification_report(y_test, predictions, target_names=["Class 0", "Class 1"]))
