In [None]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
import torch
from torch.utils.data import Dataset, DataLoader

# Read the train / dev / test splits from CSV files in the working directory,
# in that order, into one pandas DataFrame per split.
split_paths = ('train_data.csv', 'dev_data.csv', 'test_data.csv')
train_data, dev_data, test_data = (pd.read_csv(path) for path in split_paths)

# Rebalance the training split with imblearn's RandomOverSampler; the fixed
# random_state keeps the resampling reproducible between runs.
oversampler = RandomOverSampler(random_state=42)

# fit_resample is given a 2-D feature matrix, so the sentence column is
# reshaped into a single-feature column vector of shape (n_samples, 1).
sentence_matrix = train_data['sentence'].values.reshape(-1, 1)
X_train_resampled, y_train_resampled = oversampler.fit_resample(
    sentence_matrix,
    train_data['label'],
)

# Flatten the resampled features back to 1-D and wrap both results as pandas
# objects: a one-column DataFrame of sentences and a Series of labels.
flat_sentences = X_train_resampled.reshape(-1)
X_train_resampled = pd.DataFrame(flat_sentences, columns=['sentence'])
y_train_resampled = pd.Series(y_train_resampled)

# Define a custom PyTorch Dataset class pairing features with labels
class MyDataset(Dataset):
    """Map-style dataset that yields ``(feature, label)`` pairs by position.

    ``X`` and ``y`` may be pandas Series, pandas DataFrames, or any other
    container supporting ``len()`` and integer ``[]`` indexing (lists, NumPy
    arrays, ...).  Items are always looked up *positionally*, so the pandas
    index labels of ``X`` and ``y`` are irrelevant.

    Args:
        X: Container of features, one entry (or row) per sample.
        y: Container of labels, aligned positionally with ``X``.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    @staticmethod
    def _item_at(data, index):
        """Return the entry of ``data`` at position ``index``.

        Plain ``data[index]`` is label-based for pandas objects: on a
        DataFrame an integer key selects a *column*, and on a Series it
        selects an index *label*.  Both raise ``KeyError`` for the 0..len-1
        positions a DataLoader asks for unless the labels happen to match,
        so pandas containers are accessed through ``.iloc`` instead.
        """
        if isinstance(data, pd.DataFrame):
            if data.shape[1] == 1:
                # Single-column frame: return the bare cell value rather
                # than a one-element row.
                return data.iloc[index, 0]
            # Multi-column frame: return the row's values as a plain tuple.
            return tuple(data.iloc[index])
        if isinstance(data, pd.Series):
            return data.iloc[index]
        return data[index]

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair at position ``index``."""
        return self._item_at(self.X, index), self._item_at(self.y, index)

    def __len__(self):
        """Return the number of samples, taken from the length of ``X``."""
        return len(self.X)

# Create Dataset and DataLoader objects for the resampled train set, dev set,
# and test set.
#
# The training features are passed as the 'sentence' column (a Series), the
# same way as for dev/test.  Passing the one-column DataFrame itself is a bug:
# integer indexing with [] on a DataFrame selects a column label, so fetching
# sample 0 would raise KeyError instead of returning a sentence.
train_dataset = MyDataset(X_train_resampled['sentence'], y_train_resampled)
dev_dataset = MyDataset(dev_data['sentence'], dev_data['label'])
test_dataset = MyDataset(test_data['sentence'], test_data['label'])

# One batch size shared by all three loaders.
BATCH_SIZE = 32

# Only the training loader shuffles; dev/test keep file order so evaluation
# outputs line up with the rows of the source CSVs.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
