In [7]:
from google.colab import drive
# Mount Google Drive inside the Colab VM at /content/drive so that the
# /content/drive/MyDrive/... paths used further down resolve.
# Re-running this is harmless: when the drive is already mounted Colab only
# prints a notice (pass force_remount=True to force a fresh mount).
drive.mount('/content/drive')  # mount your Google Drive


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import torch

# ===== Report which compute device is present =====
# Purely informational: the rest of this cell only reads, splits and writes
# CSV files, so the message does not change what the cell does.
if not torch.cuda.is_available():
    print("⚠️ GPU not available, using CPU")
else:
    print(f"✅ GPU is available: {torch.cuda.get_device_name(0)}")

# ===== Paths =====
# Input: the augmented Hindi sentence CSV on the mounted Drive.
# Output: folder that receives the two split files written below.
data_path = "/content/drive/MyDrive/svnit_shared_task/shared_task/bhasha-workshop/hindi_sentences_augmented_10k.csv"
save_folder = "/content/drive/MyDrive/svnit_shared_task/shared_task/bhasha-workshop/Task1_I/Data/Augmented_Data_Split/Hindi"

# ===== Split configuration =====
TEST_FRACTION = 0.2  # share of rows held out for the test file (80:20 split)
SPLIT_SEED = 42      # fixed seed so the shuffle, and therefore the split, is reproducible

# Create folder if it doesn't exist
os.makedirs(save_folder, exist_ok=True)

# ===== Load dataset =====
df = pd.read_csv(data_path)

# A split needs at least one row on each side; fail here with a clear message
# instead of an opaque error from inside scikit-learn.
if len(df) < 2:
    raise ValueError(f"Need at least 2 rows to split, found {len(df)} in {data_path}")

# ===== 80:20 Train-Test Split =====
# NOTE(review): the file name suggests augmented data. If several rows are
# augmentations of the same source sentence, a row-level random split can put
# near-duplicates in both train and test — confirm whether a group-aware split
# is needed.
train_df, test_df = train_test_split(
    df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
    shuffle=True,
)

# Sanity checks before anything is written to Drive: every row must land in
# exactly one of the two splits.
assert len(train_df) + len(test_df) == len(df), "split lost or duplicated rows"
assert train_df.index.intersection(test_df.index).empty, "train and test share rows"

# ===== Save CSVs =====
train_file = os.path.join(save_folder, "aug_train.csv")
test_file = os.path.join(save_folder, "aug_test.csv")
# index=False: the row index is not written out as a column.
train_df.to_csv(train_file, index=False)
test_df.to_csv(test_file, index=False)

# ===== Print confirmation =====
print(f"✅ Train CSV saved: {train_file}")
print(f"✅ Test CSV saved: {test_file}")


✅ GPU is available: Tesla T4
✅ Train CSV saved: /content/drive/MyDrive/svnit_shared_task/shared_task/bhasha-workshop/Task1_I/Data/Augmented_Data_Split/Hindi/aug_train.csv
✅ Test CSV saved: /content/drive/MyDrive/svnit_shared_task/shared_task/bhasha-workshop/Task1_I/Data/Augmented_Data_Split/Hindi/aug_test.csv
