In [2]:
# CELL 1: Package Installations
!pip install --upgrade pip -q
!pip install --upgrade torch torchvision torchaudio -q
!pip install --upgrade transformers accelerate peft bitsandbytes safetensors -q
!pip install pandas pyarrow scikit-learn seaborn tqdm rich pingouin shap matplotlib

# Confirm Versions
import torch
import transformers
import accelerate
import peft
import safetensors

print(f"Torch version: {torch.__version__}")
print(f"Transformers version: {transformers.__version__}")
print(f"PEFT version: {peft.__version__}")
print(f"Accelerate version: {accelerate.__version__}")
print(f"Safetensors version: {safetensors.__version__}")
print(f"Torch CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version for PyTorch: {torch.version.cuda}")

print("-" * 70)
print("Installation cell complete.")
print("-" * 70)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m821.2/821.2 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m393.1/393.1 MB[0m [31m58.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m160.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m193.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.7/897.7 kB[0m [31m52.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.0/571.0 MB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.2/200.2 MB[0m [31m150.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# ╔════════════════════════════════════════════════════════════════════╗
# ║  Mount Google Drive                                                ║
# ╚════════════════════════════════════════════════════════════════════╝
from google.colab import drive
drive.mount('/content/drive')

# Put the absolute path to your project root on Drive:
PROJ = "/content/drive/MyDrive/Master"
%cd $PROJ

Mounted at /content/drive
/content/drive/MyDrive/Master


In [4]:
# ╔════════════════════════════════════════════════════════════════════╗
# ║  Environment sanity-check                                          ║
# ╚════════════════════════════════════════════════════════════════════╝
# Prints the GPU name, the torch/transformers versions and the row count of
# the features file so a misconfigured runtime is spotted before any run.
import torch, transformers, pandas as pd, os, platform, sys, json, math

# get_device_name(0) raises when no CUDA device is present; report that case
# explicitly so the remaining checks below still run and print.
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU: WARNING - no CUDA device available (check the runtime type)")
print("torch:", torch.__version__,  " transformers:", transformers.__version__)
# Relative path: resolved against the current working directory.
print("rows in features file:", len(pd.read_feather("data/02_features.feather")))

GPU: NVIDIA A100-SXM4-40GB
torch: 2.6.0+cu124  transformers: 4.52.4
rows in features file: 18110


In [None]:
# RQ1: run the emotion-analysis script (5k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq1_emotion_analysis_5k.py

In [None]:
# RQ1: run the emotion-analysis script (10k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq1_emotion_analysis_10k.py

In [None]:
# RQ1: run the emotion-analysis script (40k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq1_emotion_analysis_40k.py

In [None]:
# RQ1: run the emotion-analysis script (80k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq1_emotion_analysis_80k.py

In [None]:
# RQ2: run the similarity-statistics script (5k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq2_similarity_stats_5k.py

In [None]:
# RQ2: run the similarity-statistics script (10k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq2_similarity_stats_10k.py

In [None]:
# RQ2: run the similarity-statistics script (40k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq2_similarity_stats_40k.py

In [None]:
# RQ2: run the similarity-statistics script (80k variant) as a shell subprocess.
# The script path is relative to the current working directory.
!python rq2_similarity_stats_80k.py

In [None]:
# only SVM-TFIDF + Random-Forest 5k
!python rq3_classification_met.py --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/5k/02_features.feather" \
--model none --no-shap --run-name pysentimentio_5k_classic_only5k


In [None]:
## DistilBERT 5k
!python rq3_classification_met.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/5k/02_features.feather" \
  --model distilbert-base-uncased \
  --epochs 12 --batch 64  --grad-accum 1 \
  --no-baselines \
  --run-name 5k_e12_b64_distil

In [None]:
## DeBERTa-v3-base 5k
!python rq3_classification_met.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/5k/02_features.feather" \
  --model microsoft/deberta-v3-base \
  --epochs 10 --batch 16 --grad-accum 2 \
  --no-baselines \
  --run-name 5k_e10_b16_ga2_deberta

In [None]:
# only SVM-TFIDF + Random-Forest 10k
!python rq3_classification_met.py --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/10k/02_features.feather" \
--model none --no-shap --run-name pysentimentio_10k_classic_only10k


In [None]:
## DistilBERT 10k
!python rq3_classification_met.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/10k/02_features.feather" \
  --model distilbert-base-uncased \
  --epochs 6 --batch 64  --grad-accum 1 \
  --no-baselines \
  --run-name 10k_e6_b64_distil

In [None]:
# DeBERTa-v3-base 10k
!python rq3_classification_met.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/10k/02_features.feather" \
  --model microsoft/deberta-v3-base \
  --epochs 8 --batch 16 --grad-accum 2 \
  --no-baselines \
  --run-name 10k_e8_b16_ga2_deberta

In [None]:
# only SVM-TFIDF + Random-Forest 40k
!python rq3_classification.py --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/40k/02_features.feather" \
--model none --no-shap --run-name pysentimentio_40k_classic_only40k


In [None]:
# DistilBERT 40k
!python rq3_classification.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/40k/02_features.feather" \
  --model distilbert-base-uncased \
  --epochs 8 --batch 64  --grad-accum 1 \
  --no-baselines \
  --run-name 40k_e8_b64_distil

In [None]:
# DeBERTa-v3-base 40k
!python rq3_classification.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/40k/02_features.feather" \
  --model microsoft/deberta-v3-base \
  --epochs 8 --batch 16 --grad-accum 2 \
  --no-baselines \
  --run-name 40k_e8_b16_ga2_deberta

In [None]:
# DistilBERT 80k
!python rq3_classification.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/80k/02_features.feather" \
  --model distilbert-base-uncased \
  --epochs 8 --batch 64  --grad-accum 1 \
  --no-baselines \
  --run-name 80k_e8_b64_distil

In [None]:
# DeBERTa-v3-base 80k
!python rq3_classification.py \
  --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/80k/02_features.feather" \
  --model microsoft/deberta-v3-base \
  --epochs 8 --batch 32 --grad-accum 2 \
  --no-baselines \
  --run-name 80k_e8_b16_ga2_deberta

In [None]:
# only SVM-TFIDF + Random-Forest 80k
!python rq3_classification.py --data "/content/drive/MyDrive/Master/data/pysentimentio/revised-preprocess/80k/02_features.feather" \
--model none --no-shap --run-name pysentimentio_80k_classic_only80k