<a href="https://colab.research.google.com/github/RyuichiSaito1/inflation-reddit-usa/blob/main/notebooks/llama_3_2_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using A100 GPU

In [None]:
from google.colab import auth, drive

# Mount Google Drive under /content/drive (the dataset and checkpoint paths used later live there)
drive.mount('/content/drive')

# Authenticate the current Colab user
auth.authenticate_user()

In [None]:
# Install required packages
# transformers is removed first and then reinstalled at the pinned 4.44.0 release.
!pip uninstall -y transformers
!pip install transformers==4.44.0
# Data handling, metrics, plotting and the PyTorch stack (versions are not pinned).
!pip install datasets scikit-learn matplotlib torch torchvision torchaudio
# NOTE(review): bitsandbytes is not referenced in the visible cells — confirm it is still needed.
!pip install accelerate bitsandbytes

# 1,040 (main-prod-1040.csv)

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

class TimeTrackerCallback(TrainerCallback):
    """Trainer callback that prints how long each training epoch took.

    The timer is started in ``on_epoch_begin`` and read in ``on_epoch_end``.
    """

    # Timer value captured at the start of the current epoch. Stays None until
    # the first on_epoch_begin call so on_epoch_end can detect a missing start.
    start_time = None

    def on_epoch_begin(self, args, state, control, model=None, **kwargs):
        """Record the moment the epoch starts."""
        # perf_counter is meant for measuring durations and is not affected by
        # system clock updates, unlike the wall-clock time.time().
        self.start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Print the duration of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was observed; there is nothing to report.
            return
        elapsed_time = time.perf_counter() - self.start_time
        print(f"Epoch {state.epoch} training time: {elapsed_time:.2f} seconds")

# Select CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Report the name of the first GPU when one is in use
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Prompt template wrapped around every Reddit post before tokenization.
# It describes a three-way labelling scheme (0 = deflation, 1 = neither,
# 2 = inflation). `{post}` is the only placeholder and is filled with
# str.format() by format_with_prompt().
INFLATION_PROMPT = """You are a chief economist at the IMF. I would like you to infer the public perception of inflation from Reddit posts. Please classify each Reddit post into one of the following categories: 0: The post indicates deflation, such as the lower price of goods or services (e.g., "the prices are not bad"), affordable services (e.g., "this champagne is cheap and delicious"), sales information (e.g., "you can get it for only 10 dollars."), or a declining and buyer's market. 2: The post indicates or includes inflation, such as the higher price of goods or services (e.g., "it's not cheap"), the unreasonable cost of goods or services (e.g., "the food is overpriced and cold"), consumers struggling to afford necessities (e.g., "items are too expensive to buy"), shortage of goods of services, or mention about an asset bubble. 1: The post indicates neither deflation (0) nor inflation (2). This category also includes just questions to a community, social statements not personal experience, factual observations, references to originally expensive or cheap goods or services (e.g., "a gorgeous and costly dinner" or "an affordable Civic"), website promotion, authors' wishes, or illogical text. Please choose a stronger stance when the text includes both 0 and 2 stances. If these stances are of the same degree, answer 1.

Reddit Post: {post}

Classification:"""

In [None]:
# 1. Read the annotated posts from Drive, then report table size and label balance
df = pd.read_csv(
    '/content/drive/MyDrive/world-inflation/data/reddit/production/main-prod-1040.csv',
    sep=',',
)
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['inflation'].value_counts()}")

# Wrap a single post in the classification prompt
def format_with_prompt(post):
    """Return INFLATION_PROMPT with its ``{post}`` placeholder replaced by *post*."""
    filled_prompt = INFLATION_PROMPT.format(post=post)
    return filled_prompt

# Build the model input column by wrapping every post body in the prompt
df['formatted_body'] = df['body'].apply(format_with_prompt)

# 2. Stratified 75/25 train/validation split with a fixed seed
train_df, val_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['inflation'],
    random_state=42,
)

# 3. Wrap both splits as HuggingFace Datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, val_df)
)

# 4. Tokenizer and model are loaded from the same checkpoint name
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 with automatic device placement
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    num_labels=3,
)

# Keep the model config's pad token id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# 5. Tokenization of the prompt-formatted posts
def tokenize_function(examples):
    """Tokenize a batch of prompt-formatted posts and attach their labels.

    Reads ``examples['formatted_body']``, pads/truncates every entry to 512
    tokens and requests PyTorch tensors from the tokenizer. The values of
    ``examples['inflation']`` are stored under the ``labels`` key.
    """
    encoded = tokenizer(
        examples['formatted_body'],
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    encoded['labels'] = examples['inflation']
    return encoded

# Tokenize both splits in batches, dropping the original columns so only the
# tokenizer outputs and labels remain
tokenized_train = train_dataset.map(
    tokenize_function,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize_function,
    remove_columns=val_dataset.column_names,
    batched=True,
)

# 6. Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    # zero_division=0 yields the same score sklearn assigns by default when a
    # class is never predicted (0), but without an UndefinedMetricWarning on
    # every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# 7. Set up training arguments
# Checkpoints and logs are written to Google Drive. Evaluation and saving both
# happen once per epoch so the best checkpoint (highest eval_accuracy) can be
# reloaded when training ends.
training_args = TrainingArguments(
    # Output and logging
    output_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning",
    logging_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning/logs",

    # Evaluation and saving
    eval_strategy="epoch",  # current name of the deprecated `evaluation_strategy` argument
    save_strategy="epoch",
    logging_steps=10,

    # Training parameters
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,

    # Regularization
    weight_decay=0.01,
    warmup_ratio=0.1,

    # Model selection
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,  # higher accuracy is better

    # Efficiency settings
    gradient_accumulation_steps=4,  # 4 samples per step * 4 accumulated steps = 16 per device
    bf16=True,  # bfloat16 training, matching the dtype the model is loaded in
    dataloader_pin_memory=True,
    remove_unused_columns=True,

    # Reproducibility
    seed=42,

    # Reporting
    run_name="llama-3.2-3b-inflation-1040",
    report_to="none"
)

In [None]:
# 8. Assemble the Trainer from the model, both data splits, the metric function
#    and the epoch-timing callback
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[TimeTrackerCallback()],
)

# 9. Run fine-tuning
print("Starting training...")
trainer.train()

# 520 (main-prod-520.csv)

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

class TimeTrackerCallback(TrainerCallback):
    """Trainer callback that prints how long each training epoch took.

    The timer is started in ``on_epoch_begin`` and read in ``on_epoch_end``.
    """

    # Timer value captured at the start of the current epoch. Stays None until
    # the first on_epoch_begin call so on_epoch_end can detect a missing start.
    start_time = None

    def on_epoch_begin(self, args, state, control, model=None, **kwargs):
        """Record the moment the epoch starts."""
        # perf_counter is meant for measuring durations and is not affected by
        # system clock updates, unlike the wall-clock time.time().
        self.start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Print the duration of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was observed; there is nothing to report.
            return
        elapsed_time = time.perf_counter() - self.start_time
        print(f"Epoch {state.epoch} training time: {elapsed_time:.2f} seconds")

# Select CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Report the name of the first GPU when one is in use
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Prompt template wrapped around every Reddit post before tokenization.
# It describes a three-way labelling scheme (0 = deflation, 1 = neither,
# 2 = inflation). `{post}` is the only placeholder and is filled with
# str.format() by format_with_prompt().
INFLATION_PROMPT = """You are a chief economist at the IMF. I would like you to infer the public perception of inflation from Reddit posts. Please classify each Reddit post into one of the following categories: 0: The post indicates deflation, such as the lower price of goods or services (e.g., "the prices are not bad"), affordable services (e.g., "this champagne is cheap and delicious"), sales information (e.g., "you can get it for only 10 dollars."), or a declining and buyer's market. 2: The post indicates or includes inflation, such as the higher price of goods or services (e.g., "it's not cheap"), the unreasonable cost of goods or services (e.g., "the food is overpriced and cold"), consumers struggling to afford necessities (e.g., "items are too expensive to buy"), shortage of goods of services, or mention about an asset bubble. 1: The post indicates neither deflation (0) nor inflation (2). This category also includes just questions to a community, social statements not personal experience, factual observations, references to originally expensive or cheap goods or services (e.g., "a gorgeous and costly dinner" or "an affordable Civic"), website promotion, authors' wishes, or illogical text. Please choose a stronger stance when the text includes both 0 and 2 stances. If these stances are of the same degree, answer 1.

Reddit Post: {post}

Classification:"""

In [None]:
# 1. Read the annotated posts from Drive, then report table size and label balance
df = pd.read_csv(
    '/content/drive/MyDrive/world-inflation/data/reddit/production/main-prod-520.csv',
    sep=',',
)
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['inflation'].value_counts()}")

# Wrap a single post in the classification prompt
def format_with_prompt(post):
    """Return INFLATION_PROMPT with its ``{post}`` placeholder replaced by *post*."""
    filled_prompt = INFLATION_PROMPT.format(post=post)
    return filled_prompt

# Build the model input column by wrapping every post body in the prompt
df['formatted_body'] = df['body'].apply(format_with_prompt)

# 2. Stratified 75/25 train/validation split with a fixed seed
train_df, val_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['inflation'],
    random_state=42,
)

# 3. Wrap both splits as HuggingFace Datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, val_df)
)

# 4. Tokenizer and model are loaded from the same checkpoint name
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 with automatic device placement
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    num_labels=3,
)

# Keep the model config's pad token id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# 5. Tokenization of the prompt-formatted posts
def tokenize_function(examples):
    """Tokenize a batch of prompt-formatted posts and attach their labels.

    Reads ``examples['formatted_body']``, pads/truncates every entry to 512
    tokens and requests PyTorch tensors from the tokenizer. The values of
    ``examples['inflation']`` are stored under the ``labels`` key.
    """
    encoded = tokenizer(
        examples['formatted_body'],
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    encoded['labels'] = examples['inflation']
    return encoded

# Tokenize both splits in batches, dropping the original columns so only the
# tokenizer outputs and labels remain
tokenized_train = train_dataset.map(
    tokenize_function,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize_function,
    remove_columns=val_dataset.column_names,
    batched=True,
)

# 6. Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    # zero_division=0 yields the same score sklearn assigns by default when a
    # class is never predicted (0), but without an UndefinedMetricWarning on
    # every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# 7. Set up training arguments
# Checkpoints and logs are written to Google Drive. Evaluation and saving both
# happen once per epoch so the best checkpoint (highest eval_accuracy) can be
# reloaded when training ends.
training_args = TrainingArguments(
    # Output and logging
    output_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-520",
    logging_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-520/logs",

    # Evaluation and saving
    eval_strategy="epoch",  # current name of the deprecated `evaluation_strategy` argument
    save_strategy="epoch",
    logging_steps=10,

    # Training parameters
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,

    # Regularization
    weight_decay=0.01,
    warmup_ratio=0.1,

    # Model selection
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,  # higher accuracy is better

    # Efficiency settings
    gradient_accumulation_steps=4,  # 4 samples per step * 4 accumulated steps = 16 per device
    bf16=True,  # bfloat16 training, matching the dtype the model is loaded in
    dataloader_pin_memory=True,
    remove_unused_columns=True,

    # Reproducibility
    seed=42,

    # Reporting
    run_name="llama-3.2-3b-inflation-520",
    report_to="none"
)

In [None]:
# 8. Assemble the Trainer from the model, both data splits, the metric function
#    and the epoch-timing callback
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[TimeTrackerCallback()],
)

# 9. Run fine-tuning
print("Starting training...")
trainer.train()

# 260

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

class TimeTrackerCallback(TrainerCallback):
    """Trainer callback that prints how long each training epoch took.

    The timer is started in ``on_epoch_begin`` and read in ``on_epoch_end``.
    """

    # Timer value captured at the start of the current epoch. Stays None until
    # the first on_epoch_begin call so on_epoch_end can detect a missing start.
    start_time = None

    def on_epoch_begin(self, args, state, control, model=None, **kwargs):
        """Record the moment the epoch starts."""
        # perf_counter is meant for measuring durations and is not affected by
        # system clock updates, unlike the wall-clock time.time().
        self.start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Print the duration of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was observed; there is nothing to report.
            return
        elapsed_time = time.perf_counter() - self.start_time
        print(f"Epoch {state.epoch} training time: {elapsed_time:.2f} seconds")

# Select CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Report the name of the first GPU when one is in use
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Prompt template wrapped around every Reddit post before tokenization.
# It describes a three-way labelling scheme (0 = deflation, 1 = neither,
# 2 = inflation). `{post}` is the only placeholder and is filled with
# str.format() by format_with_prompt().
INFLATION_PROMPT = """You are a chief economist at the IMF. I would like you to infer the public perception of inflation from Reddit posts. Please classify each Reddit post into one of the following categories: 0: The post indicates deflation, such as the lower price of goods or services (e.g., "the prices are not bad"), affordable services (e.g., "this champagne is cheap and delicious"), sales information (e.g., "you can get it for only 10 dollars."), or a declining and buyer's market. 2: The post indicates or includes inflation, such as the higher price of goods or services (e.g., "it's not cheap"), the unreasonable cost of goods or services (e.g., "the food is overpriced and cold"), consumers struggling to afford necessities (e.g., "items are too expensive to buy"), shortage of goods of services, or mention about an asset bubble. 1: The post indicates neither deflation (0) nor inflation (2). This category also includes just questions to a community, social statements not personal experience, factual observations, references to originally expensive or cheap goods or services (e.g., "a gorgeous and costly dinner" or "an affordable Civic"), website promotion, authors' wishes, or illogical text. Please choose a stronger stance when the text includes both 0 and 2 stances. If these stances are of the same degree, answer 1.

Reddit Post: {post}

Classification:"""

In [None]:
# 1. Read the annotated posts from Drive, then report table size and label balance
df = pd.read_csv(
    '/content/drive/MyDrive/world-inflation/data/reddit/production/main-prod-260.csv',
    sep=',',
)
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['inflation'].value_counts()}")

# Wrap a single post in the classification prompt
def format_with_prompt(post):
    """Return INFLATION_PROMPT with its ``{post}`` placeholder replaced by *post*."""
    filled_prompt = INFLATION_PROMPT.format(post=post)
    return filled_prompt

# Build the model input column by wrapping every post body in the prompt
df['formatted_body'] = df['body'].apply(format_with_prompt)

# 2. Stratified 75/25 train/validation split with a fixed seed
train_df, val_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['inflation'],
    random_state=42,
)

# 3. Wrap both splits as HuggingFace Datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, val_df)
)

# 4. Tokenizer and model are loaded from the same checkpoint name
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 with automatic device placement
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    num_labels=3,
)

# Keep the model config's pad token id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# 5. Tokenization of the prompt-formatted posts
def tokenize_function(examples):
    """Tokenize a batch of prompt-formatted posts and attach their labels.

    Reads ``examples['formatted_body']``, pads/truncates every entry to 512
    tokens and requests PyTorch tensors from the tokenizer. The values of
    ``examples['inflation']`` are stored under the ``labels`` key.
    """
    encoded = tokenizer(
        examples['formatted_body'],
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    encoded['labels'] = examples['inflation']
    return encoded

# Tokenize both splits in batches, dropping the original columns so only the
# tokenizer outputs and labels remain
tokenized_train = train_dataset.map(
    tokenize_function,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize_function,
    remove_columns=val_dataset.column_names,
    batched=True,
)

# 6. Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    # zero_division=0 yields the same score sklearn assigns by default when a
    # class is never predicted (0), but without an UndefinedMetricWarning on
    # every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# 7. Set up training arguments
# Checkpoints and logs are written to Google Drive. Evaluation and saving both
# happen once per epoch so the best checkpoint (highest eval_accuracy) can be
# reloaded when training ends.
training_args = TrainingArguments(
    # Output and logging
    output_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-260",
    logging_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-260/logs",

    # Evaluation and saving
    eval_strategy="epoch",  # current name of the deprecated `evaluation_strategy` argument
    save_strategy="epoch",
    logging_steps=10,

    # Training parameters
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,

    # Regularization
    weight_decay=0.01,
    warmup_ratio=0.1,

    # Model selection
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,  # higher accuracy is better

    # Efficiency settings
    gradient_accumulation_steps=4,  # 4 samples per step * 4 accumulated steps = 16 per device
    bf16=True,  # bfloat16 training, matching the dtype the model is loaded in
    dataloader_pin_memory=True,
    remove_unused_columns=True,

    # Reproducibility
    seed=42,

    # Reporting
    # Run name carries the same 260 suffix as this section's dataset and output_dir
    run_name="llama-3.2-3b-inflation-260",
    report_to="none"
)

In [None]:
# 8. Assemble the Trainer from the model, both data splits, the metric function
#    and the epoch-timing callback
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[TimeTrackerCallback()],
)

# 9. Run fine-tuning
print("Starting training...")
trainer.train()

# 130

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

class TimeTrackerCallback(TrainerCallback):
    """Trainer callback that prints how long each training epoch took.

    The timer is started in ``on_epoch_begin`` and read in ``on_epoch_end``.
    """

    # Timer value captured at the start of the current epoch. Stays None until
    # the first on_epoch_begin call so on_epoch_end can detect a missing start.
    start_time = None

    def on_epoch_begin(self, args, state, control, model=None, **kwargs):
        """Record the moment the epoch starts."""
        # perf_counter is meant for measuring durations and is not affected by
        # system clock updates, unlike the wall-clock time.time().
        self.start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Print the duration of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was observed; there is nothing to report.
            return
        elapsed_time = time.perf_counter() - self.start_time
        print(f"Epoch {state.epoch} training time: {elapsed_time:.2f} seconds")

# Select CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Report the name of the first GPU when one is in use
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Prompt template wrapped around every Reddit post before tokenization.
# It describes a three-way labelling scheme (0 = deflation, 1 = neither,
# 2 = inflation). `{post}` is the only placeholder and is filled with
# str.format() by format_with_prompt().
INFLATION_PROMPT = """You are a chief economist at the IMF. I would like you to infer the public perception of inflation from Reddit posts. Please classify each Reddit post into one of the following categories: 0: The post indicates deflation, such as the lower price of goods or services (e.g., "the prices are not bad"), affordable services (e.g., "this champagne is cheap and delicious"), sales information (e.g., "you can get it for only 10 dollars."), or a declining and buyer's market. 2: The post indicates or includes inflation, such as the higher price of goods or services (e.g., "it's not cheap"), the unreasonable cost of goods or services (e.g., "the food is overpriced and cold"), consumers struggling to afford necessities (e.g., "items are too expensive to buy"), shortage of goods of services, or mention about an asset bubble. 1: The post indicates neither deflation (0) nor inflation (2). This category also includes just questions to a community, social statements not personal experience, factual observations, references to originally expensive or cheap goods or services (e.g., "a gorgeous and costly dinner" or "an affordable Civic"), website promotion, authors' wishes, or illogical text. Please choose a stronger stance when the text includes both 0 and 2 stances. If these stances are of the same degree, answer 1.

Reddit Post: {post}

Classification:"""

In [None]:
# 1. Read the annotated posts from Drive, then report table size and label balance
df = pd.read_csv(
    '/content/drive/MyDrive/world-inflation/data/reddit/production/main-prod-130.csv',
    sep=',',
)
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['inflation'].value_counts()}")

# Wrap a single post in the classification prompt
def format_with_prompt(post):
    """Return INFLATION_PROMPT with its ``{post}`` placeholder replaced by *post*."""
    filled_prompt = INFLATION_PROMPT.format(post=post)
    return filled_prompt

# Build the model input column by wrapping every post body in the prompt
df['formatted_body'] = df['body'].apply(format_with_prompt)

# 2. Stratified 75/25 train/validation split with a fixed seed
train_df, val_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['inflation'],
    random_state=42,
)

# 3. Wrap both splits as HuggingFace Datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, val_df)
)

# 4. Tokenizer and model are loaded from the same checkpoint name
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 with automatic device placement
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    num_labels=3,
)

# Keep the model config's pad token id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# 5. Tokenization of the prompt-formatted posts
def tokenize_function(examples):
    """Tokenize a batch of prompt-formatted posts and attach their labels.

    Reads ``examples['formatted_body']``, pads/truncates every entry to 512
    tokens and requests PyTorch tensors from the tokenizer. The values of
    ``examples['inflation']`` are stored under the ``labels`` key.
    """
    encoded = tokenizer(
        examples['formatted_body'],
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    encoded['labels'] = examples['inflation']
    return encoded

# Tokenize both splits in batches, dropping the original columns so only the
# tokenizer outputs and labels remain
tokenized_train = train_dataset.map(
    tokenize_function,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize_function,
    remove_columns=val_dataset.column_names,
    batched=True,
)

# 6. Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    # zero_division=0 yields the same score sklearn assigns by default when a
    # class is never predicted (0), but without an UndefinedMetricWarning on
    # every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# 7. Set up training arguments
# Checkpoints and logs are written to Google Drive. Evaluation and saving both
# happen once per epoch so the best checkpoint (highest eval_accuracy) can be
# reloaded when training ends.
training_args = TrainingArguments(
    # Output and logging
    output_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-130",
    logging_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-130/logs",

    # Evaluation and saving
    eval_strategy="epoch",  # current name of the deprecated `evaluation_strategy` argument
    save_strategy="epoch",
    logging_steps=10,

    # Training parameters
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,

    # Regularization
    weight_decay=0.01,
    warmup_ratio=0.1,

    # Model selection
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,  # higher accuracy is better

    # Efficiency settings
    gradient_accumulation_steps=4,  # 4 samples per step * 4 accumulated steps = 16 per device
    bf16=True,  # bfloat16 training, matching the dtype the model is loaded in
    dataloader_pin_memory=True,
    remove_unused_columns=True,

    # Reproducibility
    seed=42,

    # Reporting
    # Run name carries the same 130 suffix as this section's dataset and output_dir
    run_name="llama-3.2-3b-inflation-130",
    report_to="none"
)

In [None]:
# 8. Assemble the Trainer from the model, both data splits, the metric function
#    and the epoch-timing callback
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[TimeTrackerCallback()],
)

# 9. Run fine-tuning
print("Starting training...")
trainer.train()

# 65

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

class TimeTrackerCallback(TrainerCallback):
    """Trainer callback that prints how long each training epoch took.

    The timer is started in ``on_epoch_begin`` and read in ``on_epoch_end``.
    """

    # Timer value captured at the start of the current epoch. Stays None until
    # the first on_epoch_begin call so on_epoch_end can detect a missing start.
    start_time = None

    def on_epoch_begin(self, args, state, control, model=None, **kwargs):
        """Record the moment the epoch starts."""
        # perf_counter is meant for measuring durations and is not affected by
        # system clock updates, unlike the wall-clock time.time().
        self.start_time = time.perf_counter()

    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        """Print the duration of the epoch that just finished."""
        if self.start_time is None:
            # No matching on_epoch_begin was observed; there is nothing to report.
            return
        elapsed_time = time.perf_counter() - self.start_time
        print(f"Epoch {state.epoch} training time: {elapsed_time:.2f} seconds")

# Select CUDA when a GPU is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Report the name of the first GPU when one is in use
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Prompt template wrapped around every Reddit post before tokenization.
# It describes a three-way labelling scheme (0 = deflation, 1 = neither,
# 2 = inflation). `{post}` is the only placeholder and is filled with
# str.format() by format_with_prompt().
INFLATION_PROMPT = """You are a chief economist at the IMF. I would like you to infer the public perception of inflation from Reddit posts. Please classify each Reddit post into one of the following categories: 0: The post indicates deflation, such as the lower price of goods or services (e.g., "the prices are not bad"), affordable services (e.g., "this champagne is cheap and delicious"), sales information (e.g., "you can get it for only 10 dollars."), or a declining and buyer's market. 2: The post indicates or includes inflation, such as the higher price of goods or services (e.g., "it's not cheap"), the unreasonable cost of goods or services (e.g., "the food is overpriced and cold"), consumers struggling to afford necessities (e.g., "items are too expensive to buy"), shortage of goods of services, or mention about an asset bubble. 1: The post indicates neither deflation (0) nor inflation (2). This category also includes just questions to a community, social statements not personal experience, factual observations, references to originally expensive or cheap goods or services (e.g., "a gorgeous and costly dinner" or "an affordable Civic"), website promotion, authors' wishes, or illogical text. Please choose a stronger stance when the text includes both 0 and 2 stances. If these stances are of the same degree, answer 1.

Reddit Post: {post}

Classification:"""

In [None]:
# 1. Read the annotated posts from Drive, then report table size and label balance
df = pd.read_csv(
    '/content/drive/MyDrive/world-inflation/data/reddit/production/main-prod-65.csv',
    sep=',',
)
print(f"Dataset shape: {df.shape}")
print(f"Class distribution:\n{df['inflation'].value_counts()}")

# Wrap a single post in the classification prompt
def format_with_prompt(post):
    """Return INFLATION_PROMPT with its ``{post}`` placeholder replaced by *post*."""
    filled_prompt = INFLATION_PROMPT.format(post=post)
    return filled_prompt

# Build the model input column by wrapping every post body in the prompt
df['formatted_body'] = df['body'].apply(format_with_prompt)

# 2. Stratified 75/25 train/validation split with a fixed seed
train_df, val_df = train_test_split(
    df,
    test_size=0.25,
    stratify=df['inflation'],
    random_state=42,
)

# 3. Wrap both splits as HuggingFace Datasets
train_dataset, val_dataset = (
    Dataset.from_pandas(split_df) for split_df in (train_df, val_df)
)

# 4. Tokenizer and model are loaded from the same checkpoint name
model_name = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Three-label sequence classifier loaded in bfloat16 with automatic device placement
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    num_labels=3,
)

# Keep the model config's pad token id in sync with the tokenizer
model.config.pad_token_id = tokenizer.pad_token_id

# 5. Tokenization of the prompt-formatted posts
def tokenize_function(examples):
    """Tokenize a batch of prompt-formatted posts and attach their labels.

    Reads ``examples['formatted_body']``, pads/truncates every entry to 512
    tokens and requests PyTorch tensors from the tokenizer. The values of
    ``examples['inflation']`` are stored under the ``labels`` key.
    """
    encoded = tokenizer(
        examples['formatted_body'],
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    encoded['labels'] = examples['inflation']
    return encoded

# Tokenize both splits in batches, dropping the original columns so only the
# tokenizer outputs and labels remain
tokenized_train = train_dataset.map(
    tokenize_function,
    remove_columns=train_dataset.column_names,
    batched=True,
)
tokenized_val = val_dataset.map(
    tokenize_function,
    remove_columns=val_dataset.column_names,
    batched=True,
)

# 6. Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the classes on axis 1; ``labels`` holds
            the true class ids.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    predictions, labels = eval_pred
    preds = np.argmax(predictions, axis=1)
    # zero_division=0 yields the same score sklearn assigns by default when a
    # class is never predicted (0), but without an UndefinedMetricWarning on
    # every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [None]:
# 7. Set up training arguments
# Checkpoints and logs are written to Google Drive. Evaluation and saving both
# happen once per epoch so the best checkpoint (highest eval_accuracy) can be
# reloaded when training ends.
training_args = TrainingArguments(
    # Output and logging
    output_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-65",
    logging_dir="/content/drive/MyDrive/world-inflation/data/model/llama-3.2-3b-fine-tuning-65/logs",

    # Evaluation and saving
    eval_strategy="epoch",  # current name of the deprecated `evaluation_strategy` argument
    save_strategy="epoch",
    logging_steps=10,

    # Training parameters
    learning_rate=5e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,

    # Regularization
    weight_decay=0.01,
    warmup_ratio=0.1,

    # Model selection
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,  # higher accuracy is better

    # Efficiency settings
    gradient_accumulation_steps=4,  # 4 samples per step * 4 accumulated steps = 16 per device
    bf16=True,  # bfloat16 training, matching the dtype the model is loaded in
    dataloader_pin_memory=True,
    remove_unused_columns=True,

    # Reproducibility
    seed=42,

    # Reporting
    run_name="llama-3.2-3b-inflation-65",
    report_to="none"
)

In [None]:
# 8. Assemble the Trainer from the model, both data splits, the metric function
#    and the epoch-timing callback
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    callbacks=[TimeTrackerCallback()],
)

# 9. Run fine-tuning
print("Starting training...")
trainer.train()