In [1]:
!pip install unsloth==2025.3.18 unsloth-zoo==2025.3.16 datasets

Collecting unsloth==2025.3.18
  Downloading unsloth-2025.3.18-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth-zoo==2025.3.16
  Downloading unsloth_zoo-2025.3.16-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth==2025.3.18)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth==2025.3.18)
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting tyro (from unsloth==2025.3.18)
  Downloading tyro-0.9.22-py3-none-any.whl.metadata (10 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth==2025.

In [2]:
!pip install transformers wandb



In [None]:
!pip install unsloth



In [None]:
# IELTS Writing Evaluator Fine-tuning with Llama 3.2-3B
# Optimized for Google Colab and Ollama deployment
import torch
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
from unsloth import FastLanguageModel, is_bfloat16_supported
import wandb
from huggingface_hub import login


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel, is_bfloat16_supported


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# --- Configuration ---
CSV_PATH = "ielts_writing_dataset.csv"
MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
OUTPUT_DIR = "./ielts_writing_evaluator"
HF_REPO_NAME = "chloemeow/ielts-writing-evaluator"
GITHUB_REPO = "ChLoeeei/ielts-writing-evaluator"

# Model parameters - optimized for Colab
MAX_SEQ_LENGTH = 2048
LOAD_IN_4BIT = True
DTYPE = None  # Let unsloth decide automatically

# Credentials
HF_TOKEN = "hf_xx"
wandb_key = "673xx"

print("🚀 Starting IELTS Writing Evaluator Fine-tuning")
print(f"📊 Loading dataset from: {CSV_PATH}")

🚀 Starting IELTS Writing Evaluator Fine-tuning
📊 Loading dataset from: ielts_writing_dataset.csv


In [None]:
# --- Dataset Loading and Preparation ---
try:
    dataset_df = pd.read_csv(CSV_PATH)
    # Clean the dataset
    dataset_df = dataset_df[['Question', 'Essay', 'Overall']].dropna().reset_index(drop=True)
    print(f"✅ Loaded {len(dataset_df)} essays with scores.")
    print(f"📈 Score distribution:")
    print(dataset_df['Overall'].value_counts().sort_index())
except Exception as e:
    print(f"❌ Error reading CSV: {e}")
    exit()

# --- Enhanced Dataset Formatting ---
def format_instruction(sample):
    """
    Enhanced format for both evaluation and teaching capabilities
    """
    question = sample['Question']
    essay = sample['Essay']
    overall_score = sample['Overall']

    # Create more detailed evaluation template
    instruction = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert IELTS writing examiner and teacher. Your task is to evaluate essays and provide educational feedback to help students improve their writing skills.

Evaluation Criteria:
1. Task Achievement (TA): How well the essay addresses the task
2. Coherence and Cohesion (CC): Organization and logical flow
3. Lexical Resource (LR): Vocabulary range and accuracy
4. Grammatical Range and Accuracy (GRA): Grammar complexity and correctness

Each criterion is scored from 1-9, and the overall band score is calculated accordingly.<|eot_id|><|start_header_id|>user<|end_header_id|>

Please evaluate this IELTS Writing Task 2 essay:

**Question:** {question}

**Student Essay:** {essay}

Please provide:
1. Overall band score and individual criterion scores
2. Detailed analysis for each criterion
3. Specific examples from the essay
4. Improvement suggestions
5. Sample corrections for key errors<|eot_id|><|start_header_id|>assistant<|end_header_id|>

## IELTS Writing Evaluation

### Overall Band Score: {overall_score}/9.0

### Detailed Analysis:

**Task Achievement:** The essay {"addresses the task effectively" if float(overall_score) >= 7 else "partially addresses the task requirements" if float(overall_score) >= 5 else "has limited task response"}.

**Coherence and Cohesion:** The essay {"demonstrates clear organization with effective paragraphing" if float(overall_score) >= 7 else "shows some organization but may lack clear progression" if float(overall_score) >= 5 else "has limited coherence and cohesion"}.

**Lexical Resource:** The vocabulary {"shows good range and flexibility" if float(overall_score) >= 7 else "is adequate but may be repetitive" if float(overall_score) >= 5 else "is limited with frequent errors"}.

**Grammatical Range and Accuracy:** The grammar {"demonstrates variety with good control" if float(overall_score) >= 7 else "shows some variety but with errors" if float(overall_score) >= 5 else "is limited with frequent errors"}.

### Key Improvement Areas:
- {"Focus on developing more complex arguments" if float(overall_score) < 7 else "Continue to refine argument sophistication"}
- {"Work on paragraph transitions and logical flow" if float(overall_score) < 6 else "Maintain good organizational structure"}
- {"Expand vocabulary range and accuracy" if float(overall_score) < 6 else "Continue building advanced vocabulary"}
- {"Practice complex sentence structures" if float(overall_score) < 6 else "Maintain grammatical accuracy"}

### Study Recommendations:
1. **Reading:** Study high-scoring IELTS essays in similar topics
2. **Practice:** Write essays with timed conditions (40 minutes)
3. **Vocabulary:** Build topic-specific vocabulary banks
4. **Grammar:** Focus on complex sentence structures and accuracy<|eot_id|>"""

    return {"text": instruction}

# Create datasets
print("📝 Formatting dataset...")
formatted_data = [format_instruction(row) for _, row in dataset_df.iterrows()]
dataset = Dataset.from_list(formatted_data)

# Split dataset - handle classes with only 1 sample
print("📊 Handling dataset split...")

# Check class distribution
score_counts = dataset_df['Overall'].value_counts()
print(f"Score distribution: {dict(score_counts)}")

# Find classes with only 1 sample
single_sample_classes = score_counts[score_counts == 1].index.tolist()
if single_sample_classes:
    print(f"⚠️  Classes with only 1 sample: {single_sample_classes}")
    print("Using random split instead of stratified split to handle rare classes.")

    # Use random split for better handling of rare classes
    train_dataset, eval_dataset = train_test_split(
        formatted_data,
        test_size=0.15,  # Slightly larger eval set to ensure diversity
        random_state=42,
        shuffle=True
    )
else:
    # Use stratified split if all classes have multiple samples
    train_dataset, eval_dataset = train_test_split(
        formatted_data,
        test_size=0.1,
        random_state=42,
        stratify=dataset_df['Overall']
    )

train_dataset = Dataset.from_list(train_dataset)
eval_dataset = Dataset.from_list(eval_dataset)

print(f"📚 Training set: {len(train_dataset)} samples")
print(f"📊 Evaluation set: {len(eval_dataset)} samples")

# --- Authentication ---
print("Authenticating...")
login(token=HF_TOKEN)
wandb.login(key=wandb_key)

wandb.init(
    project="ielts_writing_evaluator",
    name="llama3.2-3b-ielts-v1",
    config={
        "model": MODEL_NAME,
        "max_seq_length": MAX_SEQ_LENGTH,
        "dataset_size": len(dataset_df)
    }
)


✅ Loaded 1435 essays with scores.
📈 Score distribution:
Overall
1.0      1
3.0      2
3.5      5
4.0     11
4.5     21
5.0    104
5.5    176
6.0    264
6.5    250
7.0    254
7.5    138
8.0    137
8.5     35
9.0     37
Name: count, dtype: int64
📝 Formatting dataset...
📊 Handling dataset split...
Score distribution: {6.0: np.int64(264), 7.0: np.int64(254), 6.5: np.int64(250), 5.5: np.int64(176), 7.5: np.int64(138), 8.0: np.int64(137), 5.0: np.int64(104), 9.0: np.int64(37), 8.5: np.int64(35), 4.5: np.int64(21), 4.0: np.int64(11), 3.5: np.int64(5), 3.0: np.int64(2), 1.0: np.int64(1)}
⚠️  Classes with only 1 sample: [1.0]
Using random split instead of stratified split to handle rare classes.
📚 Training set: 1219 samples
📊 Evaluation set: 216 samples
🔑 Authenticating...


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mchloeeellff00[0m ([33mchloeeellff00-south-china-normal-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
!pip uninstall unsloth
!pip install --upgrade unsloth

[0mCollecting unsloth
  Downloading unsloth-2025.5.7-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.5.8 (from unsloth)
  Downloading unsloth_zoo-2025.5.8-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.22-py3-none-any.whl.metadata (10 kB)
Collecting datasets>=3.4.1 (from unsloth)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth)
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting protobuf<4.0.0 (from unsloth)
  Downloading protobuf-3

In [None]:
# --- Model Loading ---
print(f"🤖 Loading model: {MODEL_NAME}")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=DTYPE,
    load_in_4bit=LOAD_IN_4BIT,
    trust_remote_code=True,
)

# Configure LoRA
print("⚡ Setting up LoRA configuration...")
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # Increased rank for better performance
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# --- Training Configuration ---
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=1,  # Reduced for Colab
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,  # Increased to maintain effective batch size
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    logging_steps=10,
    learning_rate=2e-4,
    warmup_steps=50,
    optim="adamw_8bit",
    weight_decay=0.01,
    remove_unused_columns=False,
    report_to="wandb",
    dataloader_pin_memory=False,  # Reduce memory usage
    fp16=not is_bfloat16_supported(),
    bf16=is_bfloat16_supported(),
    group_by_length=True,  # Efficient batching
    ddp_find_unused_parameters=False,
)

# --- Tokenization ---
def tokenize_function(examples):
    """Tokenize the formatted instructions"""
    tokenized = tokenizer(
        examples["text"],
        padding=False,  # Dynamic padding is more memory efficient
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
        return_tensors=None,
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

print("🔤 Tokenizing datasets...")
tokenized_train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
    desc="Tokenizing train dataset"
)

tokenized_eval_dataset = eval_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
    desc="Tokenizing eval dataset"
)

# Data collator for dynamic padding
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8,  # Optimize for tensor cores
)

# --- Training ---
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    data_collator=data_collator,
)

print("Starting training...")
trainer.train()


🤖 Loading model: unsloth/Llama-3.2-3B-Instruct-bnb-4bit
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


⚡ Setting up LoRA configuration...


Unsloth 2025.5.7 patched 28 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


🔤 Tokenizing datasets...


Tokenizing train dataset:   0%|          | 0/1219 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/216 [00:00<?, ? examples/s]

🎯 Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,219 | Num Epochs = 3 | Total steps = 456
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 48,627,712/3,000,000,000 (1.62% trained)


RuntimeError: PassManager::run failed

In [None]:
# --- Model Saving ---
print("💾 Saving model...")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("📤 Saving to Hugging Face Hub...")
model.push_to_hub(HF_REPO_NAME, token=HF_TOKEN)
tokenizer.push_to_hub(HF_REPO_NAME, token=HF_TOKEN)

💾 Saving model...
📤 Saving to Hugging Face Hub...


README.md:   0%|          | 0.00/598 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/195M [00:00<?, ?B/s]

Saved model to https://huggingface.co/chloemeow/ielts-writing-evaluator


tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [None]:

# --- Prepare for Ollama ---
print("🔄 Converting for Ollama deployment...")

# Save in format suitable for Ollama
ollama_dir = "./ollama_model"
model.save_pretrained_merged(ollama_dir, tokenizer, save_method="merged_16bit")

print(f"""
✅ Training Complete!

📁 Model saved to: {OUTPUT_DIR}
🤗 Hugging Face: {HF_REPO_NAME}
🦙 Ollama model: {ollama_dir}

🚀 To deploy with Ollama:
1. Copy the model files to your Ollama models directory
2. Create a Modelfile with your model configuration
3. Run: ollama create ielts-evaluator -f Modelfile

📝 Example Modelfile:
FROM {ollama_dir}
TEMPLATE \"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert IELTS writing examiner and teacher.
<|eot_id|><|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER stop <|eot_id|>
""")

# Clean up
wandb.finish()
print("🎉 All done! Your IELTS Writing Evaluator is ready!")

🔄 Converting for Ollama deployment...


Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 2.2G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 55.25 out of 83.48 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 28/28 [00:00<00:00, 103.39it/s]


Unsloth: Saving tokenizer... Done.
Done.

✅ Training Complete!

📁 Model saved to: ./ielts_writing_evaluator
🤗 Hugging Face: chloemeow/ielts-writing-evaluator
🦙 Ollama model: ./ollama_model

🚀 To deploy with Ollama:
1. Copy the model files to your Ollama models directory
2. Create a Modelfile with your model configuration
3. Run: ollama create ielts-evaluator -f Modelfile

📝 Example Modelfile:
FROM ./ollama_model
TEMPLATE """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are an expert IELTS writing examiner and teacher.
<|eot_id|><|start_header_id|>user<|end_header_id|>
{ .Prompt }<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER stop <|eot_id|>



0,1
eval/loss,█▅▄▃▂▂▂▁▁
eval/runtime,▁▁▁▆▅▃▄▅█
eval/samples_per_second,███▃▄▆▅▄▁
eval/steps_per_second,███▃▄▆▅▄▁
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,█▆▃▃▂▁▂▂▂▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▄▃▄▄▃▄▄▄▄▃▃
train/learning_rate,▂▄▅▇████▇▇▇▇▇▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁
train/loss,█▇▃▃▄▃▂▃▃▂▃▃▃▂▃▃▂▃▃▂▂▃▃▂▃▃▂▂▃▂▁▂▂▁▃▃▂▁▃▁

0,1
eval/loss,1.07829
eval/runtime,29.1068
eval/samples_per_second,7.421
eval/steps_per_second,7.421
total_flos,4.464888412141978e+16
train/epoch,2.98441
train/global_step,456.0
train/grad_norm,0.32534
train/learning_rate,0.0
train/loss,0.771


🎉 All done! Your IELTS Writing Evaluator is ready!


In [None]:
model.config.to_json_file("config.json")


In [None]:
!zip -r ollama_model.zip ollama_model

  adding: ollama_model/ (stored 0%)
  adding: ollama_model/generation_config.json (deflated 37%)
  adding: ollama_model/model-00001-of-00002.safetensors (deflated 21%)
  adding: ollama_model/model-00002-of-00002.safetensors (deflated 21%)
  adding: ollama_model/tokenizer_config.json (deflated 94%)
  adding: ollama_model/config.json (deflated 53%)
  adding: ollama_model/tokenizer.json (deflated 85%)
  adding: ollama_model/model.safetensors.index.json (deflated 96%)
  adding: ollama_model/special_tokens_map.json (deflated 71%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
drive.mount('/content/drive')


!cp /content/ollama_model.zip /content/drive/MyDrive/


In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("chloemeow/ielts-writing-evaluator")
model = AutoModelForCausalLM.from_pretrained("chloemeow/ielts-writing-evaluator")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/924 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/871 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/195M [00:00<?, ?B/s]

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 3. 定义要评分的雅思作文（英文原文）
essay_text = """
Opinions diverge among young adults pursuing tertiary education currently. While some of them opt for a multidisciplinary route, others concentrate on one single major. There are valid reasons for both sides, which will be further explained in this essay before presenting my own opinion.

For proponents of multi-course strategy, the reasons involve the recognition that the job market in the existing society requires well-rounded talents rather than professional experts. This holds some truth for general majors such as English/Chinese Studies, Marketing and Business. To have a competitive edge over other graduates and would-be editors, sellers, and managers, being equipped with additional knowledge of related fields can be a viable approach. Coveting having a minor apart from the courses in their own major can be even more desirable for numerous students unsure of their future jobs. Probably by having a taste of distinct courses, they can find a major that ignites their inner pursuit and fully utilise their aptitudes for a proper future career.

However, there are certain cases where allocating time to various courses in different majors is not a rule of thumb. This particularly applies to those majors requiring arduous effort and resilience. One prime example is would-be lawyers and doctors. To successfully earn qualification, these graduates should be devoted to their key courses, learning an excessive number of past court cases and symptoms by heart before they can garner a decent and satisfactory job. Were it not for their years of endeavour in their studies, they would not be qualified workers in relevant fields.

In conclusion, whether choosing to delve into one major or striving for several various subjects is contingent on the nature of majors. While those majors entailing a significant amount of professional expertise give no spare time for undergraduates, college students with no clear vision of their future jobs and more ambitious ones can assume more control over their choice of majors.
"""

# 4. 构建改进后的 prompt，明确要求中文回答并给出 1–9 分数
prompt = f"""请用中文对下面的 IELTS Task 2 作文进行评价，并给出从 1 到 9 的“整体分数”，最后用几句话说明评分理由。

【作文原文】
{essay_text}

【评价】
"""

# 5. 对 prompt 进行编码，并将张量移动到 GPU
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
inputs = {k: v.to(device) for k, v in inputs.items()}

# 6. 用模型生成答案
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,       # 最多生成 200 个新 token
        do_sample=False,          # 关闭采样以获得更稳定的输出
        num_beams=3               # 使用 beam search 来提高答案质量
    )

# 7. 解码输出结果并打印
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


请用中文对下面的 IELTS Task 2 作文进行评价，并给出从 1 到 9 的“整体分数”，最后用几句话说明评分理由。

【作文原文】

Opinions diverge among young adults pursuing tertiary education currently. While some of them opt for a multidisciplinary route, others concentrate on one single major. There are valid reasons for both sides, which will be further explained in this essay before presenting my own opinion.

For proponents of multi-course strategy, the reasons involve the recognition that the job market in the existing society requires well-rounded talents rather than professional experts. This holds some truth for general majors such as English/Chinese Studies, Marketing and Business. To have a competitive edge over other graduates and would-be editors, sellers, and managers, being equipped with additional knowledge of related fields can be a viable approach. Coveting having a minor apart from the courses in their own major can be even more desirable for numerous students unsure of their future jobs. Probably by having a taste of dist