In [2]:
# Install Required Packages & Import Dependencies
%%capture
!pip install transformers torch pandas numpy tqdm scikit-learn nltk seaborn matplotlib
import json
import pandas as pd
import numpy as np
import re
from pathlib import Path
from typing import List, Dict, Tuple
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
import random
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import nltk
from nltk.translate.bleu_score import sentence_bleu
from collections import defaultdict
from datetime import datetime
import os
from google.colab import drive

In [3]:
# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load dataset from JSON
def load_json_dataset(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)

    problems = [entry['Body'] for entry in data]  # Use 'Body' for the problem description
    equations = [entry['Equation'] for entry in data]  # Use 'Equation' for the expected equation

    return problems, equations

# Specify the path to your JSON file on Google Drive
DATASET_PATH = '/content/drive/MyDrive/AWPSS/SVAMP.json'

# Load the dataset
problems, equations = load_json_dataset(DATASET_PATH)

In [5]:
# Dataset Class
class MathDataset(Dataset):
    def __init__(self, problems, equations, tokenizer, max_length=256):
        self.tokenizer = tokenizer
        self.problems = problems
        self.equations = equations
        self.max_length = max_length

    def __len__(self):
        return len(self.problems)

    def __getitem__(self, idx):
        problem = str(self.problems[idx])
        equation = str(self.equations[idx])

        input_text = f"Generate equation: {problem}"
        target_text = equation

        input_encoding = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        target_encoding = self.tokenizer(
            target_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        return {
            'input_ids': input_encoding['input_ids'].squeeze(),
            'attention_mask': input_encoding['attention_mask'].squeeze(),
            'labels': target_encoding['input_ids'].squeeze(),
        }

In [6]:
class MathProblemSolver:
    def __init__(self, model_name="t5-base"):  # Model being use
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)  # Using T5 tokenizer
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)  # Using T5 model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        print(f"Using device: {self.device}")

    def augment_data(self, problems, equations):
        augmented_problems = []
        augmented_equations = []

        # Extended number word dictionary
        number_words = {
            'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
            'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
            'zero': '0', 'eleven': '11', 'twelve': '12', 'thirteen': '13', 'fourteen': '14',
            'fifteen': '15', 'sixteen': '16', 'seventeen': '17', 'eighteen': '18', 'nineteen': '19',
            'twenty': '20', 'thirty': '30', 'forty': '40', 'fifty': '50', 'sixty': '60', 'seventy': '70',
            'eighty': '80', 'ninety': '90', 'hundred': '100'
        }

        for problem, equation in zip(problems, equations):
            # Number substitution
            numbers = re.findall(r'\d+', problem)
            if numbers:
                for _ in range(2):  # Generate 2 variations per original problem
                    new_problem = problem
                    new_equation = equation
                    number_mapping = {}

                    for num in numbers:
                        orig_num = int(num)
                        # Controlled number substitution (between 1x and 3x)
                        new_num = str(random.randint(
                            max(1, int(orig_num * 1.5)),
                            int(orig_num * 3)
                        ))
                        number_mapping[num] = new_num

                    # Replace numbers with new ones in problem and equation
                    for old_num, new_num in number_mapping.items():
                        new_problem = new_problem.replace(old_num, new_num)
                        new_equation = new_equation.replace(old_num, new_num)

                    # Check if the new problem still makes sense mathematically
                    try:
                        eval(new_equation)  # Evaluate equation to check validity
                        augmented_problems.append(new_problem)
                        augmented_equations.append(new_equation)
                    except:
                        continue  # Skip if the new equation is invalid

            # Number word variation (e.g., "two" -> "2" and vice versa)
            for word, digit in number_words.items():
                if word in problem.lower():
                    new_problem = problem.lower().replace(word, digit)
                    augmented_problems.append(new_problem)
                    augmented_equations.append(equation)
                elif digit in problem:
                    new_problem = problem.replace(digit, word)
                    augmented_problems.append(new_problem)
                    augmented_equations.append(equation)

        return (problems + augmented_problems, equations + augmented_equations)

    def prepare_data(self, problems, equations, test_size=0.2):
        problems, equations = self.augment_data(problems, equations)

        train_problems, val_problems, train_equations, val_equations = train_test_split(
            problems, equations, test_size=test_size, random_state=42
        )

        train_dataset = MathDataset(train_problems, train_equations, self.tokenizer)
        val_dataset = MathDataset(val_problems, val_equations, self.tokenizer)

        return train_dataset, val_dataset

    def train(self, train_dataset, val_dataset, epochs=15, batch_size=8):
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        optimizer = torch.optim.AdamW(self.model.parameters(), lr=3e-5, weight_decay=0.02)

        num_training_steps = len(train_loader) * epochs
        num_warmup_steps = num_training_steps // 10

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps
        )

        best_val_loss = float('inf')
        for epoch in range(epochs):
            self.model.train()
            total_train_loss = 0
            train_steps = 0

            for batch in tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}'):
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                optimizer.zero_grad()

                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )

                loss = outputs.loss
                total_train_loss += loss.item()
                train_steps += 1

                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                optimizer.step()
                scheduler.step()

            avg_train_loss = total_train_loss / train_steps

            self.model.eval()
            total_val_loss = 0
            val_steps = 0

            with torch.no_grad():
                for batch in tqdm(val_loader, desc='Validating'):
                    input_ids = batch['input_ids'].to(self.device)
                    attention_mask = batch['attention_mask'].to(self.device)
                    labels = batch['labels'].to(self.device)

                    outputs = self.model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        labels=labels
                    )

                    total_val_loss += outputs.loss.item()
                    val_steps += 1

            avg_val_loss = total_val_loss / val_steps

            print(f'Epoch {epoch + 1}:')
            print(f'Average training loss: {avg_train_loss:.4f}')
            print(f'Average validation loss: {avg_val_loss:.4f}')
            print(f'Learning rate: {scheduler.get_last_lr()[0]:.7f}')

            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': best_val_loss,
                }, '/content/drive/MyDrive/AWPSS/AWPSS_BEST_MODEL.pt')

    def solve(self, problem):
        self.model.eval()
        input_text = f"Generate equation: {problem}"

        inputs = self.tokenizer(
            input_text,
            max_length=256,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=256,
                num_beams=5,
                length_penalty=1.0,
                early_stopping=True,
                no_repeat_ngram_size=2,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7
            )

        equation = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return equation

    def solve_and_compute(self, problem):
        equation = self.solve(problem)
        equation_to_solve = equation.split('=')[0].strip()

        try:
            result = eval(equation_to_solve)
            return equation, result
        except:
            return equation, "Unable to compute result"

In [7]:
# Main execution
def main():
    # Load dataset from JSON file
    problems, equations = load_json_dataset(DATASET_PATH)

    # Initialize solver
    solver = MathProblemSolver()

    # Prepare datasets
    train_dataset, val_dataset = solver.prepare_data(problems, equations)

    # Train model
    solver.train(train_dataset, val_dataset, epochs=15, batch_size=8)

    # Test the trained model
    test_problems = [
        "Each pack of dvds costs 76 dollars. If there is a discount of 22 dollars on each pack, how much do you have to pay to buy each pack?",
        "Dan had $ 3 left with him after he bought a candy bar. If he had $ 10 at the start, how much did the candy bar cost?",
        "Paco had 20 salty cookies and 17 sweet cookies. He ate 14 sweet cookies and 9 salty cookies. How many salty cookies did Paco have left?",
        "43 children were riding on the bus. At the bus stop some children got off the bus. Then there were 21 children left on the bus. How many children got off the bus at the bus stop?"
    ]

    print("\nTesting the model:")
    for problem in test_problems:
        equation, result = solver.solve_and_compute(problem)
        print(f"\nProblem: {problem}")
        print(f"Generated equation: {equation}")
        print(f"Result: {result}")

if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Using device: cuda


Epoch 1/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 1:
Average training loss: 3.5161
Average validation loss: 0.0375
Learning rate: 0.0000200


Epoch 2/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 2:
Average training loss: 0.0373
Average validation loss: 0.0212
Learning rate: 0.0000289


Epoch 3/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 3:
Average training loss: 0.0245
Average validation loss: 0.0153
Learning rate: 0.0000267


Epoch 4/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 4:
Average training loss: 0.0190
Average validation loss: 0.0120
Learning rate: 0.0000244


Epoch 5/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 5:
Average training loss: 0.0154
Average validation loss: 0.0098
Learning rate: 0.0000222


Epoch 6/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 6:
Average training loss: 0.0130
Average validation loss: 0.0085
Learning rate: 0.0000200


Epoch 7/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 7:
Average training loss: 0.0117
Average validation loss: 0.0072
Learning rate: 0.0000178


Epoch 8/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 8:
Average training loss: 0.0098
Average validation loss: 0.0066
Learning rate: 0.0000156


Epoch 9/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 9:
Average training loss: 0.0088
Average validation loss: 0.0060
Learning rate: 0.0000133


Epoch 10/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 10:
Average training loss: 0.0079
Average validation loss: 0.0055
Learning rate: 0.0000111


Epoch 11/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 11:
Average training loss: 0.0073
Average validation loss: 0.0053
Learning rate: 0.0000089


Epoch 12/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 12:
Average training loss: 0.0066
Average validation loss: 0.0050
Learning rate: 0.0000067


Epoch 13/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 13:
Average training loss: 0.0064
Average validation loss: 0.0047
Learning rate: 0.0000044


Epoch 14/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 14:
Average training loss: 0.0060
Average validation loss: 0.0046
Learning rate: 0.0000022


Epoch 15/15:   0%|          | 0/803 [00:00<?, ?it/s]

Validating:   0%|          | 0/201 [00:00<?, ?it/s]

Epoch 15:
Average training loss: 0.0060
Average validation loss: 0.0046
Learning rate: 0.0000000

Testing the model:

Problem: Each pack of dvds costs 76 dollars. If there is a discount of 22 dollars on each pack, how much do you have to pay to buy each pack?
Generated equation: ( 76.0 - 22.0 )
Result: 54.0

Problem: Dan had $ 3 left with him after he bought a candy bar. If he had $ 10 at the start, how much did the candy bar cost?
Generated equation: ( 10.0 - 3.0 )
Result: 7.0

Problem: Paco had 20 salty cookies and 17 sweet cookies. He ate 14 sweet cookies and 9 salty cookies. How many salty cookies did Paco have left?
Generated equation: ( 20.0 - 9.0 )
Result: 11.0

Problem: 43 children were riding on the bus. At the bus stop some children got off the bus. Then there were 21 children left on the bus. How many children got off the bus at the bus stop?
Generated equation: ( 43.0 - 21.0 )
Result: 22.0


In [8]:
def solve_user_input():
    # Load the trained model
    solver = MathProblemSolver()

    # Load the best saved model weights, mapping to CPU
    checkpoint = torch.load('/content/drive/MyDrive/AWPSS/AWPSS_BEST_MODEL.pt', map_location=torch.device('cpu'))
    solver.model.load_state_dict(checkpoint['model_state_dict'])

    while True:
        user_input = input("Enter a word problem (or type 'exit' to quit): ")

        if user_input.lower() == 'exit':
            break

        # Solve the user input problem
        equation, result = solver.solve_and_compute(user_input)

        print(f"Generated equation: {equation}")
        print(f"Result: {result}")

if __name__ == "__main__":
    solve_user_input()




Using device: cpu


  checkpoint = torch.load('/content/drive/MyDrive/AWPSS/AWPSS_BEST_MODEL.pt', map_location=torch.device('cpu'))


Enter a word problem (or type 'exit' to quit): Emily has 15 crayons in her box. Her friend gave her 8 more crayons. How many crayons does Emily have in total now?
Generated equation: ( 15.0 + 8.0 )
Result: 23.0
Enter a word problem (or type 'exit' to quit): A farmer planted 24 apple trees in his orchard. Later, he planted 16 more apple trees. How many apple trees does the farmer have in total?
Generated equation: ( 24.0 + 16.0 )
Result: 40.0
Enter a word problem (or type 'exit' to quit): James had 45 stickers. He gave 12 stickers to his younger brother. How many stickers does James have left?
Generated equation: ( 45.0 - 12.0 )
Result: 33.0
Enter a word problem (or type 'exit' to quit): There were 30 chairs in a hall. If 9 chairs were removed for cleaning. How many chairs are left in the hall?
Generated equation: ( 30.0 - 9.0 )
Result: 21.0
Enter a word problem (or type 'exit' to quit): Each box contains 6 pens. If there are 7 boxes in total. How many pens are there in all?
Generated e