In [55]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [56]:

print(torch.__version__)        # e.g., 2.4.0+cu121
print(torch.version.cuda)       # should say 12.1
print(torch.cuda.is_available())# should be True



2.8.0+cu129
12.9
True


In [57]:
data = [
    ("Bought pizza", "food"),
    ("Groceries shopping", "food"),
    ("Lunch at restaurant", "food"),
    ("Dinner with family", "food"),
    ("Bought vegetables", "food"),
    ("Bought fruits", "food"),
    ("Ordered online food", "food"),
    ("Coffee from cafe", "food"),
    ("Snacks from shop", "food"),
    ("Ice cream purchase", "food"),
    
    ("Doctor consultation", "medical"),
    ("Medicine purchase", "medical"),
    ("Bought vitamins", "medical"),
    ("Annual health checkup", "medical"),
    ("Dental appointment", "medical"),
    ("Eye checkup", "medical"),
    ("Vaccination fee", "medical"),
    ("Hospital visit", "medical"),
    ("Pharmacy bill", "medical"),
    ("Blood test", "medical"),
    
    ("Bus ticket", "transport"),
    ("Fuel for car", "transport"),
    ("Taxi ride", "transport"),
    ("Train ticket", "transport"),
    ("Flight booking", "transport"),
    ("Uber fare", "transport"),
    ("Metro pass", "transport"),
    ("Gas refill", "transport"),
    ("Parking fee", "transport"),
    ("Bike repair", "transport"),
    
    ("Movie ticket", "others"),
    ("Gym membership", "others"),
    ("Book purchase", "others"),
    ("Concert ticket", "others"),
    ("Music subscription", "others"),
    ("Gift for friend", "others"),
    ("Clothes shopping", "others"),
    ("Online course", "others"),
    ("Software purchase", "others"),
    ("Game purchase", "others"),
    
    ("Dinner at cafe", "food"),
    ("Bought salad", "food"),
    ("Car service", "transport"),
    ("Bought cough syrup", "medical"),
    ("Yoga class fee", "others"),
    ("Taxi to airport", "transport"),
    ("Lunch delivery", "food"),
    ("Eye drops purchase", "medical"),
    ("Movie streaming subscription", "others"),
    ("Metro recharge", "transport")
]


In [58]:
texts, labels = zip(*data)
texts,labels

(('Bought pizza',
  'Groceries shopping',
  'Lunch at restaurant',
  'Dinner with family',
  'Bought vegetables',
  'Bought fruits',
  'Ordered online food',
  'Coffee from cafe',
  'Snacks from shop',
  'Ice cream purchase',
  'Doctor consultation',
  'Medicine purchase',
  'Bought vitamins',
  'Annual health checkup',
  'Dental appointment',
  'Eye checkup',
  'Vaccination fee',
  'Hospital visit',
  'Pharmacy bill',
  'Blood test',
  'Bus ticket',
  'Fuel for car',
  'Taxi ride',
  'Train ticket',
  'Flight booking',
  'Uber fare',
  'Metro pass',
  'Gas refill',
  'Parking fee',
  'Bike repair',
  'Movie ticket',
  'Gym membership',
  'Book purchase',
  'Concert ticket',
  'Music subscription',
  'Gift for friend',
  'Clothes shopping',
  'Online course',
  'Software purchase',
  'Game purchase',
  'Dinner at cafe',
  'Bought salad',
  'Car service',
  'Bought cough syrup',
  'Yoga class fee',
  'Taxi to airport',
  'Lunch delivery',
  'Eye drops purchase',
  'Movie streaming subsc

In [59]:
#converting text data into numerical vectors
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts).toarray()
X


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [60]:
# Encoding the labels
encoder = LabelEncoder()
y = encoder.fit_transform(labels)
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 3, 1,
       2, 3, 0, 1, 2, 3])

In [61]:
#Train and test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)


In [62]:
#Define model
class ExpenseClassifier(nn.Module):
    def __init__(self, input_size,hidden_size, num_classes):
        super(ExpenseClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self,x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [63]:
input_size = X_train.shape[1]
hidden_size = 64
num_classes = len(set(y))

model = ExpenseClassifier(input_size, hidden_size, num_classes)

In [64]:
#Loss and optimizer
criterion = nn.CrossEntropyLoss() # compares predicted catgeory vs actual
optimizer = optim.Adam(model.parameters(), lr=0.01) # updates model weights efficiently


In [65]:
#Training loop
epochs = 200
for epoch in range(epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if(epoch+1)%10==0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [10/200], Loss: 0.9533
Epoch [20/200], Loss: 0.2465
Epoch [30/200], Loss: 0.0237
Epoch [40/200], Loss: 0.0036
Epoch [50/200], Loss: 0.0013
Epoch [60/200], Loss: 0.0008
Epoch [70/200], Loss: 0.0006
Epoch [80/200], Loss: 0.0005
Epoch [90/200], Loss: 0.0004
Epoch [100/200], Loss: 0.0004
Epoch [110/200], Loss: 0.0004
Epoch [120/200], Loss: 0.0003
Epoch [130/200], Loss: 0.0003
Epoch [140/200], Loss: 0.0003
Epoch [150/200], Loss: 0.0003
Epoch [160/200], Loss: 0.0003
Epoch [170/200], Loss: 0.0003
Epoch [180/200], Loss: 0.0002
Epoch [190/200], Loss: 0.0002
Epoch [200/200], Loss: 0.0002


In [66]:
#Evaluation
with torch.inference_mode():
    y_pred = model(X_test) #forward pass
    y_pred_classes = torch.argmax(y_pred, dim=1 ) #pick highest probability class
    acc = (y_pred_classes == y_test).float().mean() #accuracy
    print(f'Accuracy: {acc.item():.4f}')

Accuracy: 0.4000


In [None]:
def predict(text):
    vec = vectorizer.transform([text]).toarray()
    vec = torch.tensor(vec, dtype=torch.float32)
    output = model(vec)
    pred = torch.argmax(output, 1).item()
    return encoder.inverse_transform([pred])[0]

print(predict("Bought apples"))   
print(predict("Hospital bill"))   
print(predict("Taxi fare"))       
print(predict("Netflix subscription")) 
print(predict("Train from Velachery"))


food
medical
transport
others
transport
