In [1]:
import os
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
datadir = "/home/karthiktiwari/Downloads/Beer Data"

In [3]:
beer_train = pd.read_csv(os.path.join(datadir,"beer_profile_and_ratings.csv"), index_col=0)
beer_train.reset_index(drop=True, inplace=True)

In [4]:
beer_train.head(5)

Unnamed: 0,Style,Brewery,Beer Name (Full),Description,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,...,Fruits,Hoppy,Spices,Malty,review_aroma,review_appearance,review_palate,review_taste,review_overall,number_of_reviews
0,Altbier,Alaskan Brewing Co.,Alaskan Brewing Co. Alaskan Amber,"Notes:Richly malty and long on the palate, wit...",5.3,25,50,13,32,9,...,33,57,8,111,3.498994,3.636821,3.556338,3.643863,3.847082,497
1,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Double Bag,"Notes:This malty, full-bodied double alt is al...",7.2,25,50,12,57,18,...,24,35,12,84,3.798337,3.846154,3.904366,4.024948,4.034304,481
2,Altbier,Long Trail Brewing Co.,Long Trail Brewing Co. Long Trail Ale,Notes:Long Trail Ale is a full-bodied amber al...,5.0,25,50,14,37,6,...,10,54,4,62,3.409814,3.667109,3.600796,3.6313,3.830239,377
3,Altbier,Uerige Obergärige Hausbrauerei GmbH / Zum Uerige,Uerige Obergärige Hausbrauerei GmbH / Zum Ueri...,Notes:,8.5,25,50,13,55,31,...,49,40,16,119,4.148098,4.033967,4.150815,4.205163,4.005435,368
4,Altbier,Ninkasi Brewing Company,Ninkasi Brewing Company Sleigh'r Dark Doüble A...,Notes:Called 'Dark Double Alt' on the label.Se...,7.2,25,50,25,51,26,...,11,51,20,95,3.625,3.973958,3.734375,3.765625,3.817708,96


In [5]:
beer_train.columns

Index(['Style', 'Brewery', 'Beer Name (Full)', 'Description', 'ABV', 'Min IBU',
       'Max IBU', 'Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour',
       'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty', 'review_aroma',
       'review_appearance', 'review_palate', 'review_taste', 'review_overall',
       'number_of_reviews'],
      dtype='object')

In [6]:
beer_train[beer_train['ABV']>57]

Unnamed: 0,Style,Brewery,Beer Name (Full),Description,ABV,Min IBU,Max IBU,Astringency,Body,Alcohol,...,Fruits,Hoppy,Spices,Malty,review_aroma,review_appearance,review_palate,review_taste,review_overall,number_of_reviews
299,Bock - Eisbock,Schorschbräu,Schorschbräu Schorschbock 57%,Notes:,57.5,25,35,0,7,17,...,10,0,2,4,4.0,4.0,4.0,3.5,4.0,1


In [7]:
class BeerDataset(Dataset):
    def __init__(self, df, config):
        """
        Custom dataset class for reading beer attributes and review score from dataframe

        Args:
            df (Pandas DataFrame): Pandas Dataframe with Beer Data
        """
        self.df = df
        self.config = config

    def __getitem__(self, index):
        beer_attrs = []
        for attr in self.config:
            beer_attrs.append(self.df[attr].iloc[index])
        score = torch.as_tensor(np.round(self.df['review_overall'].iloc[index]).item()-1, dtype=torch.long)

        return {"attrs": torch.as_tensor(beer_attrs, dtype=torch.float32), "score": score}


    def __len__(self):
        return len(self.df)

In [8]:
config = ['ABV', 'Min IBU',
       'Max IBU', 'Astringency', 'Body', 'Alcohol', 'Bitter', 'Sweet', 'Sour',
       'Salty', 'Fruits', 'Hoppy', 'Spices', 'Malty']

val_size = int(0.1*len(beer_train))
beerDataset = BeerDataset(beer_train[:-val_size], config=config)
valDataset = BeerDataset(beer_train[val_size:], config=config)

In [9]:
beerDataset[1208]

{'attrs': tensor([ 4., 18., 25., 23., 25.,  6., 26., 26., 24.,  0., 17., 37.,  0., 47.]),
 'score': tensor(2)}

In [10]:
class BeerReviewPredictor(nn.Module):
    def __init__(self, config):
        super(BeerReviewPredictor, self).__init__()

        self.fc1 = nn.Linear(len(config), 256)
        self.fc2 = nn.Linear(256, 32)
        self.fc3 = nn.Linear(32, 5)

    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        return x

In [11]:
model = BeerReviewPredictor(config)


In [12]:
model.to('cuda')

BeerReviewPredictor(
  (fc1): Linear(in_features=14, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=5, bias=True)
)

In [13]:
model(torch.as_tensor([1 for i in range(len(config))], dtype=torch.float32, device='cuda').reshape(-1, len(config)))

tensor([[0.0000, 0.0000, 0.0869, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<ReluBackward0>)

In [14]:
# Dataloaders

trainset = DataLoader(beerDataset, batch_size=128, shuffle=True)
valset = DataLoader(valDataset, batch_size=128, shuffle=True)

In [15]:
for batch in trainset:
    print(batch['attrs'].shape, batch['score'].shape)
    break

torch.Size([128, 14]) torch.Size([128])


In [16]:
for row in range(len(beerDataset)):
    assert beerDataset[row]['attrs'] is not None and beerDataset[row]['score'] is not None

In [None]:
import torch.optim as optim
from tqdm import tqdm

optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)
loss_fn = nn.CrossEntropyLoss()
epochs = 25
device = 'cuda'
for epoch in range(epochs):
    total_train_loss = 0.0
    total_val_loss = 0.0

    model.train()
    for i, batch in enumerate(tqdm(trainset)):
        attrs, score = batch['attrs'].to(device), batch['score'].to(device)
        optimizer.zero_grad()
        output = model(attrs)
        loss = loss_fn(output, score)
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    model.eval()
    for i, batch in enumerate(tqdm(valset)):
        attrs, score = batch['attrs'].to(device), batch['score'].to(device)
        optimizer.zero_grad()
        output = model(attrs)
        loss = loss_fn(output, score)
        total_val_loss += loss.item()

    scheduler.step()
    print(f"Train loss on epoch {epoch + 1}={total_train_loss/len(trainset)}")
    print(f"Val loss on epoch {epoch + 1}={total_val_loss / len(valset)}")

100%|██████████| 23/23 [00:00<00:00, 40.86it/s]
100%|██████████| 23/23 [00:00<00:00, 55.69it/s]


Train loss on epoch 1=0.8321035556171251
Val loss on epoch 1=0.7439431703608969


100%|██████████| 23/23 [00:00<00:00, 48.14it/s]
100%|██████████| 23/23 [00:01<00:00, 13.48it/s]


Train loss on epoch 2=0.7091074456339297
Val loss on epoch 2=0.717691403368245


100%|██████████| 23/23 [00:02<00:00,  9.18it/s]
100%|██████████| 23/23 [00:00<00:00, 51.12it/s]


Train loss on epoch 3=0.7012837010881176
Val loss on epoch 3=0.7261632836383322


100%|██████████| 23/23 [00:00<00:00, 46.64it/s]
100%|██████████| 23/23 [00:00<00:00, 51.45it/s]


Train loss on epoch 4=0.7012478631475697
Val loss on epoch 4=0.7042790703151537


100%|██████████| 23/23 [00:00<00:00, 46.76it/s]
100%|██████████| 23/23 [00:00<00:00, 45.81it/s]


Train loss on epoch 5=0.6887403247149094
Val loss on epoch 5=0.6938116731850997


100%|██████████| 23/23 [00:00<00:00, 50.44it/s]
100%|██████████| 23/23 [00:00<00:00, 55.97it/s]


Train loss on epoch 6=0.6916883587837219
Val loss on epoch 6=0.721934639889261


100%|██████████| 23/23 [00:00<00:00, 48.55it/s]
100%|██████████| 23/23 [00:00<00:00, 52.29it/s]


Train loss on epoch 7=0.6753898029742034
Val loss on epoch 7=0.6932482615761135


100%|██████████| 23/23 [00:00<00:00, 48.69it/s]
100%|██████████| 23/23 [00:00<00:00, 55.75it/s]


Train loss on epoch 8=0.6696635873421378
Val loss on epoch 8=0.6876542049905529


100%|██████████| 23/23 [00:00<00:00, 50.75it/s]
100%|██████████| 23/23 [00:00<00:00, 47.72it/s]


Train loss on epoch 9=0.6656299596247466
Val loss on epoch 9=0.6886347700720248


100%|██████████| 23/23 [00:00<00:00, 49.70it/s]
100%|██████████| 23/23 [00:00<00:00, 57.46it/s]


Train loss on epoch 10=0.6617323833963146
Val loss on epoch 10=0.6833665759667106


100%|██████████| 23/23 [00:00<00:00, 49.96it/s]
100%|██████████| 23/23 [00:00<00:00, 54.69it/s]


Train loss on epoch 11=0.6594804577205492
Val loss on epoch 11=0.6831856136736663


100%|██████████| 23/23 [00:00<00:00, 48.76it/s]
100%|██████████| 23/23 [00:00<00:00, 50.88it/s]


Train loss on epoch 12=0.6574377054753511
Val loss on epoch 12=0.6899328335471775


100%|██████████| 23/23 [00:00<00:00, 42.26it/s]
100%|██████████| 23/23 [00:00<00:00, 53.33it/s]


Train loss on epoch 13=0.6513656248217043
Val loss on epoch 13=0.6828040363995925


100%|██████████| 23/23 [00:00<00:00, 50.09it/s]
100%|██████████| 23/23 [00:00<00:00, 54.61it/s]


Train loss on epoch 14=0.6511331563410552
Val loss on epoch 14=0.6742594112520632


100%|██████████| 23/23 [00:00<00:00, 52.84it/s]
100%|██████████| 23/23 [00:00<00:00, 55.33it/s]

Train loss on epoch 15=0.6510307775891345
Val loss on epoch 15=0.6699494680632716



