# day 264

# 1. Turn the code to get the data (from section 1. Get Data above) into a Python script, such as get_data.py.

* When you run the script using python get_data.py it should check if the data already exists and skip downloading if it does.

* If the data download is successful, you should be able to access the pizza_steak_sushi images from the data directory.

In [1]:
import numpy as np
import torch
import torchvision
import os,requests,zipfile
from pathlib import Path
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from mlxtend.plotting import plot_confusion_matrix
import time
from tqdm.auto import tqdm
from PIL import Image
import io
import matplotlib.pyplot as plt

In [2]:
# writing a python script for it:
folder = Path('python_scripts')
folder.mkdir(parents=True,exist_ok=True)

with open(Path(folder / 'get_data.py'),'w') as f:

  f.write(
"""
# importing the essentials
from pathlib import Path
import requests, zipfile
def download_and_unzip(url):
  '''
  Args:
  1. creates a folder.
  2. download the data only if the zip_path was not already created such that we can avoid.
  redownloading the data more than once.
  3. unzips the file and extract it all to the intended location.
'''

  data = Path('data')
  image_path = data / 'images'

  # making a directory:
  image_path.mkdir(parents=True,exist_ok=True)

  # downloading the data:
  zip_path = data / 'images.zip'
  if not zip_path.exists():
    with open(Path(data / 'images.zip'),'wb') as f:
      link = requests.get(url).content
      f.write(link)

    # unzip the file:
    with zipfile.ZipFile(Path(data / 'images.zip'),'r') as zip:
      zip.extractall(image_path)
      zip.close()

  else:
    print("Files have already been downloaded")

"""
  )

In [3]:
import sys

sys.path.append('/content/python_scripts')

In [4]:
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/python_scripts']

In [5]:
# importing it:
from python_scripts import get_data

In [6]:
# testing it:
get_data.download_and_unzip(url='https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip')

# 2. Use Python's argparse module to be able to send the train.py custom hyperparameter values for training procedures.
* Add an argument for using a different:
* Training/testing directory
*  Learning rate
*  Batch size
*  Number of epochs to train for
*  Number of hidden units in the TinyVGG model
*  Keep the default values for each of the above arguments as what they already are (as in notebook 05).
* For example, you should be able to run something similar to the following line to train a TinyVGG model with a learning rate of 0.003 and a batch size of 64 for 20 epochs: python train.py --learning_rate 0.003 --batch_size 64 --num_epochs 20.
* Note: Since train.py leverages the other scripts we created in section 05, such as, model_builder.py, utils.py and engine.py, you'll have to make sure they're available to use too. You can find these in the going_modular folder on the course GitHub.


## goals:
1. prepare train_dataloader and test_dataloader with a python script.
   * create the train_dir and test_dir
   * create train_dataset and test_dataset using torch.utils.data.dataset + customImageFolder
   * interpolate the dataset into DataLoader to get the train_dataloader and test_dataloader.
   * create the python script to do it all with one click.
2. create a TinyVGG model with python script.
3. create the training engine.py
4. then modify the whole code with python's argparse to add customizable learning_rate,batch_size,epochs.


In [5]:
# converting the whole thing into a script:
folder = Path('python_scripts')
folder.mkdir(parents=True,exist_ok=True)

with open(Path(folder / 'prepare_data.py'),'w') as f:
  f.write(
"""
# importing the essentials:
import torch,os,torchvision
from PIL import Image
from pathlib import Path


#prepare train_datafolder and test_dataloader:

def create_dataloaders(train_dir,test_dir,batches):
  # find_classes function
  def find_classes(directory):
    class_names = sorted(os.listdir(directory))

    if not class_names:
      raise FileNotFoundError("Please check your directory: {}".format(directory))

    class_idx = {class_name:idx for idx,class_name in enumerate(class_names)}

    return class_names,class_idx

  # create the customImageFolder:
  class CustomImageFolder(torch.utils.data.Dataset):
    def __init__(self,directory,transform=None):
      self.image_paths = list(Path(directory).glob('*/*.jpg'))
      self.transform = transform
      self.class_names,self.class_idx = find_classes(directory)

    def load_img(self,index):
      return Image.open(self.image_paths[index])

    # overwrite the len function and getitem function:
    def __len__(self):
      return len(self.image_paths)

    def __getitem__(self,index):
      img = self.load_img(index)
      class_names = self.image_paths[index].parent.stem
      label = self.class_idx[class_names]

      if self.transform:
        return self.transform(img),label

      else:
        return img,label


  # Augmentation Techniques:
  train_transform = torchvision.transforms.Compose([
      torchvision.transforms.Resize(size=[64,64]),
      torchvision.transforms.TrivialAugmentWide(),
      torchvision.transforms.ToTensor()
  ])

  test_transform = torchvision.transforms.Compose([
      torchvision.transforms.Resize(size=[64,64]),
      torchvision.transforms.ToTensor()
  ])

  # creating the datasets:

  train_dataset = CustomImageFolder(train_dir,train_transform)
  test_dataset = CustomImageFolder(test_dir,test_transform)

  # creating the data loader:
  train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=batches,
                                                shuffle=True,
                                                num_workers=os.cpu_count(),
                                                pin_memory=True)

  test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                batch_size=batches,
                                                shuffle=False,
                                                num_workers=os.cpu_count(),
                                                pin_memory=True)

  class_names = train_dataset.class_names

  return class_names,train_dataloader,test_dataloader

  """
  )

In [6]:
# importing it.
from python_scripts import prepare_data

train_dir = 'data/images/train'
test_dir = 'data/images/test'

class_names,train_dataloader,test_dataloader = prepare_data.create_dataloaders(train_dir,test_dir,32)

In [7]:
# creating the TinyVGG model in pytorch script
with open(Path('python_scripts/TinyVGG_model.py'),'w') as f:
  f.write(
"""

# importing the essentials
import torch

# Tiny VGG model creation
class TinyVGG(torch.nn.Module):
  def __init__(self,i,o,h):
    super().__init__()

    self.conv_block1 = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels=i,
                        out_channels=h,
                        kernel_size=3,
                        stride=1,
                        padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(in_channels=h,
                        out_channels=h,
                        kernel_size=3,
                        stride=1,
                        padding=1),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=3,stride=1)
    )
    self.conv_block2 = torch.nn.Sequential(
        torch.nn.Conv2d(in_channels=h,
                        out_channels=h,
                        kernel_size=3,
                        stride=1,
                        padding=1),
        torch.nn.ReLU(),
        torch.nn.Conv2d(in_channels=h,
                        out_channels=h,
                        kernel_size=3,
                        stride=1,
                        padding=1),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(kernel_size=3,stride=1)

    )
    self.classifier = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(in_features=h*3600,out_features=o)
    )


  def forward(self,input):
    return self.classifier(self.conv_block2(self.conv_block1(input)))

  """)




In [8]:
from python_scripts import TinyVGG_model

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TinyVGG_model.TinyVGG(i=3,o=len(class_names),h=10).to(device)


In [9]:
# creating engine.py

with open(Path('python_scripts/engine.py'),'w') as f:
  f.write(
"""
# importing the essentials:
import torch
import time
from tqdm.auto import tqdm

# training engine creation

def run(model,train_dataloader,test_dataloader,num_epochs,learning_rate):

  # compilation
  loss = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(params=model.parameters(),
                              lr=learning_rate)
  def accuracy(pred,actual):
    correct = torch.eq(pred,actual).sum().item()
    acc = correct/len(actual)
    return acc

  def total_time(start,end):
    total = end - start
    print("Total Running Time is = ",total)

  # seed;
  torch.cuda.manual_seed(42)
  torch.manual_seed(42)

  # containers;
  history,tra,tsa,trl,tsl = [],[],[],[],[]

  start = time.perf_counter()


  for epoch in tqdm(range(num_epochs)):
    a_train_loss,a_train_acc,a_test_loss,a_test_acc = 0,0,0,0

    # training
    model.train()
    for x_train,y_train in train_dataloader:
      train_logits = model(x_train)
      train_predictions = train_logits.argmax(dim=1)
      train_actuals = y_train
      train_loss = loss(train_logits,train_actuals)
      train_acc = accuracy(train_predictions,train_actuals)
      a_train_loss += train_loss
      a_train_acc += train_acc

      # zero gradin the optimizer
      optimizer.zero_grad()

      # backpropogating the train loss
      train_loss.backward()

      # stepping the optimizer up a notch
      optimizer.step()

    # normalize the accumulated train loss and train accuracy
    a_train_loss /= len(train_dataloader)
    a_train_acc /= len(train_dataloader)

    # evaluation time
    model.eval()
    with torch.inference_mode():
      for x_test,y_test in test_dataloader:
        test_logits = model(x_test)
        test_predictions = test_logits.argmax(dim=1)
        test_actuals = y_test
        test_loss = loss(test_logits,test_actuals)
        test_acc = accuracy(test_predictions,test_actuals)
        a_test_acc += test_acc
        a_test_loss += test_loss

      # normalizing the accumulations
      a_test_acc /= len(test_dataloader)
      a_test_loss /= len(test_dataloader)

    # putting things in container
    tsa.append(a_test_acc)
    tsl.append(a_test_loss)
    trl.append(a_train_loss)
    tra.append(a_train_acc)

    # printing the results:
    print("Epoch:{} | Train Loss:{:.3f}, Train Acc: {:.3f} | Test Loss: {:.3f} , Test Acc: {:.3f}"\
          .format(epoch,a_train_loss,a_train_acc,a_test_loss,a_test_acc))


  end = time.perf_counter()
  return history.extend([trl,tra,tsl,tsa])
  """
)

In [10]:
from python_scripts import engine

engine.run(model,train_dataloader,test_dataloader,num_epochs=5,learning_rate=0.001)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:0 | Train Loss:1.154, Train Acc: 0.350 | Test Loss: 1.107 , Test Acc: 0.297
Epoch:1 | Train Loss:1.095, Train Acc: 0.383 | Test Loss: 1.089 , Test Acc: 0.411
Epoch:2 | Train Loss:1.074, Train Acc: 0.425 | Test Loss: 1.012 , Test Acc: 0.469
Epoch:3 | Train Loss:1.020, Train Acc: 0.542 | Test Loss: 1.002 , Test Acc: 0.464
Epoch:4 | Train Loss:1.000, Train Acc: 0.490 | Test Loss: 0.913 , Test Acc: 0.582


In [11]:
# doing it all at once with customizable hyperparameters:


with open(Path('python_scripts/train.py'),'w') as f:
  f.write(
"""
import argparse
import torch
parser = argparse.ArgumentParser(description="Throw your Hyperparameters in me!!! ")

parser.add_argument("--num_epochs",
                    default=5,
                    type=int,
                    help='number of epochs')

parser.add_argument('--learning_rate',
                    default=0.001,
                    type=float,
                    help='Learning Rate for Optimizer')

parser.add_argument('--batch_size',
                    default=32,
                    type=int,
                    help='Batch size for dataloader')

parser.add_argument('--h',
                    default=10,
                    type=int,
                    help='Hidden units for the neural network')

parser.add_argument('--train_dir',
                    default='data/images/train',
                    type=str,
                    help='it is the training data location')

parser.add_argument('--test_dir',
                    default='data/images/test',
                    type=str,
                    help='it is the testing data location')

args = parser.parse_args([])

num_epochs = args.num_epochs
h = args.h
batch_size = args.batch_size
lr = args.learning_rate

# juicing things up!

## dataloaders
from python_scripts import prepare_data

train_dir = 'data/images/train'
test_dir = 'data/images/test'

class_names,train_dataloader,test_dataloader = prepare_data.create_dataloaders(train_dir,test_dir,32)

## model building
from python_scripts import TinyVGG_model

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = TinyVGG_model.TinyVGG(i=3,o=len(class_names),h=h).to(device)


## getting the engine ready:
from python_scripts import engine

engine.run(model,train_dataloader,test_dataloader,num_epochs=num_epochs,learning_rate=lr)
"""
)



In [64]:
# testing it out!

from python_scripts import train

train.num_epochs = 10
train.learning_rate = 0.003
train.h = 15
batch_size = 32
!python python_scripts/train.py

Traceback (most recent call last):
  File "/content/python_scripts/train.py", line 46, in <module>
    from python_scripts import prepare_data
ModuleNotFoundError: No module named 'python_scripts'


In [27]:
from python_scripts import train

# exercise 3: Create a script to predict (such as predict.py) on a target image given a file path with a saved model.For example, you should be able to run the command python predict.py some_image.jpeg and have a trained PyTorch model predict on the image and return its prediction. To see example prediction code, check out the predicting on a custom image section in notebook 04.You may also have to write code to load in a trained model.

## goals:
1. download an image from internet and store it in folder named downloads
2. read the image using torchvision.io.read_image().type(torch.float32)/255.
3. resize the image.
4. unsqueeze the image to add extra dimension for the model to predict on
5. show the image.
6. test the model to predict.
7. python scriptify the code which must be named predict.py

In [14]:
# downloading the image
folder = Path('downloads')
folder.mkdir(parents=True,exist_ok=True)

with open(Path(folder / 'image1'),'wb') as f:
  response = requests.get('https://www.indianhealthyrecipes.com/wp-content/uploads/2015/10/pizza-recipe-1.jpg').content
  f.write(response)




In [15]:
# read and modify the

"""
1. convert the image to tensor.
2. change the type to torch.float32
3. normalize the numbers to range between 0 and 1.
"""
image_tensor = torchvision.io.read_image('downloads/image1').type(torch.float32)/255.

# resize the tensor to that of the images the model was trained with.
resize = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=[64,64],antialias=True)
])

image_tensor = resize(image_tensor)

# unsqueezing it (to account for the batch dimension)
image_tensor = torch.unsqueeze(image_tensor,dim=0)

In [16]:
# testing the model to predict on the image
model.eval()
with torch.inference_mode():
  logit = model(image_tensor)
  prediction = logit.argmax(dim=1)
  pred_prob = torch.max(torch.softmax(logit,dim=1))

pred_prob,class_names[prediction]

(tensor(0.4530), 'pizza')

In [22]:
with open('python_scripts/predict.py','w') as f:
  f.write(

"""
from pathlib import Path
import torchvision
import torch
import requests

def predict(url,model,class_names):
  #   Args:
  #  1. download an image from internet and store it in folder named downloads
  #  2. read the image using torchvision.io.read_image().type(torch.float32)/255.
  #  3. resize the image.
  #  4. unsqueeze the image to add extra dimension for the model to predict on
  #  5. show the image.
  #  6. test the model to predict.
  #  7. python scriptify the code which must be named predict.py


  # downloading the image
  folder = Path('downloads')
  folder.mkdir(parents=True,exist_ok=True)

  with open(Path(folder / 'image1'),'wb') as f:
    response = requests.get(url).content
    f.write(response)

  # read and modify the image

  image_tensor = torchvision.io.read_image('downloads/image1').type(torch.float32)/255.

  # resize the tensor to that of the images the model was trained with.
  resize = torchvision.transforms.Compose([
      torchvision.transforms.Resize(size=[64,64],antialias=True)
  ])

  image_tensor = resize(image_tensor)

  # unsqueezing it (to account for the batch dimension)
  image_tensor = torch.unsqueeze(image_tensor,dim=0)

  # testing the model to predict on the image
  model.eval()
  with torch.inference_mode():
    logit = model(image_tensor)
    prediction = logit.argmax(dim=1)
    pred_prob = torch.max(torch.softmax(logit,dim=1))

  print("food I see is: %s with %s %% probability:"%(class_names[prediction],pred_prob.numpy) )


"""

  )

In [23]:
from python_scripts import predict

predict.predict('https://www.indianhealthyrecipes.com/wp-content/uploads/2015/10/pizza-recipe-1.jpg',model,class_names)

food I see is: pizza with tensor(0.4530) probability:


# exploration,analysis and freethrows

In [40]:
import sys
sys.path.append('/content/python_scripts')

In [41]:
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/python_scripts']

In [63]:
import train

In [None]:
train.__file__

In [55]:
train.num_epochs = 10
train.h = 15
train.lr = 0.003
train.batch_size = 32



In [59]:
from python_scripts import *

In [60]:
from python_scripts.train import *

In [62]:
from python_scripts.predict import *