# Animal Vision - Helper Script

# 1. Getting partial data - get_data.py

In [None]:
%%writefile get_data.py
"""
Contains code to download the animal data for three classes
and process into the format required as per the 
torchvision ImageFolder function.
Data is downloaded through kaggle dataset, for this
kaggle.json file is required.
"""
import os
from pathlib import Path
import shutil
import subprocess
import sys
package_names = ['split-folders', 'opendatasets']  # packages to install
# pip's internal Python API (pip._internal) is not a supported interface, so
# the install is run as "python -m pip" in a subprocess of the interpreter
# that is executing this module. The call is best-effort (check=False): if
# pip fails, the imports that follow report any package that is really missing.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', *package_names, '--upgrade'],
    check=False,
)
import splitfolders
import opendatasets as od

def download_data():
    """
    Download the Kaggle animal dataset and build an 80:20 split of three classes.

    Steps:
      1. Download the dataset into ``temp_data`` (skipped when that
         directory already exists).
      2. Copy the LION, TIGER and WOLF class folders into
         ``temp_data/animal_data`` (skipped when that directory exists).
      3. Split the copied folders 80:20 into ``data/lion_tiger_wolf``
         using a fixed seed of 42.
      4. Delete ``temp_data``.

    If ``data/lion_tiger_wolf`` already exists, nothing is downloaded or
    copied; a leftover ``temp_data`` directory is removed and the function
    returns.

    Kaggle username and key will be needed to download the data.
    You can find your username and key from your account
    Kaggle.com -> Account -> API -> click on 'Create New API Token'
    """
    kaggle_dataset_url = 'https://www.kaggle.com/datasets/jerrinbright/cheetahtigerwolf'
    temp_data_path = Path('temp_data')
    raw_data = temp_data_path / 'cheetahtigerwolf' / 'ANIMALS' / 'ANIMALS'
    data_extracted_path = temp_data_path / 'animal_data'
    animal_list = ['LION', 'TIGER', 'WOLF']
    lion_tiger_wolf_path = Path('data') / 'lion_tiger_wolf'

    # The split is the final product: when it is already there, downloading
    # and copying the dataset again would only produce data to throw away.
    if lion_tiger_wolf_path.exists():
        print(f'\nSplit directory exist - {lion_tiger_wolf_path}')
        if temp_data_path.exists():
            shutil.rmtree(temp_data_path)
            print(f'\n{temp_data_path} directory is deleted')
        return

    # Download the dataset
    if temp_data_path.exists():
        print(f'{temp_data_path} directory exist')
    else:
        print('Downloading data...')
        od.download_kaggle_dataset(kaggle_dataset_url, temp_data_path)

    # Copy the three selected classes out of the raw data
    if data_extracted_path.exists():
        # Report the directory that was actually checked.
        print(f'\n{data_extracted_path} directory exist')
    else:
        print('\nCopying raw data...')
        for animal in animal_list:
            shutil.copytree(raw_data / animal, data_extracted_path / animal)
            print(f'{animal} directory created and files copied in "{data_extracted_path/animal}"')

    # Split the data 80:20 with a fixed seed so the split is reproducible.
    # NOTE(review): with a two-value ratio split-folders is expected to name
    # the output folders "train" and "val" - confirm before pointing a test
    # loader at a "test" folder.
    print('\nSpliting data in Train and Test set')
    splitfolders.ratio(data_extracted_path,
                       output=str(lion_tiger_wolf_path),
                       seed=42,
                       ratio=(0.80, 0.20))

    # The raw download and the intermediate copy are no longer needed.
    shutil.rmtree(temp_data_path)
    print(f'\n{temp_data_path} directory is deleted')

In [None]:
# Run the download / copy / split pipeline from the get_data.py helper script.
import get_data
get_data.download_data()

# 2. Creating a script to display training results - plot_loss_curves.py

In [None]:
%%writefile plot_loss_curves.py
"""
Contains code to display the result
obtained after training the model.
"""
import matplotlib.pyplot as plt

# Plot loss and accuracy curves
def plot_curves(model_result: dict):
    """
    Draw the training/test loss and accuracy curves side by side.

    Args:
      model_result: A dict in format of {'train_loss': [],
                                         'train_acc': [],
                                         'test_loss': [],
                                         'test_acc': []}
    """
    # Read every series first so a missing key fails before a figure is made.
    loss_series = (model_result['train_loss'], model_result['test_loss'])
    accuracy_series = (model_result['train_acc'], model_result['test_acc'])

    # One x position per recorded epoch.
    x_axis = range(len(loss_series[0]))

    panels = (('Loss', loss_series), ('Accuracy', accuracy_series))

    plt.figure(figsize=(15, 7))
    # Left panel: loss, right panel: accuracy.
    for position, (metric, (train_values, test_values)) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(x_axis, train_values, label=f'Train {metric}')
        plt.plot(x_axis, test_values, label=f'Test {metric}')
        plt.title(metric)
        plt.xlabel('Epochs')
        plt.legend()

# Copying the Python files to Drive

In [None]:
import shutil
import os

# Copy every .py file found directly in /content into the Drive project folder.
source_dir = '/content/'
dest_dir = '/content/drive/MyDrive/Colab Notebooks/My Project/Image Classification/Animal Vision/helper_scripts'
for file_name in os.listdir(source_dir):
    if file_name.endswith('.py'):
        # os.listdir returns bare file names, so join them with the source
        # directory; otherwise the copy only works when the current working
        # directory happens to be /content.
        shutil.copy2(os.path.join(source_dir, file_name), dest_dir)

# 3. Getting Complete data - get_full_data.py

In [5]:
%%writefile helper_scripts/get_full_data.py
"""
Contains code to download the animal data
and process into the format required as per the 
torchvision ImageFolder function.
Data is downloaded through kaggle dataset, for this
kaggle.json api file is required.
"""
import os
from pathlib import Path
import shutil
import subprocess
import sys
package_names = ['split-folders', 'opendatasets']  # packages to install
# pip's internal Python API (pip._internal) is not a supported interface, so
# the install is run as "python -m pip" in a subprocess of the interpreter
# that is executing this module. The call is best-effort (check=False): if
# pip fails, the imports that follow report any package that is really missing.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', *package_names, '--upgrade'],
    check=False,
)
import splitfolders
import opendatasets as od

def download_data():
    """
    Download the full Kaggle animal dataset and split it 80:20.

    Steps:
      1. Download the dataset into ``temp_data`` (skipped when that
         directory already exists).
      2. Split the class folders under
         ``temp_data/cheetahtigerwolf/ANIMAL-N30/ANIMALS`` 80:20 into
         ``data/animal30_classes`` using a fixed seed of 42. The files are
         moved (``move=True``) rather than copied.
      3. Delete ``temp_data``.

    If ``data/animal30_classes`` already exists, nothing is downloaded;
    a leftover ``temp_data`` directory is removed and the function returns.

    Kaggle username and key will be needed to download the data.
    You can find your username and key from your account
    Kaggle.com -> Account -> API -> click on 'Create New API Token'
    """
    kaggle_dataset_url = 'https://www.kaggle.com/datasets/jerrinbright/cheetahtigerwolf'
    temp_data_path = Path('temp_data')
    raw_data = temp_data_path / 'cheetahtigerwolf' / 'ANIMAL-N30' / 'ANIMALS'
    animal30_path = Path('data') / 'animal30_classes'

    # The split is the final product: when it is already there, downloading
    # the dataset again would only produce data to throw away.
    if animal30_path.exists():
        print(f'\nSplit directory exist - {animal30_path}')
        if temp_data_path.exists():
            shutil.rmtree(temp_data_path)
            print(f'\n{temp_data_path} directory is deleted')
        return

    # Download the dataset
    if temp_data_path.exists():
        print(f'{temp_data_path} directory exist')
    else:
        print('Downloading data...')
        od.download_kaggle_dataset(kaggle_dataset_url, temp_data_path)

    # Split the data 80:20 with a fixed seed so the split is reproducible.
    # NOTE(review): with a two-value ratio split-folders is expected to name
    # the output folders "train" and "val" - confirm before pointing a test
    # loader at a "test" folder.
    print('\nSpliting data in Train and Test set')
    splitfolders.ratio(raw_data,
                       output=str(animal30_path),
                       seed=42,
                       ratio=(0.80, 0.20),
                       move=True)

    # The raw download is no longer needed once the files have been moved.
    shutil.rmtree(temp_data_path)
    print(f'\n{temp_data_path} directory is deleted')

Writing helper_scripts/get_full_data.py
