## **Installing necessary packages**

In [None]:
# Install/upgrade fastai, fastbook and nbdev quietly (-U upgrade, -qq minimal output).
# NOTE(review): versions are not pinned, so a fresh run may pull different releases.
!pip install -Uqq fastai fastbook nbdev

## **Using required imports and magic functions**

In [None]:
import warnings
from fastai import *
import seaborn as sns
from fastbook import *
from fastai.vision.all import *
import matplotlib.pyplot as plt
sns.set_theme(style='darkgrid')
warnings.filterwarnings('ignore')

%reload_ext autoreload
%autoreload 2
%matplotlib inline

## **Mapping Bengali names to their scientific names**

In [None]:
# Maps each Bengali fish name (used as the class label and as the folder name
# under the data directory) to the scientific name used as the image-search query.
bengali_fishes = {
    "Ayre": "Sperata aor",
    "Catla": "Catla catla",
    "Chital": "Chitala chitala",
    "Ilish": "Tenualosa ilisha",
    # The Bengali "Koi" is the climbing perch, not the ornamental koi carp
    # (Cyprinus rubrofuscus), which would pull in images of the wrong fish.
    "Koi": "Anabas testudineus",
    # Binomial only: the trailing, truncated author fragment ("Hamilo") only
    # added noise to the search query.
    "Kachki": "Corica soborna",
    "Kajoli": "Ailia coila",
    "Magur": "Clarias batrachus",
    "Mrigal": "Cirrhinus cirrhosus",
    # NOTE(review): this label names two fishes but the query covers a single
    # species -- confirm whether a second query (for Mola) is wanted.
    "Mola Dhela": "Osteobrama cotio",
    "Pabda": "Ompok bimaculatus",
    "Pangash": "Pangasius pangasius",
    "Poa": "Otolithoides pama",
    "Puti": "Puntius sophore",
    "Rui": "Labeo rohita",
    "Shing": "Heteropneustes fossilis",
    "Silver Carp": "Hypophthalmichthys molitrix",
    # NOTE(review): "Taki" may refer to Channa punctata rather than
    # Channa striata -- confirm before collecting more data.
    "Taki": "Channa striata",
    "Tengra": "Mystus tengara",
    "Telapia": "Oreochromis niloticus",
}

## **Mounting Drive**

In [None]:
# Colab-only: mount the user's Google Drive at /content/drive so the project
# folder on Drive can be read and written from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## **Initializing project folder path**

In [None]:
# Make the project folder on Drive the working directory; every relative path
# used below (e.g. "Data", "DataLoaders") is resolved against it.
%cd /content/drive/MyDrive/Bengali Fish Recognizer

/content/drive/MyDrive/Bengali Fish Recognizer


## **Function to initialize or create a required directory**

In [None]:
def folder_initialize(folder_path_name):
  """Ensure that the directory `folder_path_name` exists.

  Missing parent directories are created as well, and calling this on a
  directory that already exists is a no-op.

  Args:
    folder_path_name: Path (str or path-like) of the directory to create.

  Raises:
    FileExistsError: If the path exists but is not a directory.
  """
  # makedirs(exist_ok=True) replaces the exists()+mkdir() pair: it handles
  # nested paths and has no window between the check and the creation.
  os.makedirs(folder_path_name, exist_ok=True)

## **Creation of `Data` path**

In [None]:
# Root folder (relative to the current working directory) that holds one
# sub-folder of images per fish label.
data_path = "Data"
folder_initialize(data_path)

## **Collection of Image Data**

In [None]:
# For every fish label: create its folder under `data_path`, search for images
# by scientific name and download the results into that folder.
for key, scientific_name in bengali_fishes.items():
  dest = f'{data_path}/{key}'
  folder_initialize(dest)
  try:
    image_urls = search_images_ddg(scientific_name, max_images=10)
    download_images(dest, urls=image_urls)
  except Exception as err:
    # Collection is best-effort: one failing search/download must not abort
    # the remaining labels. Report the failure instead of hiding it, and let
    # KeyboardInterrupt/SystemExit propagate so the cell can be interrupted.
    print(f"Skipping '{key}' ({scientific_name}): {type(err).__name__}: {err}")
    continue

## **Grabbing image files**

In [None]:
# Collect the paths of the image files found under `data_path`.
image_files = get_image_files(data_path)

## **Checking if the images can be opened or not**

In [None]:
# `failed` holds the entries of `image_files` that did not pass verification
# (presumably files that cannot be opened as images); shown as the cell output.
failed = verify_images(image_files)
failed

(#0) []

## **Deletion of image files that could not be opened**

In [None]:
# Remove every file listed in `failed` from disk so it never reaches training.
failed.map(Path.unlink)

(#0) []

## **Categories in `Data` directory**

In [None]:
# One label per sub-directory of `data_path`. Plain files are skipped because
# the per-label counting that follows calls os.listdir on each entry and would
# fail on a non-directory. Sorted because os.listdir returns entries in
# arbitrary order, which would make the plot order vary between runs.
labels = sorted(
  entry for entry in os.listdir(data_path)
  if os.path.isdir(os.path.join(data_path, entry))
)

## **Mapping each category to its number of images**

In [None]:
# Build {label: number of regular files directly inside that label's folder}.
data = {}
for label in labels:
  label_dir = f"{data_path}/{label}"
  # Count only regular files; nested directories are ignored.
  count = sum(
    1 for entry in os.listdir(label_dir)
    if os.path.isfile(os.path.join(label_dir, entry))
  )
  data[label] = count

## **Function to show the image distribution**

In [None]:
def plot_images_distribution(dictionary):
  """Draw a bar chart of the number of images per label.

  Args:
    dictionary: Mapping of label name -> number of images for that label.

  Returns:
    None. The figure is displayed with `plt.show()`.
  """
  # Materialise the dict views so matplotlib receives plain sequences.
  labels = list(dictionary.keys())
  number_of_images = list(dictionary.values())

  # Wide figure with vertical tick labels: with many categories the
  # horizontal labels overlap and become unreadable.
  fig, ax = plt.subplots(figsize=(12, 5))
  ax.bar(labels, number_of_images)
  ax.set_xlabel("Labels")
  ax.set_ylabel("Number of images")
  ax.set_title("Image distribution")
  ax.tick_params(axis="x", labelrotation=90)
  fig.tight_layout()
  plt.show()

## **Viewing Image Distribution for each label**

In [None]:
# Visualise how many images were collected for each label.
plot_images_distribution(data)

## **Definition of the Data Block**

In [None]:
# Recipe for turning a folder of images into (image, category) samples.
data_block = DataBlock(
    # Inputs are images, targets are single categories.
    blocks=(ImageBlock, CategoryBlock),
    # Items are image files; each label is taken from the parent folder name.
    get_items=get_image_files,
    get_y=parent_label,
    # Hold out 20% of the items for validation; fixed seed for a repeatable split.
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    # Per-item resize to 224, then the default augmentations applied per batch.
    item_tfms=Resize(224),
    batch_tfms=aug_transforms(),
)

## **Initialization of Data Loader Path**

In [None]:
# Folder (relative to the current working directory) where serialized
# DataLoaders objects are stored.
data_loader_path = 'DataLoaders'
folder_initialize(data_loader_path)

## **Declaration of Batch Size**

In [None]:
# Number of samples per batch, passed as `bs` when the DataLoaders are built.
batch_size = 32

## **Creation of Data Loader**

In [None]:
# Build the DataLoaders from the images under `data_path` using the
# `data_block` recipe and the configured batch size.
data_loader = data_block.dataloaders(data_path, bs=batch_size)

## **Initialization of Data Loader version & Storing the dataloader**

In [None]:
# Version tag embedded in the output file name; bump it to keep earlier
# snapshots instead of overwriting them.
version = 1
# Serialize the whole DataLoaders object to DataLoaders/DataLoader_v_<version>.
# NOTE(review): torch.save pickles the object, so it should only be loaded back
# from a trusted file and with compatible library versions.
torch.save(data_loader, f"{data_loader_path}/DataLoader_v_{version}")