# **Prepare Data**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import timm
import cv2
import albumentations as A
from glob import glob
import os
from autogluon.multimodal import MultiModalPredictor
from tqdm.notebook import tqdm

  import pkg_resources


In [None]:
# Training labels; later cells read the 'id' and 'breed' columns from this table.
train_df = pd.read_csv('/kaggle/input/dog-breed-identification/labels.csv')
# Sample submission; its 'id' column enumerates the test images.
sub_df = pd.read_csv('/kaggle/input/dog-breed-identification/sample_submission.csv')

# Test table: one row per test id plus the location of the matching JPEG.
test_ids = sub_df['id']
test_paths = '/kaggle/input/dog-breed-identification/test/' + test_ids.astype(str) + '.jpg'
test_df = pd.DataFrame({'id': test_ids, 'path': test_paths})

In [None]:
# Location of each training JPEG, derived from its id.
train_ids = train_df['id'].astype(str)
train_df['path'] = '/kaggle/input/dog-breed-identification/train/' + train_ids + '.jpg'

# **Explore Data**

In [None]:
# Preview the first rows of the training table (including the derived 'path' column).
train_df.head()

In [None]:
# Preview the first rows of the test table ('id' and 'path').
test_df.head()

In [None]:
# (rows, columns) of the training table and of the test table.
train_df.shape,test_df.shape

In [None]:
# Show one example image per breed in a grid that is `col` panels wide.
uni_label = sorted(train_df['breed'].unique())
num = len(uni_label)
col = 4
# Ceiling division: a breed count that is not a multiple of `col` needs a
# partially filled last row. Flooring here made plt.subplot raise ValueError
# for every panel index beyond row * col.
row = max(1, -(-num // col))

# Path of the first listed image of every breed, found in a single pass
# instead of re-filtering the whole table once per breed.
first_paths = train_df.groupby('breed')['path'].first()

plt.figure(figsize=(20, 5 * row))
for i, label in enumerate(uni_label):
    img_path = first_paths[label]
    img = mpimg.imread(img_path)
    plt.subplot(row, col, i + 1)  # subplot indices are 1-based
    plt.imshow(img)
    plt.title(label)
plt.tight_layout()
# Must be called: the bare attribute `plt.show` does nothing.
plt.show()

# **Augmentation**

In [None]:
# Output folder for the augmented image files; no error if it already exists.
os.makedirs("train_aug", exist_ok=True)

# Individual transforms, each configured with its own probability `p`.
flip_lr = A.HorizontalFlip(p=0.5)
flip_ud = A.VerticalFlip(p=0.5)
rotate = A.Rotate(limit=30, p=0.7)
brightness_contrast = A.RandomBrightnessContrast(p=0.2)

# Pipeline applied to every training image in the augmentation loop.
aug = A.Compose([flip_lr, flip_ud, rotate, brightness_contrast])

In [None]:
# Write four augmented variants of every training image into train_aug/ and
# collect one metadata row per variant (same columns as train_df, with 'id'
# and 'path' pointing at the new file).
new_rows = []
for idx,row in tqdm(train_df.iterrows(),total=len(train_df)):
    img_path = row['path']
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; stop
    # here with the offending path rather than failing inside the pipeline.
    if image is None:
        raise FileNotFoundError(f"Could not read training image: {img_path}")
    for i in range(4):
        augment = aug(image=image)['image']
        new_id = f"{row['id']}_aug_{i}"
        new_filename = f"train_aug/{new_id}.jpg"
        # cv2.imwrite reports failure through its return value; without this
        # check the table would reference a file that was never written.
        if not cv2.imwrite(new_filename,augment):
            raise OSError(f"Could not write augmented image: {new_filename}")
        new_row = row.copy()
        new_row['path'] = new_filename
        new_row['id'] = new_id
        new_rows.append(new_row)

In [None]:
# Table of augmented samples, one row per file written by the augmentation loop.
aug_df = pd.DataFrame(new_rows)

# Originals followed by their augmented copies, renumbered with a fresh 0..n-1 index.
frames = [train_df, aug_df]
train_aug_df = pd.concat(frames, axis=0, ignore_index=True)

# Sanity check: report the table size before and after augmentation.
print(f"Original shape: {train_df.shape}")
print(f"Augmented shape: {train_aug_df.shape}")

# **Model**

In [None]:
label = 'breed'
metric = 'log_loss'

# Validation must use images the model never trains on in ANY form. Letting
# the library split train_aug_df on its own can put augmented copies of one
# photo on both sides of the split, which inflates the validation score that
# drives model selection. So hold out ~10% of the ORIGINAL images per breed.
holdout_df = train_df.groupby(label).sample(frac=0.1, random_state=42)

# Map every row back to its source image id (augmented ids look like
# "<id>_aug_<i>") and drop each row derived from a held-out image.
source_id = train_aug_df['id'].astype(str).str.replace(r'_aug_\d+$', '', regex=True)
is_holdout = source_id.isin(holdout_df['id'].astype(str))

# Keep only the image path and the target, so the unique id strings cannot be
# picked up as an input feature.
model_cols = ['path', label]
fit_df = train_aug_df.loc[~is_holdout, model_cols].reset_index(drop=True)
tuning_df = holdout_df[model_cols].reset_index(drop=True)

predictor = MultiModalPredictor(label = label,eval_metric = metric)
predictor.fit(
    train_data = fit_df,
    tuning_data = tuning_df,  # explicit validation set: un-augmented, unseen images
    presets = 'best_quality',
    time_limit = 3600  # training budget in seconds
)

# **Evaluate**

In [None]:
# Score the fitted predictor on the original training table.
# NOTE(review): train_df is contained in train_aug_df, the table the predictor
# was fitted from, so this is measured on data already used during fitting.
# Treat it as a sanity check, not an estimate of leaderboard performance.
predictor.evaluate(train_df)

# **Submission**

In [None]:
# Class probabilities for every test image; concatenated column-wise with the
# test ids in the next cell.
pred = predictor.predict_proba(test_df)

In [None]:
# Column layout of the submission file. Capture it BEFORE sub_df is rebuilt:
# at this point sub_df still holds the sample submission (or, on a re-run of
# this cell, an already correctly ordered submission).
submission_cols = list(sub_df.columns)

# Test ids next to the predicted per-breed probabilities (aligned on index).
sub_df = pd.concat([test_df[['id']], pred], axis=1)

# Enforce the expected column order. The previous `sub_df[sub_df.columns]`
# was a no-op; this raises KeyError if a required column is missing instead
# of silently writing a malformed file.
sub_df = sub_df[submission_cols]

In [None]:
# Preview the submission table. The frame is named `sub_df`; `sub` was never
# defined and raised NameError on a fresh kernel.
sub_df.head()

In [None]:
# Write the submission file without the index column. Uses `sub_df`, the
# frame assembled above; the undefined name `sub` raised NameError.
sub_df.to_csv('submission.csv', index=False)