In [None]:
import os, sys

sys.path.append(os.path.abspath(os.path.join('..')))

import matplotlib.pyplot as plt

from utils.visualization import data_count_plot
from utils.visualization import plot_histogram
from utils.duplicates import prepare_duplicates, visualize_duplicates
from utils.augmentation import augment_image

from utils import data

### Loading the DataFrame

In [None]:
# Root folder of the raw dataset; the relative path is resolved against the
# notebook's working directory.
base_path = '../data/raw/Furniture_Data'
# Load the dataset via the project's `utils.data` helper.
# NOTE(review): `df` is used below like a pandas DataFrame (describe/head/
# column indexing) — confirm against `data.load`.
df = data.load(base_path)

In [None]:
# Summary statistics of `df`, rendered as the cell output.
df.describe()

In [None]:
# Preview the first rows of `df`.
df.head()

### Histogram Plot

In [None]:
# Plot one histogram per numerical column.
# The (column, title) pairs are declared once and drawn in a loop, so adding or
# removing a column is a one-line change instead of another copy-pasted call.
# Because the calls sit inside a loop, the return value of the last call is
# not echoed as cell output.
histogram_specs = [
    ('Width', 'Histogram of Image Widths'),
    ('Height', 'Histogram of Image Heights'),
    ('Ratio', 'Histogram of Image Ratios'),
]

for hist_column, hist_title in histogram_specs:
    plot_histogram(df, hist_column, title=hist_title)

### Countplot

In [None]:
     # Display all in 1 plot

# Overview figure: four count plots on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(10, 10))
fig.subplots_adjust(hspace=0.3, wspace=0.7)

# One entry per panel, listed in row-major order
# (top-left, top-right, bottom-left, bottom-right).
panel_specs = [
    ('Type', dict(title='File Types', palette='Set1')),
    ('Mode', dict(title='Image Modes', palette='pastel')),
    ('Class', dict(horizontal=True, title='Classes', palette='crest')),
    ('Style', dict(horizontal=True, title='Styles', palette='flare')),
]

for panel_ax, (panel_column, panel_options) in zip(ax.flat, panel_specs):
    data_count_plot(df, panel_column, ax=panel_ax, **panel_options)

# Figure-level title, then let matplotlib fit the panels into the canvas.
fig.suptitle('Raw Dataset Statistics', fontsize=20, fontweight='bold', y=1.0)
fig.tight_layout()

    # Display individually

# cols = ['Type', 'Mode', 'Class']

# for col in cols:data_count_plot(df, col, title=col)
# data_count_plot(df, 'Style', rotation=45)

In [None]:
classes = df['Class'].unique()

# Per-class style distribution, all panels in one figure.
# The grid is sized from the number of classes rather than a fixed 3x2 layout:
# with more than six classes the loop no longer indexes past the end of `ax`,
# and with fewer no empty panels are left in the figure. Six classes still
# produce the same 3x2 grid at figsize (10, 15).
n_cols = 2
n_rows = max(1, (len(classes) + n_cols - 1) // n_cols)  # ceiling division, at least one row

# squeeze=False keeps `ax` two-dimensional even when there is a single row.
fig, ax = plt.subplots(n_rows, n_cols, figsize=(10, 5 * n_rows), squeeze=False)
fig.subplots_adjust(hspace=0.3, wspace=0.7)
ax = ax.flatten()

for i, class_name in enumerate(classes):
    data_count_plot(df[df['Class'] == class_name], col='Style', ax=ax[i], horizontal=True, title=class_name + '_styles_count', palette='crest')

# Hide grid cells that have no class to show (e.g. an odd number of classes).
for unused_ax in ax[len(classes):]:
    unused_ax.set_visible(False)

# Alternative: display each class in its own figure.
#
# for class_name in classes:
#     data_count_plot(df[df['Class'] == class_name], col='Style', horizontal=True, title=class_name + '_styles_count')

---
## Identifying duplicates

In [None]:
# Collect duplicate candidates from `df` with the project helper.
# NOTE(review): later cells rely on the result having "Duplicate_Type" and
# "Group" columns — confirm in utils.duplicates.
duplicates = prepare_duplicates(df)

In [None]:
# Show `duplicates` as the cell output.
duplicates

In [None]:
# Rows whose Duplicate_Type is "Duplicate".
is_exact_duplicate = duplicates["Duplicate_Type"] == "Duplicate"
visualize_duplicates(duplicates[is_exact_duplicate], title="Duplicate images (same Class and same Style)")

In [None]:
# Rows whose Duplicate_Type is "Similar".
is_similar = duplicates["Duplicate_Type"] == "Similar"
visualize_duplicates(duplicates[is_similar], title="Similar images (same Class but different Style)")

In [None]:
# Rows whose Duplicate_Type is "Inspect".
needs_inspection = duplicates["Duplicate_Type"] == "Inspect"
visualize_duplicates(duplicates[needs_inspection], title="Manual inspection needed (different Class)")

In [None]:
# Per-row count of non-null `Group` values within that row's group.
group_sizes = duplicates.groupby('Group')['Group'].transform('count')

# Keep only rows whose group has more than three members, then plot them.
multiple_duplicates = duplicates[group_sizes > 3]
visualize_duplicates(multiple_duplicates)

---
## Augment

In [None]:
# Augment a single sample image to eyeball the result.
# The path is built from `base_path` (set in the loading cell) instead of
# repeating the dataset root literal, so the root is configured in one place.
# The resulting string is identical to the previously hard-coded path.
sample_image_path = base_path + '/beds/Asian/19726asian-daybeds.jpg'
augmented = augment_image(sample_image_path)
# NOTE(review): presumably an image object exposing `.show()` — confirm in
# utils.augmentation.
augmented.show()

---
## Store data

In [None]:
# Save to file