In [ ]:
import os, sys

sys.path.append(os.path.abspath(os.path.join('..')))

import matplotlib.pyplot as plt

import seaborn as sns
import numpy as np
import pandas as pd
import imagehash

from utils.visualization import data_count_plot
from utils.visualization import plot_histogram
from utils.duplicates import prepare_duplicates, visualize_duplicates

from utils import data


### Loading the DataFrame

In [ ]:
# Relative path to the raw Furniture_Data folder (resolved against the working directory).
base_path = '../data/raw/Furniture_Data'
# Build the dataset table with the project loader (utils.data). Later cells read
# its Width, Height, Ratio, Type, Mode, Class and Style columns.
df = data.load(base_path)

In [ ]:
# Summary statistics of the loaded table (assumes df is a pandas DataFrame — TODO confirm).
df.describe()

In [ ]:
# Peek at the first rows to check the columns produced by data.load.
df.head()

### Histogram Plot

In [ ]:
# Plot histograms for numerical columns
# Draw one histogram per numeric column; each entry pairs the column with its
# full chart title so the titles remain searchable as literals.
histogram_specs = (
    ('Width', 'Histogram of Image Widths'),
    ('Height', 'Histogram of Image Heights'),
    ('Ratio', 'Histogram of Image Ratios'),
)
for column_name, chart_title in histogram_specs:
    plot_histogram(df, column_name, title=chart_title)

In [ ]:
# Overview figure: value counts for four categorical columns on a 2x3 grid.
fig, ax = plt.subplots(2, 3, figsize=(15, 10))

# Top row: file types (left cell) and image modes (right cell).
data_count_plot(df, 'Type', ax=ax[0][0], title='File Types', annotate=True, palette='Set2')
data_count_plot(df, 'Mode', ax=ax[0][2], title='Image Modes', annotate=True, palette='Accent')
# Bottom row: classes (middle cell) and styles (right cell), both passed horizontal=True.
data_count_plot(df, 'Class', ax=ax[1][1], horizontal=True, title='Classes', annotate=True, palette='Set1')
data_count_plot(df, 'Style', ax=ax[1][2], horizontal=True, title='Styles', annotate=True, palette='Set1')

# Grid cells (0, 1) and (1, 0) never receive a plot. Switch their axes off so
# they act as spacing instead of rendering as empty frames with default ticks.
for unused_axes in (ax[0][1], ax[1][0]):
    unused_axes.set_axis_off()

fig.suptitle('Raw Dataset Statistics', fontsize=20, fontweight='bold', y=1.0)
fig.tight_layout()

---
## Duplicate handling

In [ ]:
# Build the duplicates table with the project helper (utils.duplicates).
# Later cells filter it on its "Duplicate_Type" column (values "Duplicate",
# "Similar", "Inspect") and group it by its "Group" column.
duplicates = prepare_duplicates(df)

In [ ]:
# Display the duplicates table for inspection.
duplicates

In [ ]:
# Keep only the rows labelled "Duplicate" and hand them to the visualizer.
is_exact_duplicate = duplicates["Duplicate_Type"] == "Duplicate"
visualize_duplicates(
    duplicates[is_exact_duplicate],
    title="Duplicate images (same Class and same Style)",
)

In [ ]:
# Keep only the rows labelled "Similar" and hand them to the visualizer.
is_similar = duplicates["Duplicate_Type"] == "Similar"
visualize_duplicates(
    duplicates[is_similar],
    title="Similar images (same Class but different Style)",
)

In [ ]:
# Keep only the rows labelled "Inspect" and hand them to the visualizer.
needs_inspection = duplicates["Duplicate_Type"] == "Inspect"
visualize_duplicates(
    duplicates[needs_inspection],
    title="Manual inspection needed (different Class)",
)

In [ ]:
# Per-row size of the row's duplicate group (non-null "Group" values counted).
group_sizes = duplicates.groupby('Group')['Group'].transform('count')
# Keep only rows whose group has more than three members, then visualize them.
multiple_duplicates = duplicates.loc[group_sizes > 3]
visualize_duplicates(multiple_duplicates)

In [ ]:

# NOTE(review): find_near_duplicates is not imported anywhere in the visible
# source — confirm where it is defined, otherwise this cell raises NameError.
# NOTE(review): this rebinds `duplicates`, replacing the table built by
# prepare_duplicates; earlier visualization cells re-run afterwards would see
# this result instead.
# The meaning of the second argument (1) is not visible here — presumably a
# distance threshold; verify against the helper.
duplicates = find_near_duplicates(df, 1)

In [ ]:
# NOTE(review): display_image is not imported anywhere in the visible source —
# confirm where it is defined.
# NOTE(review): base_path here is '../', not the '../data/raw/Furniture_Data'
# root used when loading the dataset; verify how display_image combines it
# with the relative image path.
display_image('beds/Asian/19726asian-daybeds.jpg', base_path='../')

In [ ]:
# NOTE(review): augment_image is not imported anywhere in the visible source —
# confirm where it is defined.
# Augment one sample image from the raw dataset folder.
augmented = augment_image('../data/raw/Furniture_Data/beds/Asian/19726asian-daybeds.jpg')
# Presumably a PIL image given the .show() call — verify the return type.
augmented.show()