In [18]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import pandas as pd

In [20]:
import zipfile

In [21]:
import os

In [23]:
def extract_zip_file(zip_path, destination):
    """
    Extracts the contents of the ZIP file to the specified destination.

    Parameters:
    - zip_path: Path to the ZIP file.
    - destination: Directory where the ZIP file should be extracted.
    """
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(destination)


def collect_image_data(root_path):


    data = []
    for dirpath, dirnames, filenames in os.walk(root_path):
        for file in filenames:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                label = os.path.basename(dirpath)
                image_path = os.path.join(dirpath, file)
                data.append([image_path, label])
    return data


def create_dataframe_from_data(data):

    return pd.DataFrame(data, columns=['Image_path', 'Label'])


# Example usage:
zip_path = '/content/drive/MyDrive/Capstone/Garbage.zip'
destination = 'Project'

extract_zip_file(zip_path, destination)
data = collect_image_data(destination)
df = create_dataframe_from_data(data)

df.head()

Unnamed: 0,Image_path,Label
0,Project/__MACOSX/Garbage/plastic/._plastic593.jpg,plastic
1,Project/__MACOSX/Garbage/plastic/._plastic731.jpg,plastic
2,Project/__MACOSX/Garbage/plastic/._plastic15.jpg,plastic
3,Project/__MACOSX/Garbage/plastic/._plastic396.jpg,plastic
4,Project/__MACOSX/Garbage/plastic/._plastic687.jpg,plastic


In [24]:
import plotly.express as px

# Prepare the data for Plotly Express
df_counts = df['Label'].value_counts().reset_index()
df_counts.columns = ['Garbage Type', 'Counts']

# Create a bar chart using Plotly Express
fig = px.bar(df_counts,
             x='Garbage Type',
             y='Counts',
             title='Value Counts for Each Type of Garbage',
             labels={'Garbage Type': 'Type of Garbage', 'Counts': 'Counts'},
             color='Garbage Type',
             category_orders={"Garbage Type": df_counts['Garbage Type'].tolist()},
             text='Counts',  # Add count labels on top of the bars
             color_discrete_sequence=px.colors.qualitative.Dark2)  # Set color scheme

# Customize the layout
fig.update_traces(marker=dict(line=dict(width=0.5, color='DarkSlateGrey')))
fig.update_xaxes(title_text=None)  # Remove x-axis label
fig.update_yaxes(title_text=None)  # Remove y-axis label

# Show the plot
fig.show()
