# Role Classifier AI
This notebook contains an AI model that classifies images by their role (informative, decorative, functional, text, or complex). The model takes an image and several text attributes as input and outputs a role.

## Load Data

In [1]:
# Install pandas, which the data-loading cell below imports.
# If the package was freshly installed or upgraded, the kernel may need a
# restart before the import picks it up.
%pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [11]:
import os
import json
import pandas as pd

# Directory containing the scraper's JSON output files (relative to this notebook).
json_dir = "../scraper/output"

# Column-wise copies of each image field. After loading, index i in every list
# refers to the same image as images[i] (and therefore row i of df).
srcs = []
file_names = []
roles = []
alts = []
attrs = []
a_button_parents = []
previous_texts = []
next_texts = []
# Page text of the JSON file each image came from. This is NOT a dataframe
# column; it is only available through this list.
textual_contexts = []

# Row-wise image records (one dict per image) used to build the dataframe.
images = []

# Each JSON file provides a top-level "text" value and an "images" list.
# Every image record is a dict shaped like:
# {
#     "src": "http://donutworrybehappy.eu/getattachment/.../hero-section.aspx",
#     "file_name": "./images/donutworrybehappy.eu/image_12.jpg",
#     "role": "text",
#     "alt": "Strawjelly Jam: fresh strawberry-style glaze with Belgian chocolate cover",
#     "attrs": {"src": "/getattachment/.../hero-section.aspx?", "class": ["...", "..."]},
#     "a_button_parent": "None",
#     "previous_text": "",
#     "next_text": "Donut Worry Be Happy"
# }
#
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order reproducible across runs and machines.
for filename in sorted(os.listdir(json_dir)):
    if not filename.endswith(".json"):
        continue

    # Read as UTF-8 explicitly so decoding does not depend on the platform locale.
    with open(os.path.join(json_dir, filename), "r", encoding="utf-8") as file:
        data = json.load(file)

    whole_text = data["text"]
    sub_images = data["images"]

    images.extend(sub_images)

    # Walk only THIS file's images. Iterating the accumulated `images` list
    # would re-append every earlier file's images on each pass and pair them
    # with the wrong page text.
    for image in sub_images:
        srcs.append(image["src"])
        file_names.append(image["file_name"])
        roles.append(image["role"])
        alts.append(image["alt"])
        # Append the per-image attribute dict, not the `attrs` list itself.
        attrs.append(image["attrs"])
        a_button_parents.append(image["a_button_parent"])
        previous_texts.append(image["previous_text"])
        next_texts.append(image["next_text"])
        textual_contexts.append(whole_text)

# The parallel lists must line up one-to-one with the image records.
assert len(srcs) == len(attrs) == len(textual_contexts) == len(images), (
    "parallel field lists are out of sync with the image records"
)

# Create a dataframe with one row per image record
df = pd.DataFrame(images)

# Display the number of data points in the dataframe
print(f"Number of data points: {df.shape[0]}")

# Display the first rows of the dataframe
df.head()

Number of data points: 57


Unnamed: 0,src,file_name,role,alt,attrs,a_button_parent,previous_text,next_text
0,https://kajabi-storefronts-production.kajabi-c...,./images/bnbbosses.com/image_1.jpg,text,"#ProtectTheChildren. ""As a doctor, lawyer, and...","{'class': ['image__image'], 'src': 'https://ka...","<a class=""image__link"" href=""https://americasf...","If you want the safe treatments for CoVID, the...",Looking for the Latest Tech & Tools for your B...
1,https://kajabi-storefronts-production.kajabi-c...,./images/bnbbosses.com/image_2.jpg,informative,"Money rolled tightly with an egg with ""IRA"" wr...","{'class': ['card__image'], 'src': 'https://kaj...","<a class=""card"" href=""/blog/what-is-a-self-dir...","Articles, Blogs, & Podcasts",What Is a Self-Directed IRA & Why You Need One
2,https://kajabi-storefronts-production.kajabi-c...,./images/bnbbosses.com/image_3.jpg,informative,"A husband and wife in a living room, the wife ...","{'class': ['card__image'], 'src': 'https://kaj...","<a class=""card"" href=""/blog/how-to-write-an-ef...","May 30, 2023",How to Write an Effective Listing for Your Pro...
3,https://kajabi-storefronts-production.kajabi-c...,./images/bnbbosses.com/image_4.jpg,informative,A black woman with curly hair holding money ov...,"{'class': ['image__image'], 'src': 'https://ka...","<a class=""image__link"" href=""https://www.bnbbo...",Affiliates share our wonderful products and se...,Go and Grow!
4,https://kajabi-storefronts-production.kajabi-c...,./images/bnbbosses.com/image_5.jpg,text,"#ProtectTheChildren. ""(Thousands} have died af...","{'class': ['image__image'], 'src': 'https://ka...","<a class=""image__link"" href=""https://americasf...","If you want the safe treatments for CoVID, the...",Looking for the Latest Tech & Tools for your B...
