# Scraper Tests

### 
___

### Pinterest

#### Importing Libraries

In [None]:
import os
import sys

import uuid
import requests
from PIL import Image
from io import BytesIO
from tqdm import tqdm
import pandas as pd


relative_path_to_script = '../utils'
absolute_path_to_script = os.path.abspath(relative_path_to_script)
sys.path.append(absolute_path_to_script)

from pintrest_scraper import scraper


#### Initializing Inputs and Outputs

In [None]:
# Search terms passed to the Pinterest scraper, one scrape per entry.
list_keywords = [
    "Asmita Kaushik",
    "Masoom Minawala",
    "Roshni Bhatia",
    "Trendy Fashion",
    "India Street Fashion",
]
# Upper bound on images requested per keyword.
no_of_image = 1

# Output folder for the downloaded images and the CSV written later.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
directory = "/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/pinterest"
os.makedirs(directory, exist_ok=True)

#### Scraping Data

In [None]:
# Empty results table; the scraping cell below adds one row per saved image.
pintrest_df = pd.DataFrame(
    columns=["Image ID", "Image Source", "Image Link"],
)

In [None]:
# Download every distinct image URL reported by the scraper for each keyword,
# save it under `directory`, and record it in `pintrest_df`.

# IDs already handled. Kept outside the keyword loop so a URL returned for
# several keywords is downloaded and recorded only once.
image_ids = set()
# Rows are collected here and concatenated once at the end; concatenating a
# one-row frame per image copies the whole table on every iteration.
new_rows = []

for keyword in list_keywords:
    details = scraper.scrape(keyword, max_images=no_of_image)

    print(keyword)

    if not details["isDownloaded"]:
        print("\nNothing to download !!", end="\n---------------------\n")
        continue

    print("\nDownloading completed !!")

    for url in tqdm(details['url_list']):
        # Deterministic ID: the same URL always maps to the same UUID.
        image_id = uuid.uuid3(uuid.NAMESPACE_DNS, url)

        if image_id in image_ids:
            continue
        image_ids.add(image_id)

        # A timeout keeps one stalled connection from hanging the run, and
        # raise_for_status surfaces HTTP errors before PIL tries to parse
        # an error page as an image.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        image.save(f"{directory}/{image_id}.png")

        new_rows.append({"Image ID": image_id, "Image Source": "Pinterest", "Image Link": url})

    print("\n-------------------------------\n")

if new_rows:
    pintrest_df = pd.concat([pintrest_df, pd.DataFrame(new_rows)], ignore_index=True)

pintrest_df.to_csv(f"{directory}/pintrest_trend.csv", index=False)


#### Final Function

In [None]:
import os
import sys

import uuid
import requests
from PIL import Image
from io import BytesIO
from tqdm import tqdm
import pandas as pd


relative_path_to_script = '../utils'
absolute_path_to_script = os.path.abspath(relative_path_to_script)
sys.path.append(absolute_path_to_script)


from pintrest_scraper import scraper

In [None]:
def scrape_pinterest_data(list_keywords:list[str],no_of_images_per_keyword:int,output_folder_path:str,save_data_frame:bool = False,request_timeout:float = 30.0) -> pd.DataFrame:
    """Scrape Pinterest images for each keyword and tabulate what was saved.

    For every keyword, ``scraper.scrape`` is called with
    ``max_images=no_of_images_per_keyword``. Its result is read as a mapping
    with an ``"isDownloaded"`` flag and a ``"url_list"`` of image URLs. Each
    distinct URL is fetched, opened with PIL and saved as
    ``<output_folder_path>/<image_id>.png``, where ``image_id`` is the UUIDv3
    of the URL in the DNS namespace.

    Args:
        list_keywords: Search terms to scrape, processed in order.
        no_of_images_per_keyword: Image cap forwarded to the scraper.
        output_folder_path: Folder that receives the images (and the CSV);
            created if it does not exist.
        save_data_frame: When true, also write the table to
            ``<output_folder_path>/pintrest_trend.csv``.
        request_timeout: Seconds to wait on each image download.

    Returns:
        DataFrame with columns "Image ID", "Image Source" and "Image Link",
        one row per distinct image saved. Empty when nothing was scraped.

    Raises:
        requests.RequestException: A download timed out or returned an HTTP
            error status.
    """
    columns = ["Image ID", "Image Source", "Image Link"]

    # Images are written to the folder the caller asked for, not to a
    # notebook-level global, so the function also works outside the notebook.
    os.makedirs(output_folder_path, exist_ok=True)

    rows = []
    # Shared across keywords so a URL returned for several keywords is
    # downloaded and recorded only once.
    seen_ids = set()

    for keyword in list_keywords:
        details = scraper.scrape(keyword, max_images=no_of_images_per_keyword)

        print(keyword)

        if not details["isDownloaded"]:
            print("\nNothing to download !!", end="\n-------------------------------\n")
            continue

        print("\nDownloading completed !!")

        for url in tqdm(details['url_list']):
            # Deterministic ID: the same URL always maps to the same UUID.
            image_id = uuid.uuid3(uuid.NAMESPACE_DNS, url)

            if image_id in seen_ids:
                continue
            seen_ids.add(image_id)

            # Fail with a clear HTTP error rather than letting PIL choke on
            # an error page, and never block forever on a stalled server.
            response = requests.get(url, timeout=request_timeout)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            image.save(os.path.join(output_folder_path, f"{image_id}.png"))

            rows.append({"Image ID": image_id, "Image Source": "Pinterest", "Image Link": url})

        print("\n-------------------------------\n")

    # Build the table once instead of concatenating a one-row frame per image.
    pintrest_df = pd.DataFrame(rows, columns=columns)

    if save_data_frame:
        pintrest_df.to_csv(os.path.join(output_folder_path, "pintrest_trend.csv"), index=False)

    return pintrest_df


#### Use Case Example

In [None]:
# Keywords to scrape in this example run.
list_keywords = [
    "Asmita Kaushik",
    "Masoom Minawala",
    "Roshni Bhatia",
    "Trendy Fashion",
    "India Street Fashion",
]

# Destination folder, created up front.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
directory = "/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/pinterest"
os.makedirs(directory, exist_ok=True)

# One image per keyword; the resulting table is also written as a CSV.
pintrest_df = scrape_pinterest_data(
    list_keywords,
    no_of_images_per_keyword=1,
    output_folder_path=directory,
    save_data_frame=True,
)

###
---

### Myntra 

#### Importing Libraries

In [None]:
import os 
import requests
from PIL import Image
from io import BytesIO
from tqdm import tqdm
import uuid
import pandas as pd

#### Initializing Inputs and Outputs

In [None]:
# Raw Myntra export; the download cell reads its 'img-responsive src' column.
# NOTE(review): machine-specific absolute paths — adjust before running elsewhere.
df = pd.read_csv(
    '/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/myntra_raw_data.csv'
)

# Output folder for the downloaded Myntra images and the CSV written later.
directory = "/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/myntra"
os.makedirs(directory, exist_ok=True)

#### Scraping Data

In [None]:
# Empty results table; the download cell below adds one row per saved image.
myntra_df = pd.DataFrame(
    columns=["Image ID", "Image Source", "Image Link"],
)

In [None]:
# Download every distinct image URL listed in the raw Myntra data, save it
# under `directory`, and record it in `myntra_df`.

# IDs already handled, so a URL that appears on several rows is fetched once.
image_ids = set()
# Rows are collected here and concatenated once at the end; concatenating a
# one-row frame per image copies the whole table on every iteration.
new_rows = []

for url in tqdm(df['img-responsive src']):
    # Deterministic ID: the same URL always maps to the same UUID.
    image_id = uuid.uuid3(uuid.NAMESPACE_DNS, url)

    if image_id in image_ids:
        continue
    image_ids.add(image_id)

    # A timeout keeps one stalled connection from hanging the run, and
    # raise_for_status surfaces HTTP errors before PIL tries to parse an
    # error page as an image.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    image.save(f"{directory}/{image_id}.png")

    # These images come from the Myntra export, so label them as such
    # (the source was previously recorded as "Pinterest").
    new_rows.append({"Image ID": image_id, "Image Source": "Myntra", "Image Link": url})

if new_rows:
    myntra_df = pd.concat([myntra_df, pd.DataFrame(new_rows)], ignore_index=True)

myntra_df.to_csv(f"{directory}/myntra_trend.csv", index=False)

#### Final Function

In [None]:
import os 
import requests
from PIL import Image
from io import BytesIO
from tqdm import tqdm
import uuid
import pandas as pd

In [None]:
def process_myntra_data(df:pd.DataFrame,output_folder_path:str,save_data_frame:bool = False,request_timeout:float = 30.0) -> pd.DataFrame:
    """Download the images listed in a Myntra export and tabulate them.

    Each distinct URL in the ``'img-responsive src'`` column of ``df`` is
    fetched, opened with PIL and saved as
    ``<output_folder_path>/<image_id>.png``, where ``image_id`` is the UUIDv3
    of the URL in the DNS namespace.

    Args:
        df: Raw Myntra data; must contain an ``'img-responsive src'`` column
            of image URLs.
        output_folder_path: Folder that receives the images (and the CSV);
            created if it does not exist.
        save_data_frame: When true, also write the table to
            ``<output_folder_path>/myntra_trend.csv``.
        request_timeout: Seconds to wait on each image download.

    Returns:
        DataFrame with columns "Image ID", "Image Source" and "Image Link",
        one row per distinct image saved.

    Raises:
        KeyError: ``df`` has no ``'img-responsive src'`` column.
        requests.RequestException: A download timed out or returned an HTTP
            error status.
    """
    columns = ["Image ID", "Image Source", "Image Link"]

    # Images are written to the folder the caller asked for, not to a
    # notebook-level global, so the function also works outside the notebook.
    os.makedirs(output_folder_path, exist_ok=True)

    rows = []
    # IDs already handled, so a URL that appears on several rows is fetched once.
    seen_ids = set()

    for url in tqdm(df['img-responsive src']):
        # Deterministic ID: the same URL always maps to the same UUID.
        image_id = uuid.uuid3(uuid.NAMESPACE_DNS, url)

        if image_id in seen_ids:
            continue
        seen_ids.add(image_id)

        # Fail with a clear HTTP error rather than letting PIL choke on an
        # error page, and never block forever on a stalled server.
        response = requests.get(url, timeout=request_timeout)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        image.save(os.path.join(output_folder_path, f"{image_id}.png"))

        # The data comes from Myntra; it was previously mislabelled "Pinterest".
        rows.append({"Image ID": image_id, "Image Source": "Myntra", "Image Link": url})

    # Build the table once instead of concatenating a one-row frame per image.
    myntra_df = pd.DataFrame(rows, columns=columns)

    if save_data_frame:
        myntra_df.to_csv(os.path.join(output_folder_path, "myntra_trend.csv"), index=False)
    return myntra_df

#### Use Case Example

In [None]:
# Load the raw Myntra export to process.
# NOTE(review): machine-specific absolute paths — adjust before running elsewhere.
df = pd.read_csv(
    '/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/myntra_raw_data.csv'
)

# Destination folder, created up front.
directory = "/Users/t.sumukhflexday/Desktop/Projects/Test/fashion_trend/fashion_trend/data/myntra"
os.makedirs(directory, exist_ok=True)

# Download the images; the resulting table is also written as a CSV.
temp = process_myntra_data(
    df=df,
    output_folder_path=directory,
    save_data_frame=True,
)

### 
___