# Imports

In [79]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from datetime import datetime as dt
# import datetime as dt
from scheduler import Scheduler
import scheduler.trigger as trigger

# Doen Dresses

## Extract

In [80]:
# Sold Doen Dresses
url = 'https://poshmark.com/search?query=Doen&brand%5B%5D=Doen&availability=sold_out&department=Women&category=Dresses'
brand = 'doen'
category = 'dress'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set dress styles

In [81]:
# Dresses
dresses = [
    "Ischia", "Marianne", "Rosaria", "Lisbeth", "Wilmina",
    "Aurora", "Delphine", "Clara", "Sabine", "Odette",
    "Evelyn", "Juliette", "Amelie", "Camille", "Vera",
    "Fiona", "Nina", "Corinne", "Helene", "Lucille",
    "Margot", "Anya", "Freya", "Sylvie", "Celine",
    "Daphne", "Iris", "Luna", "Flora", "Amara",
    "Maeve", "Adelaide", "Ophelia", "Scarlett", "Viola",
    "Anastasia", "Claudia", "Elise", "Matilda", "Eloise",
    "Gwendolyn", "Ivy", "Cecilia", "Esme", "Tessa",
    "Madeleine", "Poppy", "Adele", "Serena", "Genevieve",
    "Lorraine", "Merla", "Narcisse", "Augustina", "Sabrina",
    "Alessandra", "Leyah", "Guinevere", "Francetta", "Kaira",
    "Raquella", "Pamelina", "Quinn", "Celestine", "Emmaretta",
    "Piper", "Violetta", "Noisette", "Franique", "Sera",
    "Lovisa", "Rhodia", "Gladys"
]


dresses = [x.lower() for x in dresses]
dresses = [x +' ' for x in dresses] # add a space after the name to exlude partial matches


### Create Dataframe

In [82]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for dress in dresses:
        if dress in title:
            style = dress
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"dress",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    


  df = pd.concat([df,df1], axis =0,ignore_index= True)


In [83]:
df.head()

Unnamed: 0,title,price,size,category,style,brand
0,doen celestine dress,250.0,M,dress,celestine,doen
1,doen victorie maxi dress in mirbeau garden flo...,200.0,M,dress,,doen
2,doen brooke dress harvest meadow floral maxi d...,140.0,XL,dress,,doen
3,doen poet ruffled midi dress - black - xs,160.0,XS,dress,,doen
4,doen carina eyelet mini dress black - size xl,150.0,XL,dress,,doen


## Load

In [84]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()

Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,doen celestine dress,250.0,M,dress,celestine,doen,2024-Dec-29-22:43:03
1,doen victorie maxi dress in mirbeau garden flo...,200.0,M,dress,,doen,2024-Dec-29-22:43:03
2,doen brooke dress harvest meadow floral maxi d...,140.0,XL,dress,,doen,2024-Dec-29-22:43:03
3,doen poet ruffled midi dress - black - xs,160.0,XS,dress,,doen,2024-Dec-29-22:43:03
4,doen carina eyelet mini dress black - size xl,150.0,XL,dress,,doen,2024-Dec-29-22:43:03


# Doen Tops

## Extract

In [85]:
# Sold Doen Tops
url = 'https://poshmark.com/brand/Doen-Women-Tops?availability=sold_out'
brand = 'doen'
category = 'top'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [86]:
# Tops
tops = [
    "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
    "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
    "Viola", "Monique", "Sophie", "Claudine", "Elena",
    "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
    "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
    "Colette", "Marielle", "Florelle", "Adeline", "Renee",
    "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
    "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
    "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
    "Josephine", "Evelina", "Lucia", "Freya", "Marion",
    "Scarlet", "Jane", "June", "Lille", "Fidella", "Frederica",
    "Anaya", "Idella", "Baptisia", "Eldoris", "Camri",
    "Jacey", "Leia", "Amaline", "Solange", "Evette",
    "Lois", "Alida", "Ashby", "Nerida", "Agotha",
    "Frances", "Paola", "Vivienne"
]

tops = [x.lower() for x in tops]
tops = [x +' ' for x in tops] # add a space after the name to exlude partial matches

### Create Dataframe

In [87]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for top in tops:
        if top in title:
            style = top
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"top",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


## Load

In [88]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')
   
df.head() 


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,doen black albertine blouse top size xl,130.0,XL,top,,doen,2024-Dec-29-22:43:05
1,doen vera top in salt xs,125.0,XS,top,vera,doen,2024-Dec-29-22:43:05
2,dòen flowered blouse of good size,90.0,L,top,,doen,2024-Dec-29-22:43:05
3,doen narcissa top in pink valley floral pink,110.0,XS,top,,doen,2024-Dec-29-22:43:05
4,doen viola black and white floral ruffled top ...,90.0,L,top,viola,doen,2024-Dec-29-22:43:05


# Doen Skirts

## Extract

In [89]:
# Sold Doen Skirts
url = 'https://poshmark.com/brand/Doen-Women-Skirts?availability=sold_out'
brand = 'doen'
category = 'skirt'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [90]:
# Skirts
skirts = [
    "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
    "Odile", "Esther", "Giselle", "Clementine", "Simone",
    "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
    "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
    "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
    "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
    "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
    "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
    "Odessa", "Viola", "Lenore", "Selene", "Camille",
    "Amelie", "Liora", "Fiona", "Adria", "Melisande","Elowena",
    "Nandi","Cheri","Fabienne","Rieti","Sanremo"
]

skirts = [x.lower() for x in skirts]
# skirts = [x +' ' for x in skirts] # add a space after the name to exlude partial matches


### Create Dataframe

In [91]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for skirt in skirts:
        if skirt in title:
            style = skirt
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


## Load

In [92]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')
   
df.head() 

Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,"doen elowena skirt in villette paisley, brand ...",170.0,S,skirt,elowen,doen,2024-Dec-29-22:43:07
1,"doen sebastiane lace skirt in black, brand new...",180.0,S,skirt,sebastiane,doen,2024-Dec-29-22:43:07
2,doen nandi skirt in daffodil yellow,168.0,Various,skirt,nandi,doen,2024-Dec-29-22:43:07
3,"doen sebastiane lace skirt in black, brand new...",180.0,XS,skirt,sebastiane,doen,2024-Dec-29-22:43:07
4,nwt doen elowen skirt in deep navy sz m,220.0,M,skirt,elowen,doen,2024-Dec-29-22:43:07


# Hill House

## Dresses

In [93]:
# Sold Hil House Dresses
url = 'https://poshmark.com/brand/Hill_House-Women-Dresses?availability=sold_out'
brand = 'hill_house'
category = 'dress'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,new! hill house cosima nap dress chocolate ton...,345.0,L,skirt,,hill_house,2024-Dec-29-22:43:08
1,new hill house ellie nap dress in navy gingham l,94.0,L,skirt,,hill_house,2024-Dec-29-22:43:08
2,hill house home ellie nap dress midi powder bl...,35.0,L,skirt,,hill_house,2024-Dec-29-22:43:08
3,hill house ellie nap dress,65.0,XS,skirt,,hill_house,2024-Dec-29-22:43:08
4,hill house - the ellie nap dress (ski chalet),85.0,XXS,skirt,,hill_house,2024-Dec-29-22:43:08


## Skirts

In [94]:

url = 'https://poshmark.com/brand/Hill_House-Women-Skirts?availability=sold_out'
brand = 'hill_house'
category = 'skirt'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,hill house skirt nwt,80.0,S,skirt,,hill_house,2024-Dec-29-22:43:09
1,hill house size l delphine skirt nwt,25.0,L,skirt,,hill_house,2024-Dec-29-22:43:09
2,hill house home nwt sz l delphine nap skirt na...,50.0,L,skirt,,hill_house,2024-Dec-29-22:43:09
3,hill house mirabel skirt,55.0,M,skirt,,hill_house,2024-Dec-29-22:43:09
4,nwt hill house home paz linen mini skirt in navy,12.0,M,skirt,,hill_house,2024-Dec-29-22:43:09


## Tops

In [95]:
url = 'https://poshmark.com/brand/Hill_House-Women-Tops?availability=sold_out'
brand = 'hill_house'
category = 'top'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,hill house home jewel jammie top in white,37.0,S,skirt,,hill_house,2024-Dec-29-22:43:11
1,"hill house kit top in white, size s like new",22.0,S,skirt,,hill_house,2024-Dec-29-22:43:11
2,nwt hill house home gingham collared top,18.0,S,skirt,,hill_house,2024-Dec-29-22:43:11
3,hill house home aiko nap top in emerald botani...,16.0,XS,skirt,,hill_house,2024-Dec-29-22:43:11
4,hill house home kit top - size: medium,40.0,M,skirt,,hill_house,2024-Dec-29-22:43:11


# Agua by Agua Bendita

## Dresses

In [96]:
url = 'https://poshmark.com/brand/Agua_by_Agua_Bendita-Women-Dresses?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,agua bendita mambo primavera linen midi dress,245.0,S,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:12
1,agua by agua bendita manzanilla anturios mini ...,145.0,XS,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:12
2,agua agua bendita sucre cut-out poplin floral ...,95.0,S,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:12
3,agua by agua bendita caramelo primavera cotton...,15.0,S,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:12
4,nwot agua by agua bendita floral midi dress,80.0,M,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:12


## Skirts

In [97]:
url = 'https://poshmark.com/brand/Agua_by_Agua_Bendita-Women-Skirts?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,agua by agua bendita lavanda cotton silk blend...,220.0,one size fits S-M-L,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:14
1,agua by agua bendita arroyo caracola midi skirt,2100.0,S,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:14
2,agua by agua bendita 'bergamota' green printed...,250.0,S,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:14
3,agua bendita astro esmeralda lavanda sarong pa...,110.0,One Size,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:14
4,agua by agua bendita\nalcachofa printed cotton...,147.0,M,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:14


## Tops

In [98]:
url = 'https://poshmark.com/brand/Reformation-Women-Tops?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation women will oversize shirt long sle...,48.0,M,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:16
1,reformation top,34.0,XS,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:16
2,🌹reformation top,35.0,2,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:16
3,reformation women hearth long sleeve top peat ...,50.0,12,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:16
4,reformation novena white ruffle tie strap tank...,30.0,2,skirt,,aguaByAguaBendita,2024-Dec-29-22:43:16


# Reformation

## Dresses

In [99]:
url = 'https://poshmark.com/brand/Reformation-Women-Dresses?availability=sold_out'
brand = 'reformation'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation pia velvet dress crystal embellish...,150.0,2,skirt,,reformation,2024-Dec-29-22:43:17
1,✨ reformation jagger square neck minidress in ...,38.0,M,skirt,,reformation,2024-Dec-29-22:43:17
2,reformation black knit maxi dress long sleeves...,14.0,XL,skirt,,reformation,2024-Dec-29-22:43:17
3,reformation cello long sleeve midi dress 1x gr...,115.0,1X,skirt,,reformation,2024-Dec-29-22:43:17
4,reformation jeans linah mini fit and flare dre...,115.0,M,skirt,,reformation,2024-Dec-29-22:43:17


## Skirts

In [100]:
url = 'https://poshmark.com/brand/Reformation-Women-Skirts?availability=sold_out'
brand = 'reformation'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation bronze gold 100% silk twist front ...,27.0,2,skirt,,reformation,2024-Dec-29-22:43:19
1,reformation mini skirt. size 4. brand new. msr...,65.0,4,skirt,,reformation,2024-Dec-29-22:43:19
2,reformation bea skirt,80.0,8,skirt,,reformation,2024-Dec-29-22:43:19
3,reformation robbie skirt (army),38.0,8,skirt,,reformation,2024-Dec-29-22:43:19
4,reformation bea skirt,33.0,0,skirt,,reformation,2024-Dec-29-22:43:19


## Tops

In [101]:
url = 'https://poshmark.com/brand/Reformation-Women-Tops?availability=sold_out'
brand = 'reformation'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation women will oversize shirt long sle...,48.0,M,skirt,,reformation,2024-Dec-29-22:43:21
1,reformation top,34.0,XS,skirt,,reformation,2024-Dec-29-22:43:21
2,🌹reformation top,35.0,2,skirt,,reformation,2024-Dec-29-22:43:21
3,reformation women hearth long sleeve top peat ...,50.0,12,skirt,,reformation,2024-Dec-29-22:43:21
4,reformation novena white ruffle tie strap tank...,30.0,2,skirt,,reformation,2024-Dec-29-22:43:21


# Zimmerman

## Dresses

In [102]:
url = 'https://poshmark.com/brand/zimmerman-Women-Dresses?availability=sold_out'
brand = 'zimmerman'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,zimmerman georgette dress,195.0,6,skirt,,zimmerman,2024-Dec-29-22:43:22
1,zimmerman,350.0,6,skirt,,zimmerman,2024-Dec-29-22:43:22
2,zimmerman off shoulder white 3/4 sleeves multi...,89.0,1 (4-6),skirt,,zimmerman,2024-Dec-29-22:43:22
3,zimmerman ready to wear wonderland floral bias...,700.0,6,skirt,,zimmerman,2024-Dec-29-22:43:22
4,zimmermann melody off shoulder mini dress yell...,138.0,4,skirt,,zimmerman,2024-Dec-29-22:43:22


## Skirts

In [103]:
url = 'https://poshmark.com/brand/zimmerman-Women-Skirts?availability=sold_out'
brand = 'zimmerman'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,black and white skirt,25.0,M,skirt,,zimmerman,2024-Dec-29-22:43:23


## Tops

In [104]:
url = 'https://poshmark.com/brand/zimmerman-Women-Tops?availability=sold_out'
brand = 'zimmerman'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,#49 zimmermann lace off shoulder top,66.0,6,skirt,,zimmerman,2024-Dec-29-22:43:24
1,zimmerman - botanica wattle blouse,440.0,6,skirt,,zimmerman,2024-Dec-29-22:43:24
2,zimmerman botanica high- neck cropped bow-tie ...,950.0,Size 4 or US 12,skirt,,zimmerman,2024-Dec-29-22:43:24
3,nwt zimmerman kali hibiscus linen blouse size 1,70.0,S,skirt,,zimmerman,2024-Dec-29-22:43:24
4,zimmerman silk organza paisley print top sz3,50.0,Size 3 ZImmerman,skirt,,zimmerman,2024-Dec-29-22:43:24


# Utilities

## Product Style Names

In [105]:
# # Tops
# tops = [
#     "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
#     "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
#     "Viola", "Monique", "Sophie", "Claudine", "Elena",
#     "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
#     "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
#     "Colette", "Marielle", "Florelle", "Adeline", "Renee",
#     "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
#     "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
#     "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
#     "Josephine", "Evelina", "Lucia", "Freya", "Marion"
# ]

# # Skirts
# skirts = [
#     "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
#     "Odile", "Esther", "Giselle", "Clementine", "Simone",
#     "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
#     "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
#     "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
#     "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
#     "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
#     "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
#     "Odessa", "Viola", "Lenore", "Selene", "Camille",
#     "Amelie", "Liora", "Fiona", "Adria", "Melisande"
# ]

# # Pants
# pants = [
#     "Diana", "Clara", "Eloise", "Harper", "Amalia",
#     "Marguerite", "Florine", "Estelle", "Genevieve", "Sabine",
#     "Odessa", "Lorelei", "Miriam", "Joanna", "Lenore",
#     "Viviana", "Cecile", "Delia", "Lucinda", "Aurora",
#     "Francesca", "Eliana", "Isobel", "Norah", "Camille",
#     "Selena", "Veronica", "Marina", "Adela", "Corinne",
#     "Penelope", "Rosalyn", "Evelyn", "Jocasta", "Beatrice",
#     "Cressida", "Natalia", "Florence", "Sophia", "Lyra",
#     "Octavia", "Philippa", "Sabina", "Clarissa", "Giselle",
#     "Ines", "Amara", "Elara", "Lucille", "Mattea"
# ]

# # Jeans
# jeans = [
#     "Diana", "Harper", "Juliette", "Sienna", "Isabel",
#     "Audrey", "Celeste", "Phoebe", "Emilia", "Florence",
#     "Margo", "Eleanor", "Claudia", "Colette", "Bianca",
#     "Anastasia", "Lena", "Amelie", "Louisa", "Monica",
#     "Rosa", "Thea", "Verity", "Naomi", "Tessa",
#     "Beatrix", "Delilah", "Eliza", "Helena", "Sylvia",
#     "Odette", "Simone", "Freya", "Camilla", "Vera",
#     "Gwen", "Margaux", "Adeline", "Clara", "Anneliese",
#     "Bridget", "Catherine", "Daphne", "Elinor", "Felicity",
#     "Georgina", "Hannah", "Isolde", "Jade", "Maribel"
# ]


## Doen Product Names

In [106]:
# # Doen Dresses
# url = 'https://www.shopdoen.com/collections/dresses?utm_source=google&campaign_id=10806218531&ad_id=724988960813&utm_medium=cpc&utm_campaign=actual_campaign_name_hardcoded&utm_content=104400744937&utm_term=&gclid=Cj0KCQiA4L67BhDUARIsADWrl7E0ZY9XqbDkatpsUlP50aYS24KA_m3tyCe0SNkiYI4icGTUFzKFJ7YaAoDHEALw_wcB&gad_source=1'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 

In [107]:
# # Doen Tops
# url = 'https://www.shopdoen.com/collections/tops'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 