# Imports

In [30]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from datetime import datetime as dt
# import datetime as dt
from scheduler import Scheduler
import scheduler.trigger as trigger

# Doen Dresses

## Extract

In [31]:
# Sold Doen Dresses
url = 'https://poshmark.com/search?query=Doen&brand%5B%5D=Doen&availability=sold_out&department=Women&category=Dresses'
brand = 'doen'
category = 'dress'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set dress styles

In [32]:
# Dresses
dresses = [
    "Ischia", "Marianne", "Rosaria", "Lisbeth", "Wilmina",
    "Aurora", "Delphine", "Clara", "Sabine", "Odette",
    "Evelyn", "Juliette", "Amelie", "Camille", "Vera",
    "Fiona", "Nina", "Corinne", "Helene", "Lucille",
    "Margot", "Anya", "Freya", "Sylvie", "Celine",
    "Daphne", "Iris", "Luna", "Flora", "Amara",
    "Maeve", "Adelaide", "Ophelia", "Scarlett", "Viola",
    "Anastasia", "Claudia", "Elise", "Matilda", "Eloise",
    "Gwendolyn", "Ivy", "Cecilia", "Esme", "Tessa",
    "Madeleine", "Poppy", "Adele", "Serena", "Genevieve",
    "Lorraine", "Merla", "Narcisse", "Augustina", "Sabrina",
    "Alessandra", "Leyah", "Guinevere", "Francetta", "Kaira",
    "Raquella", "Pamelina", "Quinn", "Celestine", "Emmaretta",
    "Piper", "Violetta", "Noisette", "Franique", "Sera",
    "Lovisa", "Rhodia", "Gladys"
]


dresses = [x.lower() for x in dresses]
dresses = [x +' ' for x in dresses] # add a space after the name to exlude partial matches


### Create Dataframe

In [33]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for dress in dresses:
        if dress in title:
            style = dress
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    


  df = pd.concat([df,df1], axis =0,ignore_index= True)


In [34]:
df.head()

Unnamed: 0,title,price,size,category,style,brand
0,doen poet ruffled midi dress - black - xs,160.0,XS,dress,,doen
1,doen carina eyelet mini dress black - size xl,150.0,XL,dress,,doen
2,doen long puff sleeve maxi dress green floral ...,200.0,M,dress,,doen
3,doen shoulder tie floral maxi dress - size xs,100.0,XS,dress,,doen
4,doen victorie maxi dress in mirbeau garden flo...,200.0,M,dress,,doen


## Load

In [35]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()

Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,doen poet ruffled midi dress - black - xs,160.0,XS,dress,,doen,2024-Dec-30-17:24:53
1,doen carina eyelet mini dress black - size xl,150.0,XL,dress,,doen,2024-Dec-30-17:24:53
2,doen long puff sleeve maxi dress green floral ...,200.0,M,dress,,doen,2024-Dec-30-17:24:53
3,doen shoulder tie floral maxi dress - size xs,100.0,XS,dress,,doen,2024-Dec-30-17:24:53
4,doen victorie maxi dress in mirbeau garden flo...,200.0,M,dress,,doen,2024-Dec-30-17:24:53


# Doen Tops

## Extract

In [36]:
# Sold Doen Tops
url = 'https://poshmark.com/brand/Doen-Women-Tops?availability=sold_out'
brand = 'doen'
category = 'top'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [37]:
# Tops
tops = [
    "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
    "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
    "Viola", "Monique", "Sophie", "Claudine", "Elena",
    "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
    "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
    "Colette", "Marielle", "Florelle", "Adeline", "Renee",
    "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
    "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
    "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
    "Josephine", "Evelina", "Lucia", "Freya", "Marion",
    "Scarlet", "Jane", "June", "Lille", "Fidella", "Frederica",
    "Anaya", "Idella", "Baptisia", "Eldoris", "Camri",
    "Jacey", "Leia", "Amaline", "Solange", "Evette",
    "Lois", "Alida", "Ashby", "Nerida", "Agotha",
    "Frances", "Paola", "Vivienne"
]

tops = [x.lower() for x in tops]
tops = [x +' ' for x in tops] # add a space after the name to exlude partial matches

### Create Dataframe

In [38]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for top in tops:
        if top in title:
            style = top
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


## Load

In [39]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')
   
df.head() 


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,doen viola black and white floral ruffled top ...,90.0,L,top,viola,doen,2024-Dec-30-17:24:55
1,nwt rixo eletrrice blouse,90.0,XS,top,,doen,2024-Dec-30-17:24:55
2,doen vera top in salt xs,125.0,XS,top,vera,doen,2024-Dec-30-17:24:55
3,nwt doen docia henri taylor eyelet lace puff s...,92.0,XS,top,henri,doen,2024-Dec-30-17:24:55
4,doen fidella sleeveless blouse salt top new nwt,118.0,M,top,fidella,doen,2024-Dec-30-17:24:55


# Doen Skirts

## Extract

In [40]:
# Sold Doen Skirts
url = 'https://poshmark.com/brand/Doen-Women-Skirts?availability=sold_out'
brand = 'doen'
category = 'skirt'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [41]:
# Skirts
skirts = [
    "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
    "Odile", "Esther", "Giselle", "Clementine", "Simone",
    "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
    "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
    "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
    "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
    "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
    "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
    "Odessa", "Viola", "Lenore", "Selene", "Camille",
    "Amelie", "Liora", "Fiona", "Adria", "Melisande","Elowena",
    "Nandi","Cheri","Fabienne","Rieti","Sanremo"
]

skirts = [x.lower() for x in skirts]
# skirts = [x +' ' for x in skirts] # add a space after the name to exlude partial matches


### Create Dataframe

In [42]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for skirt in skirts:
        if skirt in title:
            style = skirt
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


## Load

In [43]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')
   
df.head() 

Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,nwt doen elina skirt - size 2,190.0,2,skirt,,doen,2024-Dec-30-17:24:57
1,"doen elowena skirt in villette paisley, brand ...",170.0,S,skirt,elowen,doen,2024-Dec-30-17:24:57
2,"doen sebastiane lace skirt in black, brand new...",180.0,S,skirt,sebastiane,doen,2024-Dec-30-17:24:57
3,doen nandi skirt in daffodil yellow,168.0,Various,skirt,nandi,doen,2024-Dec-30-17:24:57
4,"doen sebastiane lace skirt in black, brand new...",180.0,XS,skirt,sebastiane,doen,2024-Dec-30-17:24:57


# Hill House

## Dresses

In [44]:
# Sold Hil House Dresses
url = 'https://poshmark.com/brand/Hill_House-Women-Dresses?availability=sold_out'
brand = 'hill_house'
category = 'dress'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,hill house nap dress,45.0,S,dress,,hill_house,2024-Dec-30-17:24:58
1,hill house athena nap dress mermaid like new c...,55.0,L,dress,,hill_house,2024-Dec-30-17:24:58
2,hill house nesli green trellis smocked midi dress,75.0,S,dress,,hill_house,2024-Dec-30-17:24:58
3,hill house home women's the grace maxi nap dre...,104.0,S,dress,,hill_house,2024-Dec-30-17:24:58
4,new hill house ellie nap dress in navy gingham l,94.0,L,dress,,hill_house,2024-Dec-30-17:24:58


## Skirts

In [45]:

url = 'https://poshmark.com/brand/Hill_House-Women-Skirts?availability=sold_out'
brand = 'hill_house'
category = 'skirt'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,hill house skirt nwt,80.0,S,skirt,,hill_house,2024-Dec-30-17:24:59
1,hill house size l delphine skirt nwt,25.0,L,skirt,,hill_house,2024-Dec-30-17:24:59
2,nwt hill house home paz linen mini skirt in navy,12.0,M,skirt,,hill_house,2024-Dec-30-17:24:59
3,hill house mirabel skirt,55.0,M,skirt,,hill_house,2024-Dec-30-17:24:59
4,hill house home nwt sz l delphine nap skirt na...,50.0,L,skirt,,hill_house,2024-Dec-30-17:24:59


## Tops

In [46]:
url = 'https://poshmark.com/brand/Hill_House-Women-Tops?availability=sold_out'
brand = 'hill_house'
category = 'top'


response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,hill house paz top navy,35.0,M,top,,hill_house,2024-Dec-30-17:25:00
1,hill house home fairy secret garden sweatshirt...,21.0,L,top,,hill_house,2024-Dec-30-17:25:00
2,hill house jade top,22.0,M,top,,hill_house,2024-Dec-30-17:25:00
3,hill house the ballet wrap sweater,34.0,S,top,,hill_house,2024-Dec-30-17:25:00
4,the paz top,70.0,S,top,,hill_house,2024-Dec-30-17:25:00


# Agua by Agua Bendita

## Dresses

In [47]:
url = 'https://poshmark.com/brand/Agua_by_Agua_Bendita-Women-Dresses?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,agua bendita mambo primavera linen midi dress,245.0,S,dress,,aguaByAguaBendita,2024-Dec-30-17:25:01
1,agua by agua bendita manzanilla anturios mini ...,145.0,XS,dress,,aguaByAguaBendita,2024-Dec-30-17:25:01
2,agua agua bendita sucre cut-out poplin floral ...,95.0,S,dress,,aguaByAguaBendita,2024-Dec-30-17:25:01
3,agua by agua bendita caramelo primavera cotton...,15.0,S,dress,,aguaByAguaBendita,2024-Dec-30-17:25:01
4,nwot agua by agua bendita floral midi dress,80.0,M,dress,,aguaByAguaBendita,2024-Dec-30-17:25:01


## Skirts

In [48]:
url = 'https://poshmark.com/brand/Agua_by_Agua_Bendita-Women-Skirts?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,agua by agua bendita lavanda cotton silk blend...,220.0,one size fits S-M-L,skirt,,aguaByAguaBendita,2024-Dec-30-17:25:03
1,agua by agua bendita arroyo caracola midi skirt,2100.0,S,skirt,,aguaByAguaBendita,2024-Dec-30-17:25:03
2,agua by agua bendita 'bergamota' green printed...,250.0,S,skirt,,aguaByAguaBendita,2024-Dec-30-17:25:03
3,agua bendita astro esmeralda lavanda sarong pa...,110.0,One Size,skirt,,aguaByAguaBendita,2024-Dec-30-17:25:03
4,agua by agua bendita\nalcachofa printed cotton...,147.0,M,skirt,,aguaByAguaBendita,2024-Dec-30-17:25:03


## Tops

In [49]:
url = 'https://poshmark.com/brand/Reformation-Women-Tops?availability=sold_out'
brand = 'aguaByAguaBendita'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation julia rib-knit tank in cream,28.0,XS,top,,aguaByAguaBendita,2024-Dec-30-17:25:04
1,gorgeous cherry reformation top!!!,28.0,4,top,,aguaByAguaBendita,2024-Dec-30-17:25:04
2,reformation delevan top - black - size 4,45.0,4,top,,aguaByAguaBendita,2024-Dec-30-17:25:04
3,reformation ribbed white cropped henley - size...,25.0,M,top,,aguaByAguaBendita,2024-Dec-30-17:25:04
4,reformation reign daisy chain black crop top -...,40.0,8,top,,aguaByAguaBendita,2024-Dec-30-17:25:04


# Reformation

## Dresses

In [50]:
url = 'https://poshmark.com/brand/Reformation-Women-Dresses?availability=sold_out'
brand = 'reformation'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation floral wrap maxi dress medium,70.0,M,dress,,reformation,2024-Dec-30-17:25:05
1,"nwt ""reformation mya floral puff sleeve dress ...",175.0,0,dress,,reformation,2024-Dec-30-17:25:05
2,"reformation nwt crepe mini dress, in pompadour...",165.0,2,dress,,reformation,2024-Dec-30-17:25:05
3,reformation•worn 1x•rare color• winslow maxi f...,175.0,S,dress,,reformation,2024-Dec-30-17:25:05
4,reformation nwt gavin dress in “avian” size: 2,179.0,2,dress,,reformation,2024-Dec-30-17:25:05


## Skirts

In [51]:
url = 'https://poshmark.com/brand/Reformation-Women-Skirts?availability=sold_out'
brand = 'reformation'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation daisy chain floral midi side split...,50.0,2,skirt,,reformation,2024-Dec-30-17:25:07
1,new reformation x laura harrier lakisha silk s...,135.0,2,skirt,,reformation,2024-Dec-30-17:25:07
2,reformation layla midi skirt - mahogany wine b...,125.0,Various,skirt,,reformation,2024-Dec-30-17:25:07
3,reformation linen oak skirt,20.0,0,skirt,,reformation,2024-Dec-30-17:25:07
4,reformation bea skirt,80.0,8,skirt,,reformation,2024-Dec-30-17:25:07


## Tops

In [52]:
url = 'https://poshmark.com/brand/Reformation-Women-Tops?availability=sold_out'
brand = 'reformation'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,reformation julia rib-knit tank in cream,28.0,XS,top,,reformation,2024-Dec-30-17:25:08
1,gorgeous cherry reformation top!!!,28.0,4,top,,reformation,2024-Dec-30-17:25:08
2,reformation delevan top - black - size 4,45.0,4,top,,reformation,2024-Dec-30-17:25:08
3,reformation ribbed white cropped henley - size...,25.0,M,top,,reformation,2024-Dec-30-17:25:08
4,reformation reign daisy chain black crop top -...,40.0,8,top,,reformation,2024-Dec-30-17:25:08


# Zimmerman

## Dresses

In [53]:
url = 'https://poshmark.com/brand/zimmerman-Women-Dresses?availability=sold_out'
brand = 'zimmerman'
category = 'dress'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,zimmerman georgette dress,195.0,6,dress,,zimmerman,2024-Dec-30-17:25:09
1,zimmerman,350.0,6,dress,,zimmerman,2024-Dec-30-17:25:09
2,zimmerman off shoulder white 3/4 sleeves multi...,89.0,1 (4-6),dress,,zimmerman,2024-Dec-30-17:25:09
3,zimmerman ready to wear wonderland floral bias...,700.0,6,dress,,zimmerman,2024-Dec-30-17:25:09
4,zimmermann melody off shoulder mini dress yell...,138.0,4,dress,,zimmerman,2024-Dec-30-17:25:09


## Skirts

In [54]:
url = 'https://poshmark.com/brand/zimmerman-Women-Skirts?availability=sold_out'
brand = 'zimmerman'
category = 'skirt'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,black and white skirt,25.0,M,skirt,,zimmerman,2024-Dec-30-17:25:10


## Tops

In [55]:
url = 'https://poshmark.com/brand/zimmerman-Women-Tops?availability=sold_out'
brand = 'zimmerman'
category = 'top'

response = requests.get(url)
# response.status_code

html = response.text
soup = BeautifulSoup(html, "html.parser") 

titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style','brand']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:].replace(",",""))
    size = sizes[x].text.strip()[6:]
    
    style = None
    # for skirt in skirts:
    #     if skirt in title:
    #         style = skirt
    #         break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':category,
                 'style':style,
                 'brand':brand}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/{brand}_{category}_{file_timestamp}.csv',index=False)

# Log process
message = f"{brand} {category} Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

df.head()


  df = pd.concat([df,df1], axis =0,ignore_index= True)


Unnamed: 0,title,price,size,category,style,brand,ingest_timestamp
0,#49 zimmermann lace off shoulder top,66.0,6,top,,zimmerman,2024-Dec-30-17:25:11
1,zimmerman - botanica wattle blouse,440.0,6,top,,zimmerman,2024-Dec-30-17:25:11
2,zimmerman botanica high- neck cropped bow-tie ...,950.0,Size 4 or US 12,top,,zimmerman,2024-Dec-30-17:25:11
3,nwt zimmerman kali hibiscus linen blouse size 1,70.0,S,top,,zimmerman,2024-Dec-30-17:25:11
4,zimmerman silk organza paisley print top sz3,50.0,Size 3 ZImmerman,top,,zimmerman,2024-Dec-30-17:25:11


# Utilities

## Product Style Names

In [56]:
# # Tops
# tops = [
#     "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
#     "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
#     "Viola", "Monique", "Sophie", "Claudine", "Elena",
#     "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
#     "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
#     "Colette", "Marielle", "Florelle", "Adeline", "Renee",
#     "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
#     "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
#     "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
#     "Josephine", "Evelina", "Lucia", "Freya", "Marion"
# ]

# # Skirts
# skirts = [
#     "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
#     "Odile", "Esther", "Giselle", "Clementine", "Simone",
#     "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
#     "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
#     "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
#     "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
#     "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
#     "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
#     "Odessa", "Viola", "Lenore", "Selene", "Camille",
#     "Amelie", "Liora", "Fiona", "Adria", "Melisande"
# ]

# # Pants
# pants = [
#     "Diana", "Clara", "Eloise", "Harper", "Amalia",
#     "Marguerite", "Florine", "Estelle", "Genevieve", "Sabine",
#     "Odessa", "Lorelei", "Miriam", "Joanna", "Lenore",
#     "Viviana", "Cecile", "Delia", "Lucinda", "Aurora",
#     "Francesca", "Eliana", "Isobel", "Norah", "Camille",
#     "Selena", "Veronica", "Marina", "Adela", "Corinne",
#     "Penelope", "Rosalyn", "Evelyn", "Jocasta", "Beatrice",
#     "Cressida", "Natalia", "Florence", "Sophia", "Lyra",
#     "Octavia", "Philippa", "Sabina", "Clarissa", "Giselle",
#     "Ines", "Amara", "Elara", "Lucille", "Mattea"
# ]

# # Jeans
# jeans = [
#     "Diana", "Harper", "Juliette", "Sienna", "Isabel",
#     "Audrey", "Celeste", "Phoebe", "Emilia", "Florence",
#     "Margo", "Eleanor", "Claudia", "Colette", "Bianca",
#     "Anastasia", "Lena", "Amelie", "Louisa", "Monica",
#     "Rosa", "Thea", "Verity", "Naomi", "Tessa",
#     "Beatrix", "Delilah", "Eliza", "Helena", "Sylvia",
#     "Odette", "Simone", "Freya", "Camilla", "Vera",
#     "Gwen", "Margaux", "Adeline", "Clara", "Anneliese",
#     "Bridget", "Catherine", "Daphne", "Elinor", "Felicity",
#     "Georgina", "Hannah", "Isolde", "Jade", "Maribel"
# ]


## Doen Product Names

In [57]:
# # Doen Dresses
# url = 'https://www.shopdoen.com/collections/dresses?utm_source=google&campaign_id=10806218531&ad_id=724988960813&utm_medium=cpc&utm_campaign=actual_campaign_name_hardcoded&utm_content=104400744937&utm_term=&gclid=Cj0KCQiA4L67BhDUARIsADWrl7E0ZY9XqbDkatpsUlP50aYS24KA_m3tyCe0SNkiYI4icGTUFzKFJ7YaAoDHEALw_wcB&gad_source=1'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 

In [58]:
# # Doen Tops
# url = 'https://www.shopdoen.com/collections/tops'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 