# Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from datetime import datetime as dt
# import datetime as dt
from scheduler import Scheduler
import scheduler.trigger as trigger

# Doen Dresses

## Extract

In [2]:
# Sold Doen Dresses
url = 'https://poshmark.com/search?query=Doen&brand%5B%5D=Doen&availability=sold_out&department=Women&category=Dresses'

response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set dress styles

In [3]:
# Dresses
dresses = [
    "Ischia", "Marianne", "Rosaria", "Lisbeth", "Wilmina",
    "Aurora", "Delphine", "Clara", "Sabine", "Odette",
    "Evelyn", "Juliette", "Amelie", "Camille", "Vera",
    "Fiona", "Nina", "Corinne", "Helene", "Lucille",
    "Margot", "Anya", "Freya", "Sylvie", "Celine",
    "Daphne", "Iris", "Luna", "Flora", "Amara",
    "Maeve", "Adelaide", "Ophelia", "Scarlett", "Viola",
    "Anastasia", "Claudia", "Elise", "Matilda", "Eloise",
    "Gwendolyn", "Ivy", "Cecilia", "Esme", "Tessa",
    "Madeleine", "Poppy", "Adele", "Serena", "Genevieve",
    "Lorraine", "Merla", "Narcisse", "Augustina", "Sabrina",
    "Alessandra", "Leyah", "Guinevere", "Francetta", "Kaira",
    "Raquella", "Pamelina", "Quinn", "Celestine", "Emmaretta",
    "Piper", "Violetta", "Noisette", "Franique", "Sera",
    "Lovisa", "Rhodia", "Gladys"
]


dresses = [x.lower() for x in dresses]
dresses = [x +' ' for x in dresses] # add a space after the name to exlude partial matches


### Create Dataframe

In [4]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for dress in dresses:
        if dress in title:
            style = dress
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"dress",
                 'style':style}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)
    


  df = pd.concat([df,df1], axis =0,ignore_index= True)


In [5]:
df.head()

Unnamed: 0,title,price,size,category,style
0,doen sol puff sleeve smocked maxi dress black ...,200.0,M,dress,
1,doen blair dress,175.0,S,dress,
2,doen poet ruffled midi dress - black - xs,160.0,XS,dress,
3,doen carina eyelet mini dress black - size xl,150.0,XL,dress,
4,doen long puff sleeve maxi dress green floral ...,200.0,M,dress,


## Load

In [6]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/doen_dresses_{file_timestamp}.csv',index=False)

# Log process
message = "Doen Dresses Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')


# Doen Tops

## Extract

In [7]:
# Sold Doen Tops
url = 'https://poshmark.com/brand/Doen-Women-Tops?availability=sold_out'
response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [8]:
# Tops
tops = [
    "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
    "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
    "Viola", "Monique", "Sophie", "Claudine", "Elena",
    "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
    "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
    "Colette", "Marielle", "Florelle", "Adeline", "Renee",
    "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
    "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
    "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
    "Josephine", "Evelina", "Lucia", "Freya", "Marion",
    "Scarlet", "Jane", "June", "Lille", "Fidella", "Frederica",
    "Anaya", "Idella", "Baptisia", "Eldoris", "Camri",
    "Jacey", "Leia", "Amaline", "Solange", "Evette",
    "Lois", "Alida", "Ashby", "Nerida", "Agotha",
    "Frances", "Paola", "Vivienne"
]

tops = [x.lower() for x in tops]
tops = [x +' ' for x in tops] # add a space after the name to exlude partial matches

### Create Dataframe

In [9]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for top in tops:
        if top in title:
            style = top
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"top",
                 'style':style}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


In [10]:
df

Unnamed: 0,title,price,size,category,style
0,dòen flowered blouse of good size,90.0,L,top,
1,doen viola black and white floral ruffled top ...,90.0,L,top,viola
2,doen vera top in salt xs,125.0,XS,top,vera
3,doen fidella sleeveless blouse salt top new nwt,118.0,M,top,fidella
4,doen lois tank top new nwt,95.0,M,top,lois
5,doen seine rosebud tank new nwt,95.0,L,top,
6,doen agotha top,130.0,XS,top,agotha
7,dôen lille top in monte isola stripe,100.0,XS,top,lille
8,the jane blouse,80.0,S,top,jane
9,"dôen alida top, countryside\npatchwork print",95.0,S,top,alida


## Load

In [11]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/doen_tops_{file_timestamp}.csv',index=False)

# Log process
message = "Doen Tops Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')


# Doen Skirts

## Extract

In [12]:
# Sold Doen Skirts
url = 'https://poshmark.com/brand/Doen-Women-Skirts?availability=sold_out'
response = requests.get(url)
# response.status_code
html = response.text
soup = BeautifulSoup(html, "html.parser") 

## Transform

### Set Styles

In [13]:
# Skirts
skirts = [
    "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
    "Odile", "Esther", "Giselle", "Clementine", "Simone",
    "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
    "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
    "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
    "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
    "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
    "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
    "Odessa", "Viola", "Lenore", "Selene", "Camille",
    "Amelie", "Liora", "Fiona", "Adria", "Melisande","Elowena",
    "Nandi","Cheri","Fabienne","Rieti","Sanremo"
]

skirts = [x.lower() for x in skirts]
# skirts = [x +' ' for x in skirts] # add a space after the name to exlude partial matches


### Create Dataframe

In [14]:
titles = soup.select(".tile__title") # listing title
prices = soup.select(".fw--bold") # sold price
sizes = soup.select(".ellipses.tile__details__pipe__size") # Size

attribs = ['title','price','size','category','style']
df = pd.DataFrame(columns = attribs)


for x in range(len(titles)):
    title = str.lower(titles[x].text.strip())
    price = float(prices[x].text.strip()[1:])
    size = sizes[x].text.strip()[6:]
    
    style = None
    for skirt in skirts:
        if skirt in title:
            style = skirt
            break # stop after the first match 
    
    data_dict = {'title':title,
                 'price':price,
                 'size':size,
                 'category':"skirt",
                 'style':style}
    
    df1 = pd.DataFrame(data_dict,index=[0])
    df = pd.concat([df,df1], axis =0,ignore_index= True)

  df = pd.concat([df,df1], axis =0,ignore_index= True)


In [15]:
df

Unnamed: 0,title,price,size,category,style
0,"doen elowena skirt in villette paisley, brand ...",170.0,S,skirt,elowen
1,"doen sebastiane lace skirt in black, brand new...",180.0,S,skirt,sebastiane
2,doen nandi skirt in daffodil yellow,168.0,Various,skirt,nandi
3,"doen sebastiane lace skirt in black, brand new...",180.0,XS,skirt,sebastiane
4,nwt doen elowen skirt in deep navy sz m,220.0,M,skirt,elowen
5,"doen elowen, small, vguc",200.0,S,skirt,elowen
6,doen sebastiane skirt in powder,160.0,S,skirt,sebastiane
7,"doen cheri skirt, size 6, deep navy",140.0,6,skirt,cheri
8,"doen skirt , black, size 8",178.0,8,skirt,
9,fabienne skirt\nin navy countryside plaid\nsize m,250.0,M,skirt,fabienne


## Load

In [16]:
# Write to CSV
timestamp_format = '%Y-%h-%d-%H:%M:%S'
now = dt.now()
timestamp = now.strftime(timestamp_format)
df['ingest_timestamp'] = timestamp

file_timestamp_format = '%Y%m%d_%H-%M-%S'
file_timestamp = now.strftime(file_timestamp_format)

df.to_csv(f'data/doen_skirts_{file_timestamp}.csv',index=False)

# Log process
message = "Doen Skirts Logged"
with open("scrape_log.txt","a") as file:
    file.write(timestamp + " " + message + '\n')

# Utilities

## Product Style Names

In [17]:
# # Tops
# tops = [
#     "Henri", "Anisa", "Emiliana", "Ingrid", "Joelle",
#     "Celeste", "Faye", "Gabrielle", "Juliana", "Odessa",
#     "Viola", "Monique", "Sophie", "Claudine", "Elena",
#     "Leona", "Paloma", "Bianca", "Marisol", "Estelle",
#     "Inez", "Lorena", "Delilah", "Jocelyn", "Amara",
#     "Colette", "Marielle", "Florelle", "Adeline", "Renee",
#     "Sabrina", "Fleur", "Noemi", "Simone", "Vera",
#     "Natalia", "Yvette", "Clarisse", "Odile", "Beatrice",
#     "Camilla", "Tatiana", "Nadia", "Cleo", "Lydia",
#     "Josephine", "Evelina", "Lucia", "Freya", "Marion"
# ]

# # Skirts
# skirts = [
#     "Elowen", "Sebastiane", "Raina", "Beatrice", "Florence",
#     "Odile", "Esther", "Giselle", "Clementine", "Simone",
#     "Aurora", "Emmeline", "Coralie", "Juliet", "Isadora",
#     "Felicity", "Lucia", "Vivienne", "Celestine", "Anais",
#     "Noelle", "Gabriella", "Liliana", "Violette", "Rosalind",
#     "Allegra", "Seraphina", "Eleanor", "Arden", "Madeleine",
#     "Sophia", "Helena", "Marguerite", "Arabelle", "Isla",
#     "Delphine", "Anouk", "Carys", "Evangeline", "Maris",
#     "Odessa", "Viola", "Lenore", "Selene", "Camille",
#     "Amelie", "Liora", "Fiona", "Adria", "Melisande"
# ]

# # Pants
# pants = [
#     "Diana", "Clara", "Eloise", "Harper", "Amalia",
#     "Marguerite", "Florine", "Estelle", "Genevieve", "Sabine",
#     "Odessa", "Lorelei", "Miriam", "Joanna", "Lenore",
#     "Viviana", "Cecile", "Delia", "Lucinda", "Aurora",
#     "Francesca", "Eliana", "Isobel", "Norah", "Camille",
#     "Selena", "Veronica", "Marina", "Adela", "Corinne",
#     "Penelope", "Rosalyn", "Evelyn", "Jocasta", "Beatrice",
#     "Cressida", "Natalia", "Florence", "Sophia", "Lyra",
#     "Octavia", "Philippa", "Sabina", "Clarissa", "Giselle",
#     "Ines", "Amara", "Elara", "Lucille", "Mattea"
# ]

# # Jeans
# jeans = [
#     "Diana", "Harper", "Juliette", "Sienna", "Isabel",
#     "Audrey", "Celeste", "Phoebe", "Emilia", "Florence",
#     "Margo", "Eleanor", "Claudia", "Colette", "Bianca",
#     "Anastasia", "Lena", "Amelie", "Louisa", "Monica",
#     "Rosa", "Thea", "Verity", "Naomi", "Tessa",
#     "Beatrix", "Delilah", "Eliza", "Helena", "Sylvia",
#     "Odette", "Simone", "Freya", "Camilla", "Vera",
#     "Gwen", "Margaux", "Adeline", "Clara", "Anneliese",
#     "Bridget", "Catherine", "Daphne", "Elinor", "Felicity",
#     "Georgina", "Hannah", "Isolde", "Jade", "Maribel"
# ]


## Doen Product Names

In [18]:
# # Doen Dresses
# url = 'https://www.shopdoen.com/collections/dresses?utm_source=google&campaign_id=10806218531&ad_id=724988960813&utm_medium=cpc&utm_campaign=actual_campaign_name_hardcoded&utm_content=104400744937&utm_term=&gclid=Cj0KCQiA4L67BhDUARIsADWrl7E0ZY9XqbDkatpsUlP50aYS24KA_m3tyCe0SNkiYI4icGTUFzKFJ7YaAoDHEALw_wcB&gad_source=1'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 

In [19]:
# # Doen Tops
# url = 'https://www.shopdoen.com/collections/tops'
# response = requests.get(url)

# # response.status_code
# html = response.text
# soup = BeautifulSoup(html, "html.parser") 

# elements = soup.select(".full-unstyled-link") # Size

# for element in elements:
#     print(element.text.strip()) 