# Scraping article

In [1]:
import ast

import requests
from bs4 import BeautifulSoup

In [2]:
# Define the URL of the webpage you want to scrape
url = "https://fashionunited.com/news/fashion/embracing-comfort-woven-footwear-for-ss24/2024050859770"

# Send a GET request to the URL and retrieve the webpage content.
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Stop here on a 4xx/5xx status instead of parsing an error page as the article
response.raise_for_status()

# Create a BeautifulSoup object by passing the webpage content and specifying the parser
soup = BeautifulSoup(response.content, "html.parser")

# Extract the title. find() returns None when no matching tag exists, so check
# before touching .text to get a readable error instead of an AttributeError.
title_tag = soup.find("h1")
if title_tag is None:
    raise ValueError(f"No <h1> title found at {url}")
title = title_tag.text

# Extract the article content.
# NOTE(review): this class string looks auto-generated by the site's CSS tooling
# and may change between site releases -- confirm it is still valid before reuse.
content_tag = soup.find("div", class_="css-1s0my6s e15wwp330")
if content_tag is None:
    raise ValueError(f"Article body container not found at {url}")
content = content_tag.get_text(strip=True)

# Display the extracted information
print("Title:", title)
print("Content:", content[:200], "...")  # Display the first 200 characters for preview


Title: Embracing comfort: Woven footwear for SS24
Content: As we step into the warmer months, the fashion industry is witnessing a significant revival of woven footwear, marking a stylish convergence of tradition and contemporary flair. The concept of woven s ...


# Processing with ChatGPT

In [3]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import private # private.py file with keys

In [4]:
def getFashionTerms(articleContent):
    """Ask the chat model to extract fashion terms from an article.

    Parameters
    ----------
    articleContent
        Article text; it is substituted for the ``{input}`` placeholder in the
        system prompt.

    Returns
    -------
    The object produced by invoking the ``prompt | llm`` chain. The notebook
    reads the model's text from its ``.content`` attribute.
    """
    # The instructions and the article travel together in one system message.
    system_template = """You are a helpful assistant that gets the fashion terms from the given article content. These terms are going to be used later to analyse the frequency and predict the trends.
                These terms are fashion_items, fashion_brands, fashion_styles, fashion_patterns, fashion_materials, fashion_trends.
                After getting these terms, return a raw python dictionary with the keys as the terms and the values as the list of terms. 
                The article content is: {input}"""
    extraction_prompt = ChatPromptTemplate.from_messages([("system", system_template)])

    # Chat model configuration: fixed model name, temperature pinned to 0,
    # API key read from the local private module.
    chat_model = ChatOpenAI(
        model="gpt-3.5-turbo-0125",
        temperature=0,
        api_key=private.ChatOpenAI_KEY,
    )

    # Pipe the prompt into the model and run it on the supplied article text
    return (extraction_prompt | chat_model).invoke({"input": articleContent})

# Get the fashion terms from the article content (the text scraped into `content`).
# Note: this rebinds `response`, which previously held the HTTP response from
# requests.get(url); from here on `response` is the chat model's reply.
response = getFashionTerms(content)

In [5]:
# Get only the text content from the chat model's response
response_content = response.content

# Parse the reply as a Python literal. ast.literal_eval only accepts literal
# syntax (dicts, lists, strings, numbers, ...), whereas eval() would execute
# whatever expression the model happened to return.
try:
    response_dict = ast.literal_eval(response_content)
except (ValueError, SyntaxError) as exc:
    raise ValueError(
        f"Model reply is not a Python literal: {response_content!r}"
    ) from exc

# The prompt asks for a dictionary; anything else cannot be indexed by key below
if not isinstance(response_dict, dict):
    raise ValueError(
        f"Expected a dictionary from the model, got {type(response_dict).__name__}"
    )

# Separate the fashion terms (a KeyError here means the model omitted a category)
fashion_items = response_dict['fashion_items']
fashion_brands = response_dict['fashion_brands']
fashion_styles = response_dict['fashion_styles']
fashion_patterns = response_dict['fashion_patterns']
fashion_materials = response_dict['fashion_materials']
fashion_trends = response_dict['fashion_trends']

# Display the extracted fashion terms
print("Fashion Items:", fashion_items)
print("Fashion Brands:", fashion_brands)
print("Fashion Styles:", fashion_styles)
print("Fashion Patterns:", fashion_patterns)
print("Fashion Materials:", fashion_materials)
print("Fashion Trends:", fashion_trends)


Fashion Items: ['woven footwear', 'shoes', 'clog', 'slippers', 'sandals']
Fashion Brands: ['Freedom Moses']
Fashion Styles: ['boho-chic', 'high-fashion', 'street style', 'modern bohemian']
Fashion Patterns: ['intricate weaves', 'traditional patterns', 'metallic hues']
Fashion Materials: ['natural materials', 'synthetic blends', 'natural fibres', 'vegan materials']
Fashion Trends: ['woven footwear', 'vibrant colours', 'metallic hues', 'sustainable fashion', 'ethical manufacturing']


In [7]:
# store each set of fashion terms in a tabular format with the following columns: date, term, frequency
import pandas as pd
from datetime import datetime

def _term_columns(terms, date_str):
    """Build the column data (date, term, frequency) for one list of terms.

    Every term is paired with the same ``date_str`` and a frequency of 1.
    """
    return {
        "date": [date_str] * len(terms),
        "term": terms,
        "frequency": [1] * len(terms),
    }


# Get the current date.
# NOTE(review): this is the date the cell is run, not the article's publication
# date -- confirm that is the intended timestamp for trend analysis.
date = datetime.now().strftime("%Y-%m-%d")

# Create one DataFrame per category of fashion terms
fashion_items_data = _term_columns(fashion_items, date)
fashion_items_df = pd.DataFrame(fashion_items_data)

fashion_brands_data = _term_columns(fashion_brands, date)
fashion_brands_df = pd.DataFrame(fashion_brands_data)

fashion_styles_data = _term_columns(fashion_styles, date)
fashion_styles_df = pd.DataFrame(fashion_styles_data)

fashion_patterns_data = _term_columns(fashion_patterns, date)
fashion_patterns_df = pd.DataFrame(fashion_patterns_data)

fashion_materials_data = _term_columns(fashion_materials, date)
fashion_materials_df = pd.DataFrame(fashion_materials_data)

# fashion_trends is extracted alongside the other categories, so tabulate it too
fashion_trends_data = _term_columns(fashion_trends, date)
fashion_trends_df = pd.DataFrame(fashion_trends_data)

# Display all the DataFrames; a blank line separates each table from the previous one
labelled_frames = [
    ("Fashion Items", fashion_items_df),
    ("Fashion Brands", fashion_brands_df),
    ("Fashion Styles", fashion_styles_df),
    ("Fashion Patterns", fashion_patterns_df),
    ("Fashion Materials", fashion_materials_df),
    ("Fashion Trends", fashion_trends_df),
]
for position, (label, frame) in enumerate(labelled_frames):
    separator = "" if position == 0 else "\n"
    print(f"{separator}{label} DataFrame:")
    print(frame)

Fashion Items DataFrame:
         date            term  frequency
0  2024-05-15  woven footwear          1
1  2024-05-15           shoes          1
2  2024-05-15            clog          1
3  2024-05-15        slippers          1
4  2024-05-15         sandals          1

Fashion Brands DataFrame:
         date           term  frequency
0  2024-05-15  Freedom Moses          1

Fashion Styles DataFrame:
         date             term  frequency
0  2024-05-15        boho-chic          1
1  2024-05-15     high-fashion          1
2  2024-05-15     street style          1
3  2024-05-15  modern bohemian          1

Fashion Patterns DataFrame:
         date                  term  frequency
0  2024-05-15      intricate weaves          1
1  2024-05-15  traditional patterns          1
2  2024-05-15         metallic hues          1

Fashion Materials DataFrame:
         date               term  frequency
0  2024-05-15  natural materials          1
1  2024-05-15   synthetic blends          1
2  202