# Getting articles

In [4]:

import json

# Read the JSON file
with open('ScrapeArticles/articles.json', 'r', encoding="utf8") as file:
    articles = json.load(file)

# Print the articles
print(articles)

[{'title': 'Seasonal NOS strategy: Buying during high summer season ', 'content': ['As the peak of summer approaches, the way we dress evolves into a thoughtful blend of functionality and style. High summer, with its promise of blazing sunshine and longer daylight hours, demands clothing that is both fashionable and practical, requiring a nuanced approach to fabric selection and design. FashionUnited has examined the key strategies for curating a high summer wardrobe, focusing on comfort, style, and the innovative designs from Cup of Joe, a denim brand that seamlessly blends classic and contemporary fashion with a strategic ', ' approach. By ensuring the continuous availability of key summer staples and beloved classics, this inventory management strategy enables brands to optimise resource use and prolong the lifecycles of their products, all while keeping summer fashion fresh and accessible.', 'Fashion buyers and retailers increasingly turn to NOS products. This strategy offers signi

In [19]:
# for article in articles, get content lists and append all the content to one list
content = []
for article in articles:
    content.extend(article['content'])

content_string = ' '.join(content)

In [25]:
content

['As the peak of summer approaches, the way we dress evolves into a thoughtful blend of functionality and style. High summer, with its promise of blazing sunshine and longer daylight hours, demands clothing that is both fashionable and practical, requiring a nuanced approach to fabric selection and design. FashionUnited has examined the key strategies for curating a high summer wardrobe, focusing on comfort, style, and the innovative designs from Cup of Joe, a denim brand that seamlessly blends classic and contemporary fashion with a strategic ',
 ' approach. By ensuring the continuous availability of key summer staples and beloved classics, this inventory management strategy enables brands to optimise resource use and prolong the lifecycles of their products, all while keeping summer fashion fresh and accessible.',
 'Fashion buyers and retailers increasingly turn to NOS products. This strategy offers significant advantages over traditional purchasing methods, which often require buyin

In [24]:
print(content_string)

As the peak of summer approaches, the way we dress evolves into a thoughtful blend of functionality and style. High summer, with its promise of blazing sunshine and longer daylight hours, demands clothing that is both fashionable and practical, requiring a nuanced approach to fabric selection and design. FashionUnited has examined the key strategies for curating a high summer wardrobe, focusing on comfort, style, and the innovative designs from Cup of Joe, a denim brand that seamlessly blends classic and contemporary fashion with a strategic   approach. By ensuring the continuous availability of key summer staples and beloved classics, this inventory management strategy enables brands to optimise resource use and prolong the lifecycles of their products, all while keeping summer fashion fresh and accessible. Fashion buyers and retailers increasingly turn to NOS products. This strategy offers significant advantages over traditional purchasing methods, which often require buying inventor

# Processing with ChatGPT

In [21]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import private # private.py file with keys

In [22]:
def getFashionTerms(articleContent):

    # Initialize the chat model
    llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0, api_key=private.ChatOpenAI_KEY)

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """You are a helpful assistant that gets the fashion terms from the given article content. These terms are going to be used later to analyse the frequency and predict the trends.
                These terms are fashion_items, fashion_brands, fashion_styles, fashion_patterns, fashion_materials, fashion_trends.
                After getting these terms, return a raw python dictionary with the keys as the terms and the values as the list of terms. 
                The article content is: {input}""",
            ),
        ]
    )

    chain = prompt | llm

    # Generate a response from the chat model
    response = chain.invoke(
        {
            "input": articleContent,
        }
    )
        
    return response

# Get the fashion terms from the article content
response = getFashionTerms(content_string)

In [23]:
# get only the content from the response
response_content = response.content
response_dict = eval(response_content)
# separate the fashion terms
fashion_items = response_dict['fashion_items']
fashion_brands = response_dict['fashion_brands']
fashion_styles = response_dict['fashion_styles']
fashion_patterns = response_dict['fashion_patterns']
fashion_materials = response_dict['fashion_materials']
fashion_trends = response_dict['fashion_trends']

# Display the extracted fashion terms
print("Fashion Items:", fashion_items)
print("Fashion Brands:", fashion_brands)
print("Fashion Styles:", fashion_styles)
print("Fashion Patterns:", fashion_patterns)
print("Fashion Materials:", fashion_materials)
print("Fashion Trends:", fashion_trends)


Fashion Items: ['denim brand', 'dresses', 't-shirts', 'shorts', 'tank tops', 'sunglasses', 'jackets', 'jeans', 'cutoff shorts', 'jumpsuits', 'skirts', 'blouses', 'culottes', 'bra', 'outerwear', 'sneakers', 'boots']
Fashion Brands: ['Cup of Joe', "Levi's", 'Reformation', 'Jimmy Fairly', 'Brochu Walker', 'Dôen', 'Stone Island', 'Dior', 'Johnstons of Elgin', 'Gucci', 'Shein', 'Ebay', 'Certilogo', 'Save The Duck', 'Botter', 'Ahluwalia', 'Natasha Zinko', 'Andersson Bell']
Fashion Styles: ['casual chic', 'minimalist', 'trendy', 'vintage-inspired', 'feminine', 'versatile', 'bohemian', 'relaxed', 'tomboyish', 'street style', 'high and low fashion']
Fashion Patterns: ['patchwork', 'floral prints', 'eyelets', 'geometric']
Fashion Materials: ['cotton', 'linen', 'bamboo', 'bio-acetate', 'bio-nylon', 'lambskin', 'suede', 'leather', 'denim']
Fashion Trends: ['sustainability', 'comfort', 'functionality', 'breathability', 'upcycling', 'DIY', 'recommerce', 'circularity', 'authenticity', 'patchwork deta

In [26]:
# store each set of fashion terms in a tabular format with the following columns: date, term, frequency
import pandas as pd
from datetime import datetime

# Get the current date
date = datetime.now().strftime("%Y-%m-%d")

# Create a DataFrame for fashion items
fashion_items_data = {
    "date": [date] * len(fashion_items),
    "term": fashion_items,
    "frequency": [1] * len(fashion_items),
}
fashion_items_df = pd.DataFrame(fashion_items_data)

# Create a DataFrame for fashion brands
fashion_brands_data = {
    "date": [date] * len(fashion_brands),
    "term": fashion_brands,
    "frequency": [1] * len(fashion_brands),
}
fashion_brands_df = pd.DataFrame(fashion_brands_data)

# Create a DataFrame for fashion styles
fashion_styles_data = {
    "date": [date] * len(fashion_styles),
    "term": fashion_styles,
    "frequency": [1] * len(fashion_styles),
}
fashion_styles_df = pd.DataFrame(fashion_styles_data)

# Create a DataFrame for fashion patterns
fashion_patterns_data = {
    "date": [date] * len(fashion_patterns),
    "term": fashion_patterns,
    "frequency": [1] * len(fashion_patterns),
}
fashion_patterns_df = pd.DataFrame(fashion_patterns_data)

# Create a DataFrame for fashion materials
fashion_materials_data = {
    "date": [date] * len(fashion_materials),
    "term": fashion_materials,
    "frequency": [1] * len(fashion_materials),
}
fashion_materials_df = pd.DataFrame(fashion_materials_data)

# Display all the DataFrames
print("Fashion Items DataFrame:")
print(fashion_items_df)

print("\nFashion Brands DataFrame:")
print(fashion_brands_df)

print("\nFashion Styles DataFrame:")
print(fashion_styles_df)

print("\nFashion Patterns DataFrame:")
print(fashion_patterns_df)

print("\nFashion Materials DataFrame:")
print(fashion_materials_df)

Fashion Items DataFrame:
          date           term  frequency
0   2024-05-15    denim brand          1
1   2024-05-15        dresses          1
2   2024-05-15       t-shirts          1
3   2024-05-15         shorts          1
4   2024-05-15      tank tops          1
5   2024-05-15     sunglasses          1
6   2024-05-15        jackets          1
7   2024-05-15          jeans          1
8   2024-05-15  cutoff shorts          1
9   2024-05-15      jumpsuits          1
10  2024-05-15         skirts          1
11  2024-05-15        blouses          1
12  2024-05-15       culottes          1
13  2024-05-15            bra          1
14  2024-05-15      outerwear          1
15  2024-05-15       sneakers          1
16  2024-05-15          boots          1

Fashion Brands DataFrame:
          date                term  frequency
0   2024-05-15          Cup of Joe          1
1   2024-05-15              Levi's          1
2   2024-05-15         Reformation          1
3   2024-05-15        Jimm