### Get article

In [24]:
import os
import requests
from typing import List, Tuple
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from newsapi import NewsApiClient
from openai import OpenAI

load_dotenv(verbose=True)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

NEWS_TOKEN = os.getenv("NEWS_API_KEY")
newsapi = NewsApiClient(api_key=NEWS_TOKEN)
top_headlines = newsapi.get_top_headlines(sources='bbc-news', page_size=1)

In [25]:
top_headlines

{'status': 'ok',
 'totalResults': 10,
 'articles': [{'source': {'id': 'bbc-news', 'name': 'BBC News'},
   'author': 'BBC News',
   'title': 'Ten people killed in New Orleans as vehicle ploughs into crowd',
   'description': 'Police say 35 others injured on Bourbon Street by attacker "hell-bent on carnage".',
   'url': 'https://www.bbc.co.uk/news/articles/crl378x8nnjo',
   'urlToImage': 'https://ichef.bbci.co.uk/ace/branded_news/1200/cpsprodpb/c3f4/live/96275010-c837-11ef-8f44-0922f468356e.png',
   'publishedAt': '2025-01-01T13:37:24.800693Z',
   'content': 'A CBS reporter saw multiple people with injuries on the ground at the intersection of Bourbon and Canal streets.\r\nA video confirmed as genuine by BBC Verify show a person lying on the ground and a cr… [+2053 chars]'}]}

In [26]:
def get_article_from_url(url: str) -> str:
    """
    입력받은 URL을 토대로 기사의 본문 태그를 찾아 본문 내용을 반환합니다.
    """
    # get HTML content of url
    print(url)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # select p tag to get content
    paragraphs = soup.find_all('p')
    article_text = '\n'.join([para.get_text() for para in paragraphs])
    
    return article_text

In [27]:
context = get_article_from_url(top_headlines["articles"][0]["url"])
context

https://www.bbc.co.uk/news/articles/crl378x8nnjo


'The suspect in the New Orleans attack that killed 15 people on New Year\'s Day did not act alone, US investigators believe.\nShamsud-Din Jabbar, a 42-year-old US citizen, is believed to have driven a pick-up truck into a crowd on a busy New Orleans street, before exiting the vehicle and firing a weapon. He was shot dead by police at the scene.\nThe FBI says an Islamic State (IS) group flag was found inside the vehicle he was driving, while two improvised explosive devices were found nearby.\nFBI assistant special agent Alethea Duncan said the agency did not believe Jabbar was "solely responsible" and were investigating the incident as an "act of terrorism".\nMore on the New Orleans attack\nLIVE: Several people involved in IS-inspired attack\nThe victims: Aspiring nurse and ex-college football player among the dead\nThe attacker: What we know about Shamsud-Din Jabbar\nWatch: How day of deadly attack unfolded\nPresident Joe Biden said investigators were looking into whether the incident

### Create article

In [28]:
def convert_txt_to_steps(context: str, level: str):
    """
    입력받은 기사 내용(context)을 입력받은 수준(level)에 맞게 재생성합니다.
    """
    
    # set prompt for gpt model
    prompt = f"""
    I'll send you the article body. Please return the value in format like below.
    dict(“rewrite_article": string, “category": string, "keywords": list(str))

    1. Rewrite the article text to a {level} level and put it as the first part of the return value. You must finish article within 50 centences, and line it up appropriately to make it easier to read. Also, be original and must not duplicate any of the original sentences.
    2. Select the category of t]orts, business, innovation, culture, travel, or earth, and enter it as the second value in the return value.
    3. Select 5-7 important words from the article in purpose of learning English and put them as the 3rd part of the return value

    Article Text: {context}
    """
    
    # get response form gpt model
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that rewrites articles for teaching English."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=2048,
        temperature=0.5
    )
    
    # return response text
    response_dict = response.model_dump()
    response_message = response_dict["choices"][0]["message"]["content"]
    return response_message


In [30]:
leveled_text = convert_txt_to_steps(context, "elementary school")
results = eval(leveled_text)
results

{'rewrite_article': '\n    On New Year\'s Day, there was a terrible attack in New Orleans that resulted in the deaths of 15 people. \n    The police believe the person responsible did not act alone. \n    Shamsud-Din Jabbar, a 42-year-old man from the United States, drove a truck into a crowd on a busy street. \n    After hitting people, he got out of the truck and started shooting. \n    The police shot him at the scene. \n\n    Inside the truck, the FBI found a flag of a group called the Islamic State. \n    They also found two bombs nearby. \n    An FBI agent named Alethea Duncan said they think Jabbar was not the only one involved and are looking into this as an act of terrorism. \n\n    President Joe Biden mentioned that investigators are checking if this attack is connected to an explosion at a hotel in Las Vegas. \n    So far, they have not found any links to the Islamic State from that incident. \n    The police are also looking into whether the attack is related to Donald Trum

### Unsplash Test

In [2]:
import os
import requests
from dotenv import load_dotenv

load_dotenv(verbose=True)
unsplash_access_key = os.getenv("UNSPLASH_ACCESS_KEY")

url = f"https://api.unsplash.com/photos/random?client_id={unsplash_access_key}"
response = requests.get(url=url)
response.json

<bound method Response.json of <Response [200]>>

In [12]:
img_dict = response.json()
print(img_dict)
print(type(img_dict))

{'id': 'uTR1zs3Ky78', 'slug': 'a-cobblestone-street-lined-with-old-buildings-uTR1zs3Ky78', 'alternative_slugs': {'en': 'a-cobblestone-street-lined-with-old-buildings-uTR1zs3Ky78', 'es': 'una-calle-empedrada-bordeada-de-edificios-antiguos-uTR1zs3Ky78', 'ja': '古い建物が立ち並ぶ石畳の通り-uTR1zs3Ky78', 'fr': 'une-rue-pavee-bordee-de-vieux-batiments-uTR1zs3Ky78', 'it': 'una-strada-acciottolata-fiancheggiata-da-vecchi-edifici-uTR1zs3Ky78', 'ko': '오래된-건물이-늘어선-조약돌-거리-uTR1zs3Ky78', 'de': 'eine-kopfsteinpflasterstrasse-die-von-alten-gebauden-gesaumt-ist-uTR1zs3Ky78', 'pt': 'uma-rua-de-paralelepipedos-ladeada-por-edificios-antigos-uTR1zs3Ky78'}, 'created_at': '2024-12-18T10:04:44Z', 'updated_at': '2024-12-31T21:05:32Z', 'promoted_at': '2024-12-23T00:01:00Z', 'width': 8192, 'height': 5464, 'color': '#f3f3f3', 'blur_hash': 'LkMZ]*-;D%Vs_NxuWBWB?vbboftR', 'description': None, 'alt_description': 'A cobblestone street lined with old buildings', 'breadcrumbs': [], 'urls': {'raw': 'https://images.unsplash.com/photo

In [15]:
img_dict["urls"]["small"]

'https://images.unsplash.com/photo-1734515932217-a5f737525cc0?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w2OTIwNTB8MHwxfHJhbmRvbXx8fHx8fHx8fDE3MzU3MjM3MDF8&ixlib=rb-4.0.3&q=80&w=400'

In [18]:
url = f"https://api.unsplash.com/search/photos?client_id={unsplash_access_key}"
response = requests.get(url=url, params={"query": "black cat, blanket"})
img_dict = response.json()
print(img_dict["results"][0]["urls"])

{'raw': 'https://images.unsplash.com/photo-1512528368222-38761bbe68dd?ixid=M3w2OTIwNTB8MHwxfHNlYXJjaHwxfHxibGFjayUyMGNhdCUyQyUyMGJsYW5rZXR8ZW58MHx8fHwxNzM1NzI5MTQ2fDA&ixlib=rb-4.0.3', 'full': 'https://images.unsplash.com/photo-1512528368222-38761bbe68dd?crop=entropy&cs=srgb&fm=jpg&ixid=M3w2OTIwNTB8MHwxfHNlYXJjaHwxfHxibGFjayUyMGNhdCUyQyUyMGJsYW5rZXR8ZW58MHx8fHwxNzM1NzI5MTQ2fDA&ixlib=rb-4.0.3&q=85', 'regular': 'https://images.unsplash.com/photo-1512528368222-38761bbe68dd?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w2OTIwNTB8MHwxfHNlYXJjaHwxfHxibGFjayUyMGNhdCUyQyUyMGJsYW5rZXR8ZW58MHx8fHwxNzM1NzI5MTQ2fDA&ixlib=rb-4.0.3&q=80&w=1080', 'small': 'https://images.unsplash.com/photo-1512528368222-38761bbe68dd?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w2OTIwNTB8MHwxfHNlYXJjaHwxfHxibGFjayUyMGNhdCUyQyUyMGJsYW5rZXR8ZW58MHx8fHwxNzM1NzI5MTQ2fDA&ixlib=rb-4.0.3&q=80&w=400', 'thumb': 'https://images.unsplash.com/photo-1512528368222-38761bbe68dd?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w2OTIw

### Sentence, Word Sim Test

In [19]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.5.1-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.6.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting scipy (from sentence-transformers)
  Downloading scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting Pillow (from sentence-transformers)
  Downloading pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.1 kB)
Collecting filelock (from huggingface-hub>=0.20.0->sentence-transformers)
  Downloading filelock-3.16.

In [29]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# 모델 로드
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [32]:
sentences = results["rewrite_article"]
sentence_embeddings = model.encode(sentences)

In [33]:
word_embedding = []
for word in results["keywords"]:
    word_embedding.append(model.encode(word))

In [50]:
import numpy as np

word_sims = []
# 4. 코사인 유사도 계산
for em in word_embedding:
    data = np.array(em)
    print(f"origin shape: {data.shape}")
    reshaped_data = data.reshape(1, -1)
    print(f"reshaped shape: {reshaped_data.shape}")
    sim = cosine_similarity(reshaped_data, sentence_embeddings)
    # word_sims.append(sim)

# # 5. 결과 출력
# for i, sentence in enumerate(word_sims):
#     print(f"Similarity between '{word}' and 'sentence': {word_sims[0][i]:.4f}")

origin shape: (384,)
reshaped shape: (1, 384)


ValueError: Expected 2D array, got 1D array instead:
array=[-2.75533255e-02  6.37150183e-02 -2.52408516e-02 -8.55092052e-03
  7.04711676e-02 -2.82979794e-02  1.53779322e-02 -7.77567178e-02
  7.08258823e-02 -4.10191454e-02  7.30010420e-02  1.02148131e-02
  1.23317473e-01  2.69446149e-02  3.53877135e-02  3.31917778e-02
  2.52374019e-02 -4.03871909e-02 -8.33430588e-02  1.66621059e-02
 -5.44863157e-02  1.65747311e-02  2.40899492e-02 -1.16899703e-02
 -3.10931690e-02 -1.40323145e-02  2.61776429e-02  3.19667943e-02
 -1.11051418e-01  3.19443457e-02  1.21237926e-01 -3.43433139e-03
 -1.13666110e-01  3.55782476e-03 -9.41712235e-04 -2.89851557e-02
  5.18659316e-02  6.33020177e-02  7.52802044e-02  4.34009731e-02
  3.19648832e-02 -1.29360303e-01 -2.55632289e-02 -2.19880100e-02
  7.79529987e-03  6.70087943e-03 -3.76795791e-02  1.26106944e-03
  1.22850567e-01 -3.25954184e-02 -6.08828887e-02  4.76699769e-02
  4.93462645e-02 -4.42912243e-02  2.68398020e-02 -1.59823805e-01
  1.42589388e-02  9.12939943e-03  4.01948951e-02  1.79209467e-02
  2.12710500e-02 -7.39410403e-04  3.63559388e-02  1.27320737e-02
  4.50965390e-02 -5.26050255e-02  4.50300351e-02 -5.42758964e-02
  1.71141345e-02 -1.61122158e-02  9.41250846e-02  7.89568108e-03
  2.78916638e-02 -3.14854495e-02 -9.53447726e-03 -9.78534296e-02
  3.66094224e-02  6.65014088e-02  1.74904522e-02  1.23892375e-03
  7.14929327e-02 -1.18928157e-01  9.78256669e-03 -2.93708295e-02
  2.06816345e-02  1.61951557e-02 -5.10189757e-02  6.02687225e-02
 -4.48641144e-02  1.07383251e-01 -1.22469082e-01 -2.59145000e-03
  3.82824726e-02  3.20542343e-02  8.44619349e-02 -3.25924307e-02
 -2.82019824e-02  5.53963520e-02 -7.68260211e-02  9.61710587e-02
 -2.65522413e-02  2.02332754e-02 -2.24901154e-03 -4.56937328e-02
  3.82415205e-02 -8.61135498e-03 -4.42011133e-02  5.70963956e-02
 -6.10749312e-02 -4.19958355e-03  5.44668436e-02  2.70531303e-03
 -4.02126908e-02 -6.63425326e-02  4.06059623e-02 -2.06782203e-03
 -2.78140791e-02  3.59866321e-02 -6.03341125e-02 -8.16047862e-02
  7.61415958e-02  1.37754506e-03 -4.72525513e-04  3.61822955e-02
 -3.09435127e-04  1.76484752e-02 -9.21921134e-02  1.01081466e-33
  7.50478283e-02 -1.79379322e-02  1.94553994e-02  1.15485322e-02
  5.73092978e-03 -5.57789858e-03 -1.15637518e-01  7.05342740e-02
 -4.57275128e-05 -3.13915499e-03 -2.03852393e-02 -4.68001701e-02
 -2.94459774e-03 -2.83514522e-02 -3.40746134e-03  8.16882029e-03
 -8.54135156e-02 -3.36979069e-02 -1.08120076e-01 -4.51242700e-02
  2.07578447e-02  1.41593115e-03  1.82613395e-02  8.03741589e-02
 -2.73605119e-02  2.53390912e-02 -5.09775756e-03  1.10387802e-02
 -3.54986824e-02  1.42523488e-02 -9.83711109e-02  5.89127056e-02
 -1.24972714e-02  3.68928500e-02  3.48567516e-02 -1.39615731e-02
 -2.52514202e-02  3.28614679e-03 -6.94339871e-02 -8.78654197e-02
  8.01606849e-03  4.40034568e-02 -3.39694992e-02  4.35793251e-02
 -4.71944846e-02  2.67292820e-02 -3.37289274e-02 -3.95496227e-02
 -1.90124810e-02  4.14350331e-02  5.77613013e-03 -1.25340009e-02
 -2.04767734e-02 -5.92977814e-02 -6.64823316e-03  1.02781942e-02
 -3.54824215e-03 -3.02227493e-02  1.11085385e-01  2.58415863e-02
  1.89773887e-02  7.47390240e-02 -4.90469448e-02  8.81263614e-02
 -6.35019690e-02 -8.09761956e-02  4.60865349e-02  4.74857911e-02
 -5.49924467e-03  3.77752818e-02  9.23328400e-02  5.50628752e-02
  6.64286837e-02 -4.73497026e-02 -1.41153792e-02 -1.28921410e-02
  2.34952122e-02  1.93983279e-02 -3.06507889e-02  2.03695297e-02
  1.06161736e-01 -2.21184641e-02  1.55823141e-01 -2.22325362e-02
 -9.46694687e-02  8.08076411e-02 -1.16674067e-03 -4.70693670e-02
 -3.19461972e-02  4.67446297e-02 -1.21909328e-01  3.18799242e-02
 -1.48066203e-04 -2.26216707e-02 -7.30692819e-02 -3.19837503e-33
 -3.75995263e-02 -1.12257032e-02  2.29634698e-02  8.11092183e-03
  3.14309373e-02 -7.10809231e-02 -2.66142213e-03  2.93724257e-02
  7.20130131e-02 -1.48690883e-02  1.49676334e-02 -3.22519951e-02
  5.17595150e-02  1.00081144e-02  2.79592406e-02  1.20338062e-02
  4.55085076e-02 -2.12161262e-02 -1.28283739e-01  2.25771964e-02
  3.26584168e-02  5.92346750e-02 -9.19957384e-02  2.90275291e-02
 -4.02241247e-03  2.04686634e-02  1.21404804e-01  2.90032243e-04
 -1.10665821e-02  9.72246577e-04 -5.23746088e-02 -5.70677454e-03
  2.17125118e-02  2.50010975e-02 -2.29241457e-02  7.61915520e-02
  6.34380504e-02 -1.51037741e-02 -2.80599631e-02 -2.81196479e-02
  6.44013286e-02  9.39551517e-02 -1.08282164e-01  5.45298606e-02
 -2.96382010e-02  2.08873171e-02 -1.10744534e-03  4.07571830e-02
  2.09289193e-02 -3.29742581e-02 -4.44719009e-03 -1.12818731e-02
 -6.33298755e-02  4.87028472e-02 -2.02213954e-02  1.87084246e-02
 -3.33202146e-02 -1.28719294e-02  2.80441232e-02 -5.45080565e-02
 -3.03413346e-02 -2.15118024e-02  1.53688854e-02 -2.72155777e-02
 -2.72871740e-02 -6.18901439e-02 -3.56067233e-02 -5.99935688e-02
  1.78483129e-02  2.83110663e-02  7.93349594e-02 -4.07687612e-02
 -9.47016999e-02 -6.35947883e-02  3.85787035e-03 -1.90876257e-02
 -8.94213617e-02  9.20831040e-02 -1.10829018e-01  4.32416722e-02
  3.87830809e-02 -8.08643252e-02 -8.82882848e-02  3.69672924e-02
  8.64755884e-02  4.21298966e-02  7.30466247e-02 -5.73409945e-02
 -2.98779458e-02  1.69533473e-02 -2.39326227e-02 -1.89236123e-02
 -7.40233250e-03  3.13493833e-02 -9.53825787e-02 -6.02422787e-08
 -2.31089685e-02  4.35194708e-02 -5.23514338e-02 -1.07226027e-02
  4.49450910e-02 -1.57246643e-04  2.97843060e-03 -4.21910249e-02
  1.64568182e-02  7.06852749e-02  6.48231208e-02  4.51062992e-02
 -1.52686015e-02  7.01143034e-03 -6.95999637e-02  5.81061235e-03
  2.91646197e-02 -5.52076905e-04 -1.87217779e-02  2.26977300e-02
  5.53162210e-02  2.16752831e-02  1.26820672e-02  6.38897493e-02
  8.67681950e-02 -1.01083694e-02 -1.08629085e-01 -9.12033021e-03
  3.88141796e-02 -2.58480776e-02 -1.04637489e-01  2.96364538e-03
 -6.71663061e-02 -1.11299314e-01 -5.85219637e-02 -3.95294046e-03
  2.80290898e-02 -1.83113441e-02  6.45842701e-02  1.19829727e-02
 -4.06581350e-02  1.65143551e-03  7.15402234e-03  4.71293405e-02
 -3.13038342e-02 -2.69961972e-02 -5.79379164e-02 -6.69225231e-02
 -5.02572171e-02 -1.55542698e-02  6.60322607e-02 -9.40919481e-03
  7.63239013e-03  5.35513908e-02  1.25469025e-02  1.59839541e-02
  7.07818335e-03 -4.34494065e-03  8.66963938e-02 -5.51419966e-02
  3.74021493e-02 -5.70273101e-02  1.31372397e-03  3.75000504e-03].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.