## Imports

In [15]:
from langchain_ollama import ChatOllama
import utils
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

## Load data from directories

In [16]:
# Build the working table from the on-disk article and summary folders.
# NOTE(review): the rest of this cell indexes the result by 'category' and
# 'content', so it is presumably a pandas DataFrame with those columns —
# confirm against utils.load_data_from_directories.
data_df = utils.load_data_from_directories(
    articles_dir='./data/news',
    summaries_dir='./data/summaries',
)

# Fit the encoder on the category labels, then store the integer codes
# next to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(data_df['category'])
data_df['category_encoded'] = label_encoder.transform(data_df['category'])

# 80/20 train/test split; the fixed random_state keeps it reproducible.
train_df, test_df = train_test_split(data_df, random_state=42, test_size=0.2)
test_texts = test_df.loc[:, 'content'].tolist()

# Comma-separated list of the distinct category names, in order of first
# appearance.
allowed_categories = ', '.join(data_df['category'].drop_duplicates())


## Classify the news articles

In [None]:
# Chat model served by Ollama; temperature=0 asks for the least random
# decoding so repeated runs are comparable.
llm = ChatOllama(
    model="llama3",
    temperature=0,
)

# Valid labels, normalised once so the model's answer can be matched
# case-insensitively inside the loop.
valid_categories = {name.strip().lower() for name in allowed_categories.split(', ')}

# Both literals take part in the interpolation here: previously only the
# first piece was an f-string, so the model was sent the literal text
# "{allowed_categories}" instead of the actual category list.
system_prompt = (
    "You are a helpful assistant that classifies news articles into the "
    f"following categories: {allowed_categories}. Only output the category name."
)

# Base conversation shared by every request. It is never mutated, so a failed
# invoke() cannot leave a stale human message behind.
messages = [("system", system_prompt)]

# Classify the first five held-out articles and print prediction vs. label.
# test_texts was built from test_df, so position i refers to the same row in
# both.
for i, article in enumerate(test_texts[:5]):
    ai_msg = llm.invoke(messages + [("human", article)])
    predicted_category = ai_msg.content.strip()
    expected_category = test_df['category'].iloc[i]
    if predicted_category.lower() in valid_categories:
        print(f'Article: ********\n  {article}\n ******** \n '
              f'Category: {predicted_category}, Expected: {expected_category}\n')
    else:
        # The model answered with something outside the allowed label set.
        print(f'Article: ********\n  {article}\n ******** \n '
              f'Category: Unknown (Predicted: {predicted_category}), Expected: {expected_category}\n')

Article: ********
  News Corp eyes video games market

News Corp, the media company controlled by Australian billionaire Rupert Murdoch, is eyeing a move into the video games market.

According to the Financial Times, chief operating officer Peter Chernin said that News Corp is "kicking the tires of pretty much all video games companies". Santa Monica-based Activison is said to be one firm on its takeover list. Video games are "big business", the paper quoted Mr Chernin as saying. We "would like to get into it".

The success of products such as Sony's Playstation, Microsoft's X-Box and Nintendo's Game Cube have boosted demand for video games.

The days of arcade classics such as Space Invaders, Pac-Man and Donkey Kong are long gone. Today, games often have budgets big enough for feature films and look to give gamers as real an experience as possible. And with their price tags reflecting the heavy investment by development companies, video games are proving almost as profitable as they 