In [19]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch

In [20]:

# Checkpoint directories: `model_path` feeds the fake/authentic classifier,
# `general_model_path` feeds the general/health classifier.
model_path = "./distilbert_model4"
general_model_path = "./general_model"

# Tokenizers, loaded in the same order as the paths above
tokenizer, general_tokenizer = (
    DistilBertTokenizer.from_pretrained(checkpoint_dir)
    for checkpoint_dir in (model_path, general_model_path)
)

# Sequence-classification models, one per checkpoint
model, general_model = (
    DistilBertForSequenceClassification.from_pretrained(checkpoint_dir)
    for checkpoint_dir in (model_path, general_model_path)
)

In [21]:



# Classify a single news article as "fake" or "authentic"
def classify_news(text, tokenizer, model, max_length=512):
    """
    Label a piece of news text as "fake" or "authentic".

    Parameters:
        text (str): News content to classify.
        tokenizer: Callable tokenizer; invoked with truncation, padding,
            ``max_length`` and ``return_tensors="pt"``.
        model: Sequence-classification model; called with the tokenizer
            output as keyword arguments and must expose ``.logits``.
        max_length (int): Token limit handed to the tokenizer.

    Returns:
        str: "fake" when the highest-scoring class index is 0,
        "authentic" when it is 1.

    Side effects:
        Prints a separator line, a caption, and the predicted class index.
    """
    # Index -> label; must agree with the label ids used during training
    label_names = ["fake", "authentic"]

    # Encode the text as PyTorch tensors
    encoded = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )

    # Forward pass without gradient tracking; keep the arg-max class index
    with torch.no_grad():
        class_index = model(**encoded).logits.argmax(dim=-1).item()

    # Same three output lines as separate print calls would produce
    print("-----", "This is the value: ", class_index, sep="\n")

    return label_names[class_index]



In [22]:
# Classify a news article as "general" or "health"; health articles also get a fake/authentic check
def classify_health_news(text, tokenizer, model, max_length=512,
                         news_tokenizer=None, news_model=None):
    """
    Classify news content as "general" or "health".

    When the content is predicted to be health news (class index 1), it is
    additionally passed to ``classify_news`` and the fake/authentic verdict
    is printed. That verdict is not part of the return value.

    Parameters:
        text (str): The news content to classify.
        tokenizer: Tokenizer belonging to the general/health model.
        model: The fine-tuned general/health DistilBERT model.
        max_length (int): Maximum length of the input sequence; also
            forwarded to ``classify_news`` for the second stage.
        news_tokenizer: Tokenizer for the fake/authentic model used in the
            second stage. Optional, see the fallback rule below.
        news_model: The fake/authentic model used in the second stage.
            Optional, see the fallback rule below.

    Fallback rule for ``news_tokenizer`` / ``news_model``:
        If omitted, the module-level names ``tokenizer`` / ``model`` are
        used when they exist. If those are not defined either, the
        general/health pair passed to this function is reused (the
        behaviour before these parameters existed).

    Returns:
        str: "general" or "health".
    """
    # Preprocess the input text
    inputs = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt"  # Return PyTorch tensors
    )

    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=-1).item()

    print("-----")
    print("This is the value: ")
    print(predicted_class)

    if predicted_class == 1:
        # The parameters `tokenizer` / `model` shadow the module-level
        # fake-news pair, so passing them on would re-run the general/health
        # model and mislabel its class 1 as "authentic". Resolve the
        # second-stage pair explicitly instead.
        module_names = globals()
        if news_tokenizer is None:
            news_tokenizer = module_names.get("tokenizer", tokenizer)
        if news_model is None:
            news_model = module_names.get("model", model)

        classification_result = classify_news(
            text, news_tokenizer, news_model, max_length=max_length
        )
        print(f"The news is classified as: {classification_result}")

    # Map predicted class index to label
    labels = ["general", "health"]  # Ensure this matches your training labels
    return labels[predicted_class]

In [None]:
# Demo cell: run the general/health classifier on one sample article.
if __name__ == "__main__":
    # Sample article (a press release about a breast cancer awareness event).
    news_content = """
SM Supermalls and the Estée Lauder Companies (ELC) stand united in their mission to educate, empower, and support those affected by breast cancer, kicking off the numerous activities in store for Breast Cancer Awareness Month on October 3, 2024, at SM Aura’s Upper Ground Atrium.Following the theme “Beautifully United to Help End Breast Cancer,” Estée Lauder partnered with SM Supermalls to launch an impactful month dedicated to raising awareness and providing essential resources in the fight against breast cancer.The press event was meant to bring together, educate, and inspire advocates, medical experts, and community members.“At SM Supermalls, we recognize our responsibility to support and empower women and women’s health. As a society, we must work together to enhance breast cancer awareness and response. By promoting a supportive community, advocating for better access to healthcare, and empowering individuals with knowledge, we can make a significant impact,” said SM Supermalls President Steven Tan.The press event featured insightful talks from esteemed speakers, including Dr. Helen Amo, a surgical oncologist, who shared crucial information about the disease. Celebrity fashion stylist and inspirational speaker Kat Cruz moved attendees with her personal story of embracing courage in the face of breast cancer, illustrating the hope and resilience of those affected. 
Editor Chit Lijauco also shared her experience as a breast cancer patient and victor.Additionally, free breast health exams were offered to the mall-goers of SM Aura as part of the push for early detection and regular screening.Landmarks such as the SM Mall of Asia Globe and facade, SM Aura façade, SM Megamall’s Time Sculpture, façade, and Mega Tower, SM Lanang in Davao façade and fountain, and SM Seaside City Cebu’s façade and The Cube, were also illuminated in pink, sending a powerful message of solidarity.Through these collaborative efforts, SM Supermalls is not just raising awareness for breast cancer—it is also providing vital resources that can lead to early detection and improved health outcomes. For more information on SM Supermalls and to stay updated, visit www.smsupermalls.com or follow SM Supermalls on Facebook.

"""
    # Disabled alternative: run the fake/authentic classifier directly on the
    # article with the `tokenizer` / `model` pair.
    # # Classify the news
    # result = classify_news(news_content, tokenizer, model)
    # print(f"The news is classified as: {result}")

    # Run the general/health classifier. `result` is "general" or "health";
    # classify_health_news itself prints the class index and, for class 1,
    # the result of its internal classify_news call.
    result = classify_health_news(news_content, general_tokenizer, general_model)
    print(f"The news is classified as: {result}")

-----
This is the value: 
0
The news is classified as: general


In [24]:
# import pandas as pd

# # Make sure `classify_news`, `tokenizer`, and `model` are defined/imported earlier

# if __name__ == "__main__":
#     # Load your CSV
#     df = pd.read_csv(r"annotated_dataset4.csv")

#     for index, row in df.iterrows():
#         news_content = row['content']
#         expected_annotation = row.get('annotation', 'N/A')  # use .get in case the column is missing

#         try:
#             result = classify_news(news_content, tokenizer, model)
            
#             print(f"Row {index}")
#             print(f"News Content:\n{news_content[:200]}...")  # show only first 200 characters
#             print(f"Predicted: {result}")
#             print(f"Annotation: {expected_annotation}")
#         except Exception as e:
#             print(f"Error processing row {index}: {e}")
