In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the news dataset from a CSV file in the working directory.
# Later cells read the "Category" and "Text" columns of this frame.
DATASET_CSV = "bbc_news_dataset.csv"
df = pd.read_csv(filepath_or_buffer=DATASET_CSV)


In [None]:
# Summarise the label column: which categories exist and how many rows each has.
category_column = df["Category"]

# Distinct labels, in order of first appearance in the frame.
category_names = category_column.unique()
print(category_names)

# Rows per label; value_counts() sorts by descending frequency, so this order
# is generally NOT the same as the order of category_names.
value_count = category_column.value_counts()

In [None]:
# Bar chart of the number of rows per category.
# Labels and heights are both taken from `value_count` so each bar is paired
# with its own category: value_counts() orders by descending frequency while
# unique() (used for category_names) orders by first appearance, so matching
# the two positionally can attach the wrong label to a bar.
plt.bar(x=value_count.index, height=value_count.values)
plt.xlabel("Category")
plt.ylabel("Count")
plt.title("Rows per category")
plt.show()

In [None]:
# Features are the "Text" column; targets are the "Category" labels.
X, y = df["Text"], df["Category"]

In [None]:
from wordcloud import WordCloud
# Render, save and display one word cloud per category.
for category in category_names:
    # Join every "Text" entry of this category into a single string, which is
    # the input WordCloud.generate() receives.
    category_texts = df.loc[df["Category"] == category, "Text"]
    text = " ".join(category_texts.values)
    wc = WordCloud(width=800, height=400, background_color='white').generate(text=text)

    # Explicit figure/axes handles so the figure can be saved and released
    # without relying on pyplot's "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wc, interpolation='bilinear')
    ax.set_title(f'WordCloud for {category}')
    ax.axis('off')

    # Save before show(): with some backends the figure is gone after show().
    fig.savefig(f'WordCloud for {category}.png')
    plt.show()

    # Close the figure so figures created in the loop do not accumulate in
    # memory when the code runs outside the inline notebook backend.
    plt.close(fig)
    
    
    

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a fixed fraction of the rows for evaluation. The seed pins the
# shuffle so the same split is produced on every run.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


In [None]:
# Text classifier: TF-IDF features (English stop words removed) feeding a
# multinomial Naive Bayes model, wrapped in a single pipeline so raw strings
# can be passed straight to fit()/predict().
vectorizer = TfidfVectorizer(stop_words='english')
classifier = MultinomialNB()
model = make_pipeline(vectorizer, classifier)
model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(accuracy)
print(report)



In [None]:
# Confusion matrix of the held-out predictions, plotted and saved to disk.
# The same label sequence is used for computing the matrix and for labelling
# its axes. confusion_matrix() orders rows/columns by sorted label, whereas
# category_names (from unique()) is in order of first appearance, so using
# category_names as display labels can put the wrong name on a row/column.
class_labels = model.classes_
matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
matrix_plot = ConfusionMatrixDisplay(matrix, display_labels=class_labels)
matrix_plot.plot()
plt.savefig("hello.png")

In [None]:

# Smoke-test the trained pipeline on a hand-written, unseen news snippet.
test_text = ["Senior US and Iranian officials are expected to meet in Oman later for direct talks amid a crisis that has raised fears of a military confrontation between the two countries.The discussions come after a US military build-up in the Middle East in response to Iran's violent repression of nationwide anti-government protests last month, that human rights groups say killed many thousands of people.Uncertainty over the location and scope had threatened to derail the talks, which are part of a diplomatic effort by regional mediators to de-escalate tensions."
]

# The pipeline accepts raw strings and returns one predicted label per input.
predections = model.predict(test_text)
for idx, sample in enumerate(test_text):
    label = predections[idx]
    # Show only the first 20 characters of the input next to its prediction.
    print(f"Text: {sample[:20]}... | Predicted {label}")




In [None]:
import joblib
# Persist the fitted pipeline (vectorizer + classifier) to disk for reuse.
# NOTE: joblib files are pickle-based; only load them from trusted sources.
joblib.dump(value=model, filename="bbc_model.joblib")