In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.utils.class_weight import compute_class_weight
# Load the labelled news dataset. The file is decoded as ISO-8859-1 rather
# than pandas' default encoding.
# NOTE(review): the displayed frame carries an "Unnamed: 0" column, which looks
# like a row index that was saved into the CSV - confirm, and consider
# index_col=0 if so.
SENTIMENT_CSV = "Dataset/sentiment.csv"
df = pd.read_csv(SENTIMENT_CSV, encoding="ISO-8859-1")

In [2]:
# Show the loaded frame as the cell's rich output (long frames are elided
# with a "..." row in the rendered table).
df

Unnamed: 0,Sentiment,News
0,neutral,"According to Gran , the company has no plans t..."
1,neutral,Technopolis plans to develop in stages an area...
2,negative,The international electronic industry company ...
3,positive,With the new production plant the company woul...
4,positive,According to the company 's updated strategy f...
...,...,...
4841,negative,LONDON MarketWatch -- Share prices ended lower...
4842,neutral,Rinkuskiai 's beer sales fell by 6.5 per cent ...
4843,negative,Operating profit fell to EUR 35.4 mn from EUR ...
4844,negative,Net sales of the Paper segment decreased to EU...


In [5]:
# Train and evaluate a TF-IDF + logistic-regression sentiment classifier.
#
# Inputs : `df`, which must provide a 'News' text column and a 'Sentiment'
#          label column.
# Outputs: fitted `model` pipeline, the train/test split, `class_weights`,
#          `y_pred`, and a printed classification report plus one example
#          prediction.

# Split data: hold out a fixed fraction for evaluation; the seed keeps the
# split reproducible across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X = df['News']
y = df['Sentiment']

# NOTE(review): the split is not stratified. The supports in the recorded
# report show unequal class sizes, so stratify=y may be worth considering -
# doing so will change the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Compute class weights from the training labels only, so the held-out labels
# do not influence training. 'balanced' weights each class inversely to its
# frequency; the explicit dict keeps the weights available for inspection.
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))

# Create pipeline with Logistic Regression.
# The vectorizer uses unigrams and bigrams, dropping terms that appear in more
# than 95% of documents or in fewer than 5 documents.
# NOTE(review): newer scikit-learn releases reportedly warn about the
# 'liblinear' solver on multiclass targets - verify against the installed
# version before upgrading.
model = make_pipeline(
    TfidfVectorizer(ngram_range=(1, 2), max_df=0.95, min_df=5),
    LogisticRegression(class_weight=class_weights, max_iter=1000, solver='liblinear')
)

# Train: the pipeline fits the vectorizer and the classifier on the training
# split only.
model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)
print(" Evaluation:\n")
print(classification_report(y_test, y_pred))

# Predict: the pipeline takes an iterable of raw strings, so a single phrase is
# wrapped in a list and the first (only) prediction is taken.
frase = "Sales decreased"
sentiment = model.predict([frase])[0]
print("\n Example frase:", frase)
print(" Predicted Sentiment:", sentiment)



 Evaluation:

              precision    recall  f1-score   support

    negative       0.75      0.66      0.71       110
     neutral       0.79      0.91      0.85       571
    positive       0.79      0.59      0.67       289

    accuracy                           0.79       970
   macro avg       0.78      0.72      0.74       970
weighted avg       0.78      0.79      0.78       970


 Example frase: Sales decreased
 Predicted Sentiment: negative
