In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Step 1: Load and preprocess the data
# The labelled headlines are read from an Excel workbook in the working directory.
df = pd.read_excel(io='train.xlsx')

In [None]:
# Clean the data: discard exact duplicate rows first, then any row that
# still contains a missing value. Written as one chain so the cell simply
# rebinds `df` to the cleaned frame.
df = df.drop_duplicates().dropna()

In [None]:
# Step 2: Encode categorical labels
# Assuming this mapping based on sentiment values
label_map = {
    'positive': 2,
    'neutral': 1,
    'negative': 0,
}

# Plain dictionary lookup per value: a label that is not a key of label_map
# raises KeyError instead of being silently turned into a missing value.
df['Sentiment'] = df['Sentiment'].map(label_map.__getitem__)


In [None]:
# Step 3: Train-test split
# 80% of the rows go to training, 20% to validation; the fixed seed keeps
# the partition reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    df['News Headline'],
    df['Sentiment'],
    random_state=42,
    test_size=0.2,
)


In [None]:
# Step 4: Text preprocessing and vectorization
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_features=5000,  # Adjust max_features based on vocabulary size
)
# The vectorizer is fitted on the training headlines only; the validation
# headlines are transformed with that already-fitted vectorizer.
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)

In [None]:
# Step 5: Choose a classification model and train
# (fit() returns the fitted estimator, so construction and training are chained)
model = MultinomialNB().fit(X_train_vec, y_train)

# Step 6: Predict and evaluate
# Predicted labels for the held-out validation headlines.
y_pred = model.predict(X_val_vec)


In [None]:
# Evaluate the model
print("Accuracy:", accuracy_score(y_val, y_pred))
print("\nClassification Report:")
# classification_report lays its rows out by label value, and target_names
# must be given in that same order. label_map's own key order is
# positive/neutral/negative (codes 2, 1, 0), so the names are re-ordered by
# their numeric code and the matching label list is passed explicitly to keep
# every row name attached to the right class.
report_names = sorted(label_map, key=label_map.get)
report_labels = [label_map[name] for name in report_names]
print(classification_report(y_val, y_pred, labels=report_labels, target_names=report_names))

Accuracy: 0.6740027510316369

Classification Report:
              precision    recall  f1-score   support

    positive       1.00      0.04      0.07       103
     neutral       0.69      0.97      0.81       425
    negative       0.57      0.38      0.45       199

    accuracy                           0.67       727
   macro avg       0.76      0.46      0.45       727
weighted avg       0.70      0.67      0.61       727



In [None]:
def predict_sentiment(model, vectorizer, headline):
    """Return the sentiment name predicted for a single headline.

    The headline is passed to ``vectorizer.transform`` as a one-item list and
    the result is given to ``model.predict``. The first predicted label is
    then looked up in the module-level ``label_map``: the first sentiment
    name whose code equals that label is returned, or ``None`` when no code
    matches.
    """
    prediction = model.predict(vectorizer.transform([headline]))
    return next(
        (name for name, code in label_map.items() if code == prediction[0]),
        None,
    )

# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "Stocks surge as earnings exceed expectations"
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")



Predicted sentiment for 'Stocks surge as earnings exceed expectations': positive


In [None]:
# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "The value of the orders is over EUR 25mn ."
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")

Predicted sentiment for 'The value of the orders is over EUR 25mn .': neutral


In [None]:
# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "Pretax loss totalled EUR 49.9 mn , compared to a loss of EUR 15.4 mn in the corresponding period in 2008 ."
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")

Predicted sentiment for 'Pretax loss totalled EUR 49.9 mn , compared to a loss of EUR 15.4 mn in the corresponding period in 2008 .': negative


In [None]:
# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "Steve Jackson , eBusiness Analyst at Satama and former CEO of Aboavista said : `` Google broke the mold when they gave away Google Analytics for free ."
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")

Predicted sentiment for 'Steve Jackson , eBusiness Analyst at Satama and former CEO of Aboavista said : `` Google broke the mold when they gave away Google Analytics for free .': neutral


In [None]:
# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "Operating profit in the fourth quarter went down to EUR3m from EUR4 .2 m for the corresponding period of 2009 as it included costs of growth projects ."
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")

Predicted sentiment for 'Operating profit in the fourth quarter went down to EUR3m from EUR4 .2 m for the corresponding period of 2009 as it included costs of growth projects .': positive


In [None]:
# Example usage: classify one sample headline with the fitted model and vectorizer
headline = "A tinyurl link takes users to a scamming site promising that users can earn thousands of dollars by becoming a Google ( NASDAQ : GOOG ) Cash advertiser ."
predicted_sentiment = predict_sentiment(model=model, vectorizer=vectorizer, headline=headline)
print(f"Predicted sentiment for '{headline}': {predicted_sentiment}")

Predicted sentiment for 'A tinyurl link takes users to a scamming site promising that users can earn thousands of dollars by becoming a Google ( NASDAQ : GOOG ) Cash advertiser .': neutral


In [None]:
import pandas as pd
import datetime

# Dummy function to fetch historical stock prices (replace with actual API or database query)
def fetch_historical_stock_prices(symbol, date):
    """Look up a mock closing price by date string.

    Args:
        symbol: Ticker symbol. Accepted but not used by this mock; the same
            table is consulted for every symbol.
        date: Date key in 'YYYY-MM-DD' form.

    Returns:
        The mock price for ``date``, or ``None`` when the date is not in the
        table.
    """
    # Mock data for illustration
    mock_prices = dict([
        ('2024-01-01', 5.67),
        ('2024-01-02', 5.72),
        ('2024-01-03', 5.65),
        ('2024-01-04', 5.55),
        ('2024-01-05', 5.60),
        ('2024-01-06', 5.58),
        ('2024-01-07', 5.63),
        ('2024-01-08', 5.61),
        ('2024-01-09', 5.59),
        ('2024-01-10', 5.62),
    ])
    try:
        return mock_prices[date]
    except KeyError:
        # Unknown date: signal "no price" with None
        return None

# Load dataset (replace with actual path to your Excel file)
file_path = '/content/train.xlsx'  # Update this path if necessary
df = pd.read_excel(file_path)

# Filter headlines with negative sentiment (replace with actual column names as per your dataset).
# Labels are trimmed and lower-cased before the comparison so that
# 'negative', 'Negative' and 'NEGATIVE' are all selected.
is_negative = df['Sentiment'].astype(str).str.strip().str.lower() == 'negative'
negative_headlines = df[is_negative][['News Headline']]

lowest_price = None
lowest_price_date = None
undated_headline_count = 0  # negative headlines whose index label is not a usable date


# Function to determine the field Nokia competes with Google in (this is illustrative)
def determine_competing_field():
    """Return the illustrative field name printed in the final summary line."""
    return "5G technology"


def _index_label_to_date(label):
    """Convert a row index label to a ``datetime.date``.

    Date and datetime objects are used directly; anything else is parsed from
    its string form as 'YYYY-MM-DD'. Returns ``None`` when the label cannot be
    interpreted as a date.
    """
    try:
        if isinstance(label, datetime.datetime):
            return label.date()
        if isinstance(label, datetime.date):
            return label
        return datetime.datetime.strptime(str(label), '%Y-%m-%d').date()
    except ValueError:
        return None


# Iterate over each headline with negative sentiment
for index, row in negative_headlines.iterrows():
    headline = row['News Headline']

    # The headline's date is taken from the row's index label.
    # NOTE(review): this assumes the frame is indexed by headline date; with a
    # plain integer index no label parses and every row is skipped below.
    # TODO: confirm where the real date lives (Adjust as per your data).
    date_of_headline = _index_label_to_date(index)
    if date_of_headline is None:
        undated_headline_count += 1
        continue

    # Fetch historical stock price for Nokia on this day (assuming a mock function)
    historical_price = fetch_historical_stock_prices('NOK', str(date_of_headline))

    if historical_price is not None:
        print(f"On the day with negative sentiment '{headline}', the Nokia share price was ${historical_price:.2f}")

        # Track lowest price
        if lowest_price is None or historical_price < lowest_price:
            lowest_price = historical_price
            lowest_price_date = date_of_headline
    else:
        print(f"No historical price found for Nokia on the day with negative sentiment '{headline}' (Date: {date_of_headline})")

# One summary line for undated rows instead of one message per headline
if undated_headline_count:
    print(f"Skipped {undated_headline_count} negative sentiment headline(s) whose index label is not a YYYY-MM-DD date.")

if lowest_price is not None:
    print(f"\nLowest historical Nokia share price on days with negative sentiment was ${lowest_price:.2f} on {lowest_price_date}")
else:
    print("\nNo historical prices found for any negative sentiment headlines.")

# Determine the field in which Nokia competes with Google
competing_field = determine_competing_field()
print(f"Nokia competes with Google in the field of {competing_field}.")



No historical prices found for any negative sentiment headlines.
Nokia competes with Google in the field of 5G technology.
