<a href="https://colab.research.google.com/github/HriddhiDoley/Supply_Chain/blob/main/Sales_forecasting_sentiments_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import requests
import feedparser
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Download the VADER lexicon that SentimentIntensityAnalyzer needs at runtime
nltk.download('vader_lexicon')

# Load sales data (Favorita Grocery Sales Dataset from Kaggle)
sales_data = pd.read_csv('https://www.kaggleusercontent.com/datasets/rohanrao/store-sales-time-series-forecasting/data.csv')
# rename() returns a fresh frame, so the column assignment below does not
# operate on a slice of the raw data (avoids SettingWithCopyWarning).
sales_data = sales_data[['date', 'sales']].rename(columns={'date': 'Date', 'sales': 'Sales'})
sales_data['Date'] = pd.to_datetime(sales_data['Date'])

# Collapse to one row per day, indexed by Date. The Favorita file is expected
# to carry several rows per date (presumably one per store / product family -
# TODO confirm against the actual file); without aggregation the sliding
# windows built later would mix unrelated rows of the same day instead of
# consecutive days. groupby() sorts its keys, so the result is chronological.
# When dates are already unique this only sorts the index.
sales_data = sales_data.groupby('Date')[['Sales']].sum()

# Normalize sales data to [0, 1]
# NOTE(review): the scaler is fitted on the whole series, i.e. before any
# train/test split, so test-period min/max influence the training inputs.
scaler = MinMaxScaler()
sales_data['Sales'] = scaler.fit_transform(sales_data[['Sales']])

# Fetch news sentiment from Google News RSS Feed (first 10 headlines only)
rss_url = "https://news.google.com/rss/search?q=retail+sales&hl=en-US&gl=US&ceid=US:en"
news_feed = feedparser.parse(rss_url)
news_headlines = [entry.title for entry in news_feed.entries[:10]]

sia = SentimentIntensityAnalyzer()
sentiment_scores = [sia.polarity_scores(headline)['compound'] for headline in news_headlines]
# An unreachable or empty feed leaves `entries` empty; np.mean([]) would then
# be NaN and every training sample would contain NaN. Fall back to a neutral
# score of 0.0 in that case.
news_sentiment = float(np.mean(sentiment_scores)) if sentiment_scores else 0.0  # Aggregate sentiment score

# Add sentiment as a feature
# NOTE: the same scalar is broadcast to every row, so this column is constant
# over time.
sales_data['Sentiment'] = news_sentiment

# Prepare data for LSTM
sequence_length = 5
def create_sequences(data, seq_length):
    """Build sliding-window samples and next-step 'Sales' targets.

    Window ``i`` holds rows ``i .. i + seq_length - 1`` of ``data`` (all
    columns); its target is the 'Sales' value of row ``i + seq_length``.

    Args:
        data: DataFrame containing a 'Sales' column; every column is used
            as an input feature.
        seq_length: Number of consecutive rows per input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, seq_length, n_columns)`` and ``y`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(data) - seq_length, 0)``.
        When there are too few rows the arrays are empty but keep these
        shapes, so downstream shape-based code still works.
    """
    # Convert once; per-row .iloc access inside a Python loop is very slow
    # on large frames.
    values = data.to_numpy()
    sales_col = data.columns.get_loc('Sales')

    n_windows = max(len(values) - seq_length, 0)
    # Row i of window_idx is [i, i + 1, ..., i + seq_length - 1]; indexing
    # with it gathers every window in a single vectorised step.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    X = values[window_idx]
    # copy() so the targets never alias the DataFrame's underlying buffer.
    y = values[seq_length:, sales_col].copy()
    return X, y

# Build (window, next-step target) samples from the prepared frame
X, y = create_sequences(sales_data, sequence_length)

# Split into training and testing sets (first 80% of windows for training,
# kept in their original order)
split = int(0.8 * len(X))
if split == 0:
    # Fewer than two windows leaves the training set empty; fail here with a
    # clear message instead of deep inside Keras.
    raise ValueError(
        f"Need at least {sequence_length + 2} rows to build a train/test "
        f"split, got {len(sales_data)}"
    )
X_train, X_test, y_train, y_test = X[:split], X[split:], y[:split], y[split:]

# Build LSTM model
# The input shape (timesteps, features) is taken from the data so it stays
# correct if feature columns are added to or removed from sales_data.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=X_train.shape[1:]),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')

# Train model
# NOTE: the test split doubles as the validation set here, so the reported
# val_loss is computed on the same data that is plotted below.
model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))

# Predict and plot
# Both series are mapped back through the fitted scaler so the plot is in
# the original sales units.
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))
actual = scaler.inverse_transform(y_test.reshape(-1, 1))

plt.figure(figsize=(10, 5))
plt.plot(actual, label='Actual Sales')
plt.plot(predictions, label='Predicted Sales', linestyle='dashed')
plt.legend()
plt.show()
