<a href="https://colab.research.google.com/github/GunanaDeepthi/INFOSYS-WEBSCRAPING/blob/main/app.py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import json
from datetime import datetime
import pandas as pd
import plotly.express as px
import streamlit as st
from openai import AzureOpenAI
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA
from transformers import pipeline
import requests

API_KEY = ""  # Groq API Key
SLACK_WEBHOOK = ""  # Slack webhook URL


def truncate_text(text, max_length=512):
    """Return at most the first ``max_length`` characters of ``text``.

    Args:
        text: The value to shorten. Strings (and other sliceable values) are
            sliced directly. ``None`` and float NaN are treated as "no text";
            other numbers are converted with ``str`` first.
        max_length: Maximum number of characters to keep.

    Returns:
        The truncated text, or an empty string when ``text`` is missing.
    """
    # NaN is the only float that is not equal to itself; checking it this way
    # avoids needing an extra import just for the missing-value case.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    # Numeric cells cannot be sliced, so stringify them first.
    if isinstance(text, (int, float)):
        text = str(text)
    return text[:max_length]


def load_competitor_data():
    """Read ``competitor_data.csv`` from the working directory.

    The first rows of the loaded table are printed to stdout as a quick
    preview before the full DataFrame is returned.
    """
    competitor_frame = pd.read_csv("competitor_data.csv")
    preview = competitor_frame.head()
    print(preview)
    return competitor_frame


def load_reviews_data():
    """Read ``reviews.csv`` from the working directory and return it as a DataFrame."""
    return pd.read_csv("reviews.csv")


def analyze_sentiment(reviews):
    """Run the default ``transformers`` sentiment-analysis pipeline on reviews.

    Args:
        reviews: A sequence of review texts (the dashboard passes a list of
            strings).

    Returns:
        Whatever the pipeline returns for the given texts; the dashboard
        treats it as a list of records with a ``label`` field. An empty list
        is returned for empty input.
    """
    # Building the pipeline loads a model, so skip it when there is nothing
    # to classify.
    if len(reviews) == 0:
        return []
    sentiment_pipeline = pipeline("sentiment-analysis")
    return sentiment_pipeline(reviews)


def train_predictive_model(data):
    """Train a random-forest regressor on price and discount features.

    The target is a synthetic "Predicted Discount" defined as the current
    discount plus 5% of the price (rounded to two decimals).

    Note:
        ``data`` is modified in place: ``Discount`` and ``Price`` are
        converted to floats and a ``Predicted Discount`` column is added.

    Args:
        data: DataFrame with ``Price`` and ``Discount`` columns. ``Discount``
            may hold strings such as ``"10%"`` or already-numeric values.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    # Go through ``str`` first so the "%" stripping also works when the column
    # is already numeric (``.str`` accessors fail on non-string dtypes).
    data["Discount"] = (
        data["Discount"].astype(str).str.replace("%", "", regex=False).astype(float)
    )
    data["Price"] = data["Price"].astype(float)

    # Assuming the desired discount is related to the price
    data["Predicted Discount"] = data["Discount"] + (data["Price"] * 0.05).round(2)

    x = data[["Price", "Discount"]]  # Features
    y = data["Predicted Discount"]  # Target variable
    print(x)

    # Only the training portion is used; the held-out 20% is discarded, but
    # the split is kept so the training rows stay the same.
    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(random_state=42)  # Using Random Forest for regression
    model.fit(x_train, y_train)

    return model


def forecast_discounts_arima(data, future_days=5):
    """
    Forecast future discounts using an ARIMA(5, 1, 0) model.

    The caller's DataFrame is not modified.

    Args:
        data: DataFrame with a ``Discount`` column, indexed by datetime (or by
            values convertible to datetime).
        future_days: Number of days to forecast.

    Returns:
        DataFrame with one row per forecast day and the columns ``Date`` and
        ``Predicted Discount``. Historical rows are not included.

    Raises:
        ValueError: If the index cannot be converted to datetime, or if no
            numeric discount values remain after cleaning.
    """
    # Normalise the index first so that sorting is chronological and the
    # series handed to ARIMA carries the datetime index.
    if not isinstance(data.index, pd.DatetimeIndex):
        try:
            datetime_index = pd.to_datetime(data.index)
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError("Index must be datetime or convertible to datetime.") from e
        data = data.copy()
        data.index = datetime_index

    data = data.sort_index()
    # Non-numeric discounts become NaN and are dropped.
    data = data.assign(Discount=pd.to_numeric(data["Discount"], errors="coerce"))
    data = data.dropna(subset=["Discount"])

    discount_series = data["Discount"]
    if discount_series.empty:
        raise ValueError("No numeric discount values available to forecast.")

    model = ARIMA(discount_series, order=(5, 1, 0))
    model_fit = model.fit()
    forecast = model_fit.forecast(steps=future_days)
    # Forecast dates start the day after the last observation.
    future_dates = pd.date_range(
        start=discount_series.index[-1] + pd.Timedelta(days=1), periods=future_days
    )

    forecast_df = pd.DataFrame({"Date": future_dates, "Predicted Discount": forecast})
    return forecast_df


def send_to_slack(data):
    """Send strategic recommendations to Slack.

    Posts ``data`` as the ``text`` of a JSON payload to ``SLACK_WEBHOOK``.
    Nothing is sent when the webhook URL is not configured. The HTTP response
    is not inspected, so delivery stays best-effort.

    Args:
        data: The message text to post.
    """
    # An empty URL would make ``requests`` raise before sending anything.
    if not SLACK_WEBHOOK:
        return
    payload = {"text": data}
    # The timeout keeps an unresponsive endpoint from hanging the dashboard.
    requests.post(SLACK_WEBHOOK, json=payload, timeout=10)


def generate_strategy_recommendation(product_name, competitor_data, sentiment):
    """
    Generate strategic recommendations using the provided data.

    Builds a prompt from the inputs and the current timestamp, sends it to the
    Groq OpenAI-compatible chat-completions endpoint and returns the text of
    the first choice.

    Args:
        product_name: Name of the product being analysed.
        competitor_data: Competitor pricing/discount data; it is interpolated
            into the prompt via its string representation.
        sentiment: Sentiment analysis results (or a placeholder message); it
            is interpolated into the prompt via its string representation.

    Returns:
        The content of the first message returned by the model.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    date = datetime.now()
    prompt = f"""
        You are a highly skilled business strategist specializing in e-commerce. Based on the following details, suggest actionable strategies to optimize pricing, promotions, and customer satisfaction:

        1. **Product Name**: {product_name}
        2. **Competitor Data** (including current prices, discounts, and predicted discounts):
        {competitor_data}
        3. **Sentiment Analysis**: {sentiment}
        4. **Today's Date**: {str(date)}

        ### Tasks:
        - Analyze the competitor data and identify key pricing trends.
        - Leverage sentiment analysis insights to highlight areas where customer satisfaction can be improved.
        - Use the discount predictions to suggest how pricing strategies can be optimized over the next 5 days.
        - Recommend promotional campaigns or marketing strategies that align with customer sentiments and competitive data.
        - Ensure the strategies are actionable, realistic, and geared toward increasing customer satisfaction and driving sales.
    """

    messages = [{"role": "user", "content": prompt}]
    data = {
        "messages": messages,
        # Groq's model id is "llama3-8b-8192"; the previous "llama3-ob-8192"
        # was a typo.
        "model": "llama3-8b-8192",
        "temperature": 0.8,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    res = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        json=data,
        headers=headers,
        timeout=60,  # do not hang the dashboard on a stalled request
    )
    # Surface API errors directly instead of failing later with a KeyError
    # on the missing "choices" field.
    res.raise_for_status()
    response = res.json()
    return response["choices"][0]["message"]["content"]


# Streamlit Dashboard
# Page flow: pick a product, show its competitor rows, chart review sentiment,
# forecast discounts, ask the LLM for recommendations, then post them to Slack.
st.set_page_config(page_title="E-Commerce Competitor Strategy Dashboard", layout="wide")

st.title("E-Commerce Competitor Strategy Dashboard")

st.sidebar.header("Select a Product")

products = [
    "Apple iPhone 15",
    "Apple 2023 MacBook Pro (16-inch, Apple M3 Pro chip)",
    "OnePlus Nord 4 5G (Mercurial Silver, 8GB RAM, 256GB Storage)",
    "Sony WH-1000XM5 Best Active Noise Cancelling Wireless Headphones",
]
selected_product = st.sidebar.selectbox("Choose a product to analyze:", products)

competitor_data = load_competitor_data()
reviews_data = load_reviews_data()

# Copy the filtered rows: columns are assigned on them below, and assigning
# on a slice of another DataFrame triggers pandas' SettingWithCopyWarning.
product_data = competitor_data[
    competitor_data["product_name"] == selected_product
].copy()
product_reviews = reviews_data[
    reviews_data["product_name"] == selected_product
].copy()

st.header(f"Competitor Analysis for {selected_product}")

st.subheader("Competitor Data")
st.table(product_data.tail(5))

# Placeholder passed to the recommendation prompt when there are no reviews.
sentiments = "No reviews available"
if not product_reviews.empty:
    product_reviews["reviews"] = product_reviews["reviews"].apply(
        lambda x: truncate_text(x, 512)
    )
    reviews = product_reviews["reviews"].tolist()
    sentiments = analyze_sentiment(reviews)

    st.subheader("Customer Sentiment Analysis")
    sentiment_df = pd.DataFrame(sentiments)
    fig = px.bar(sentiment_df, x="label", title="Sentiment Analysis Results")
    st.plotly_chart(fig)
else:
    st.write("No reviews available for this product.")

# Index the product rows by date and keep only rows with a valid date and a
# numeric discount.
product_data["Date"] = pd.to_datetime(product_data["Date"], errors="coerce")
product_data = product_data.dropna(subset=["Date"])
product_data.set_index("Date", inplace=True)
product_data = product_data.sort_index()
product_data["Discount"] = pd.to_numeric(product_data["Discount"], errors="coerce")
product_data = product_data.dropna(subset=["Discount"])

# Without any usable history the forecast cannot be fitted; stop with a
# message instead of failing inside the model.
if product_data.empty:
    st.warning("No usable discount history is available for this product.")
    st.stop()

# Forecasting Model
product_data_with_predictions = forecast_discounts_arima(product_data)

st.subheader("Competitor Current and Predicted Discounts")
st.table(product_data_with_predictions.tail(10))

recommendations = generate_strategy_recommendation(
    selected_product,
    product_data_with_predictions,
    sentiments,
)

st.subheader("Strategic Recommendations")
st.write(recommendations)

send_to_slack(recommendations)




FileNotFoundError: [Errno 2] No such file or directory: 'competitor_data.csv'

In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.3/44.3 kB 920.5 kB/s eta 0:00:00
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.1/9.1 MB 19.8 MB/s eta 0:00:00
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 32.2 MB/s eta 0:00:00
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.1/79.1 kB 4.3 MB/s eta 0:00:00
