# **Amazon Product Price & Rating Predictor**

In [None]:
# amazon_price_rating_predictor.py
import pandas as pd
import streamlit as st
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

st.set_page_config(page_title="Amazon Product Price & Rating Predictor", layout="wide")
st.title("Amazon Product Price & Rating Predictor")

# --- Load Excel directly from path ---
# Only the read itself sits inside the try block so that unrelated failures are
# not misreported as a loading problem.
try:
    df = pd.read_excel("/content/amazon_products_sample.xlsx")
except Exception as e:
    # Top-level boundary: anything that prevents loading ends the run, but the
    # real cause (missing file, missing Excel engine, corrupt workbook, ...) is
    # shown alongside the hint instead of being discarded.
    st.error("Error loading Excel. Make sure '/content/amazon_products_sample.xlsx' exists.")
    st.exception(e)
    st.stop()
else:
    st.success("Excel data loaded successfully!")

# --- Preprocessing ---
# Every column the two models read: categorical inputs, numeric inputs, targets.
required_columns = [
    'Category', 'Brand',
    'Features_Count', 'Reviews_Count', 'Discount_pct',
    'Price_Rs', 'Rating',
]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    # Fail with a readable message instead of a KeyError traceback further down.
    st.error(f"Dataset is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

# Drop incomplete rows: NaN in a label column cannot be sorted with strings for
# the dropdowns, and NaN in a target cannot be used for training. `df` itself is
# rebound so the form options built from it match what the encoders have seen.
df = df.dropna(subset=required_columns)
if len(df) < 2:
    # An 80/20 split needs at least one training row and one test row.
    st.error("Need at least 2 complete rows to train the models.")
    st.stop()

df_model = df.copy()

# Encode the categorical columns as integer codes; the fitted encoders are kept
# so user selections can be mapped to the same codes at prediction time.
le_cat = LabelEncoder()
le_brand = LabelEncoder()
df_model['Category_Code'] = le_cat.fit_transform(df_model['Category'])
df_model['Brand_Code'] = le_brand.fit_transform(df_model['Brand'])

# Both models share the same input columns and differ only in their target.
features = ['Category_Code','Brand_Code','Features_Count','Reviews_Count','Discount_pct']
X_price = df_model[features]
y_price = df_model['Price_Rs']

X_rating = df_model[features]
y_rating = df_model['Rating']

# --- Train-Test Split ---
# 80/20 hold-out with a fixed seed. Both calls receive the same feature frame
# and the same seed, so the price and rating splits select the same rows.
(
    X_train_price,
    X_test_price,
    y_train_price,
    y_test_price,
) = train_test_split(X_price, y_price, test_size=0.2, random_state=42)
(
    X_train_rating,
    X_test_rating,
    y_train_rating,
    y_test_rating,
) = train_test_split(X_rating, y_rating, test_size=0.2, random_state=42)

# --- Scaling ---
# Each scaler learns its statistics from the training rows only and is then
# applied to both partitions.
scaler_price = StandardScaler().fit(X_train_price)
X_train_price_scaled = scaler_price.transform(X_train_price)
X_test_price_scaled = scaler_price.transform(X_test_price)

scaler_rating = StandardScaler().fit(X_train_rating)
X_train_rating_scaled = scaler_rating.transform(X_train_rating)
X_test_rating_scaled = scaler_rating.transform(X_test_rating)

# --- Model Training ---
# Two independent 200-tree forests with a fixed seed: one per target.
model_price = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train_price_scaled, y_train_price)

model_rating = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train_rating_scaled, y_train_rating)

# --- Input Form for Prediction ---
st.subheader("Predict Price & Rating for a New Product")

# All inputs live in one form; `submitted` is the value returned by the
# form's submit button.
with st.form("predict_form"):
    # Dropdowns list the distinct values present in the loaded data, sorted.
    category_choices = sorted(df['Category'].unique())
    category_input = st.selectbox("Category", category_choices)

    brand_choices = sorted(df['Brand'].unique())
    brand_input = st.selectbox("Brand", brand_choices)

    # Numeric inputs with fixed bounds and defaults.
    features_count = st.number_input(
        "Number of Features",
        min_value=1,
        max_value=20,
        value=3,
    )
    reviews_count = st.number_input(
        "Reviews Count",
        min_value=0,
        max_value=50000,
        value=100,
    )
    discount_pct = st.slider("Discount (%)", min_value=0, max_value=100, value=10)

    submitted = st.form_submit_button("Predict Price & Rating")

if submitted:
    # Map the selected labels to the integer codes learned by the fitted encoders.
    cat_code = le_cat.transform([category_input])[0]
    brand_code = le_brand.transform([brand_input])[0]

    # Build the row as a DataFrame carrying the training column names. The
    # scalers were fitted on DataFrames, so a bare list would trigger sklearn's
    # "X does not have valid feature names" warning and would depend on
    # positional order alone.
    X_new = pd.DataFrame(
        [[cat_code, brand_code, features_count, reviews_count, discount_pct]],
        columns=features,
    )

    # Each model gets the input scaled by its own scaler.
    X_new_price_scaled = scaler_price.transform(X_new)
    X_new_rating_scaled = scaler_rating.transform(X_new)

    # predict() returns one value per input row; there is exactly one row here.
    predicted_price = model_price.predict(X_new_price_scaled)[0]
    predicted_rating = model_rating.predict(X_new_rating_scaled)[0]

    st.success(f"Predicted Price: ₹{predicted_price:.2f}")
    st.success(f"Predicted Rating: {predicted_rating:.2f} / 5")


# amazon_price_rating_predictor.py
import pandas as pd
import streamlit as st
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

st.set_page_config(page_title="Amazon Product Price & Rating Predictor", layout="wide")
st.title("🛒 Amazon Product Price & Rating Predictor")

# --- Upload Excel ---
uploaded_file = st.file_uploader("Upload Amazon Products Excel", type=["xlsx","xls"])
if uploaded_file is None:
    # Nothing uploaded yet: prompt the user and halt this run.
    st.warning("Upload Excel to proceed")
    st.stop()

# The upload is user-supplied, so a corrupt or mislabelled file is reported as a
# readable error (with the underlying cause) instead of an unhandled traceback.
try:
    df = pd.read_excel(uploaded_file)
except Exception as e:
    st.error("Error reading the uploaded Excel file.")
    st.exception(e)
    st.stop()

# --- Preprocessing ---
# Every column the two models read: categorical inputs, numeric inputs, targets.
required_columns = [
    'Category', 'Brand',
    'Features_Count', 'Reviews_Count', 'Discount_pct',
    'Price_Rs', 'Rating',
]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    # An uploaded sheet may not follow the expected schema; fail with a readable
    # message instead of a KeyError traceback further down.
    st.error(f"Dataset is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

# Drop incomplete rows: NaN in a label column cannot be sorted with strings for
# the dropdowns, and NaN in a target cannot be used for training. `df` itself is
# rebound so the form options built from it match what the encoders have seen.
df = df.dropna(subset=required_columns)
if len(df) < 2:
    # An 80/20 split needs at least one training row and one test row.
    st.error("Need at least 2 complete rows to train the models.")
    st.stop()

df_model = df.copy()
# Encode categorical columns as integer codes; the fitted encoders are kept so
# user selections can be mapped to the same codes at prediction time.
le_cat = LabelEncoder()
le_brand = LabelEncoder()
df_model['Category_Code'] = le_cat.fit_transform(df_model['Category'])
df_model['Brand_Code'] = le_brand.fit_transform(df_model['Brand'])

# Features for prediction: both models share the same inputs and differ only in
# their target column.
features = ['Category_Code','Brand_Code','Features_Count','Reviews_Count','Discount_pct']
X_price = df_model[features]
y_price = df_model['Price_Rs']

X_rating = df_model[features]
y_rating = df_model['Rating']

# --- Train-Test Split ---
# 80/20 hold-out with a fixed seed. Both calls receive the same feature frame
# and the same seed, so the price and rating splits select the same rows.
(
    X_train_price,
    X_test_price,
    y_train_price,
    y_test_price,
) = train_test_split(X_price, y_price, test_size=0.2, random_state=42)
(
    X_train_rating,
    X_test_rating,
    y_train_rating,
    y_test_rating,
) = train_test_split(X_rating, y_rating, test_size=0.2, random_state=42)

# --- Scaling ---
# Each scaler learns its statistics from the training rows only and is then
# applied to both partitions.
scaler_price = StandardScaler().fit(X_train_price)
X_train_price_scaled = scaler_price.transform(X_train_price)
X_test_price_scaled = scaler_price.transform(X_test_price)

scaler_rating = StandardScaler().fit(X_train_rating)
X_train_rating_scaled = scaler_rating.transform(X_train_rating)
X_test_rating_scaled = scaler_rating.transform(X_test_rating)

# --- Model Training ---
# Two independent 200-tree forests with a fixed seed: one per target.
model_price = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train_price_scaled, y_train_price)

model_rating = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train_rating_scaled, y_train_rating)

# --- Input Form for Prediction ---
st.subheader("Predict Price & Rating for a New Product")

# All inputs live in one form; `submitted` is the value returned by the
# form's submit button.
with st.form("predict_form"):
    # Dropdowns list the distinct values present in the uploaded data, sorted.
    category_choices = sorted(df['Category'].unique())
    category_input = st.selectbox("Category", category_choices)

    brand_choices = sorted(df['Brand'].unique())
    brand_input = st.selectbox("Brand", brand_choices)

    # Numeric inputs with fixed bounds and defaults.
    features_count = st.number_input(
        "Number of Features",
        min_value=1,
        max_value=20,
        value=3,
    )
    reviews_count = st.number_input(
        "Reviews Count",
        min_value=0,
        max_value=50000,
        value=100,
    )
    discount_pct = st.slider("Discount (%)", min_value=0, max_value=100, value=10)

    submitted = st.form_submit_button("Predict Price & Rating")

if submitted:
    # Map the selected labels to the integer codes learned by the fitted encoders.
    cat_code = le_cat.transform([category_input])[0]
    brand_code = le_brand.transform([brand_input])[0]

    # Build the row as a DataFrame carrying the training column names. The
    # scalers were fitted on DataFrames, so a bare list would trigger sklearn's
    # "X does not have valid feature names" warning and would depend on
    # positional order alone.
    X_new = pd.DataFrame(
        [[cat_code, brand_code, features_count, reviews_count, discount_pct]],
        columns=features,
    )

    # Each model gets the input scaled by its own scaler.
    X_new_price_scaled = scaler_price.transform(X_new)
    X_new_rating_scaled = scaler_rating.transform(X_new)

    # predict() returns one value per input row; there is exactly one row here.
    predicted_price = model_price.predict(X_new_price_scaled)[0]
    predicted_rating = model_rating.predict(X_new_rating_scaled)[0]

    st.success(f"Predicted Price: ₹{predicted_price:.2f}")
    st.success(f"Predicted Rating: {predicted_rating:.2f} / 5")



### **1️⃣ Introduction**

E-commerce platforms like Amazon host millions of products across diverse categories. Pricing and product rating play a crucial role in driving customer purchase decisions. Predicting the ideal price and expected rating for a product can help sellers optimize pricing strategies and improve sales, while customers can make informed buying choices.

This project uses **Machine Learning** to predict product price and rating based on features such as category, brand, number of features, reviews count, and discount percentage.

---

### **2️⃣ Objective**

* To predict the **expected price** of a product using product-specific features.
* To predict the **expected customer rating** for a new product.
* To provide a **user-friendly interface** for sellers or analysts to input product details and get predictions instantly.
* To help businesses make **data-driven pricing and marketing decisions**.

---

### **3️⃣ Dataset Description**

The dataset includes multiple products across various categories with the following columns:

| Column Name    | Description                                            |
| -------------- | ------------------------------------------------------ |
| Product_Name   | Name of the product                                    |
| Category       | Product category (e.g., Electronics, Kitchen, Fashion) |
| Brand          | Brand name                                             |
| Features_Count | Number of features/specifications in the product       |
| Reviews_Count  | Total customer reviews                                 |
| Discount_pct   | Discount offered (%)                                   |
| Price_Rs       | Price of the product in INR                            |
| Rating         | Average customer rating (1–5)                          |

---

### **4️⃣ Methodology**

1. **Data Collection:**
   Gather product data with features, price, and ratings from Amazon (or a sample dataset).

2. **Data Preprocessing:**

   * Handle missing values.
   * Encode categorical variables (Category, Brand) using **Label Encoding**.
   * Scale numerical features for consistent ranges.

3. **Feature Selection:**

   * Input features: Category, Brand, Features_Count, Reviews_Count, Discount_pct.
   * Output targets: Price_Rs (for price prediction), Rating (for rating prediction).

4. **Model Training:**

   * Train two separate **Random Forest Regression** models:

     1. Predict product price.
     2. Predict product rating.
   * Split dataset into training and testing sets (80%-20%).

5. **Prediction:**

   * Input new product details via user interface (or programmatically).
   * Predict **expected price** and **rating** using the trained models.

6. **Evaluation:**

   * Evaluate models using **MAE, RMSE, and R² score**.
   * Compare actual vs predicted values to assess accuracy.

---

### **5️⃣ Applications**

* **For Sellers:**
  Set competitive prices and anticipate customer reception.

* **For Customers:**
  Make informed buying decisions based on expected product rating.

* **For E-commerce Analysts:**
  Identify pricing trends, optimize product launches, and increase revenue.

---

### **6️⃣ Advantages**

* Saves time and effort in manual pricing analysis.
* Provides **data-driven insights** for better decision-making.
* Improves customer satisfaction by predicting realistic product ratings.
* Can be extended to multiple product categories and other e-commerce platforms.

---

### **7️⃣ Limitations**

* Accuracy depends on the **quality and quantity** of data.
* External factors like market trends, promotions, and seasonal demand are not captured.
* Ratings are influenced by user behavior, which may be unpredictable.

---

### **8️⃣ Future Scope**

* Integrate **historical sales data** to improve price prediction.
* Include **customer sentiment analysis** from reviews for better rating prediction.
* Deploy as a **real-time web application** for Amazon sellers.
* Use **advanced ML models** such as gradient boosting (e.g., XGBoost) for higher accuracy.

