# Nexford Final Project: Pricing Optimization using Linear Regression
This notebook documents the full process of building a Linear Regression model to predict product demand (`Units Sold`) based on pricing, discounts, and other factors.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm


In [None]:
# Read the raw, uncleaned inventory CSV into `df` and preview the first rows.
DATA_PATH = "retail_store_inventory.csv"
df = pd.read_csv(DATA_PATH)
df.head()


## Data Cleaning & Preparation

In [None]:
# Derive a new frame instead of mutating `df` in place, so this cell is safe to
# re-run: an in-place version would divide Discount by 100 again on every run
# and then fail in get_dummies because the source columns no longer exist.
# `df` itself is left untouched (raw).
CATEGORICAL_COLS = ['Region', 'Seasonality', 'Weather Condition']

df_clean = (
    df
    .dropna()  # drop any rows with missing values
    .assign(Discount=lambda frame: frame['Discount'] / 100.0)  # percentage -> decimal
)

# One-hot encode the categorical columns (required: the feature list below is
# built from the resulting dummy columns). drop_first=True removes one level per
# column to avoid perfectly collinear dummies. dtype=float keeps the dummies
# numeric; recent pandas versions default to bool, and a frame mixing bool and
# numeric columns is converted to object dtype, which statsmodels' OLS rejects.
df_clean = pd.get_dummies(df_clean, columns=CATEGORICAL_COLS, drop_first=True, dtype=float)

# Features we'll use for modeling: the numeric drivers plus every dummy column.
# Match on the prefix (not a substring) so unrelated columns cannot slip in.
dummy_prefixes = tuple(f'{col}_' for col in CATEGORICAL_COLS)
features = ['Price', 'Discount', 'Competitor Pricing', 'Holiday/Promotion'] + [
    col for col in df_clean.columns if col.startswith(dummy_prefixes)
]
X = df_clean[features]
y = df_clean['Units Sold']


## Linear Regression Model

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the linear model on the training portion (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("R-squared:", r2)
print("RMSE:", rmse)


## Confidence Intervals with OLS

In [None]:
# Refit the training data with statsmodels, whose summary table reports the
# standard error, p-value and confidence interval of every coefficient.
# Cast the design matrix to float first: if the dummy columns are bool (the
# default in recent pandas), the mixed frame becomes object dtype and sm.OLS
# raises "Pandas data cast to numpy dtype of object".
X_train_ols = sm.add_constant(X_train.astype(float))  # add the intercept column
ols_model = sm.OLS(y_train, X_train_ols).fit()
print(ols_model.summary())


## Predict Demand Function

In [None]:
def predict_demand(price, discount, competitor_price, holiday, region_dummy, season_dummy, weather_dummy):
    """Predict ``Units Sold`` for one pricing scenario with the fitted ``model``.

    Parameters
    ----------
    price : float
        Value for the ``Price`` feature.
    discount : float
        Value for the ``Discount`` feature as a decimal (e.g. ``0.1`` for 10%),
        matching the scale the model was trained on.
    competitor_price : float
        Value for the ``Competitor Pricing`` feature.
    holiday : int
        Value for the ``Holiday/Promotion`` feature.
    region_dummy, season_dummy, weather_dummy : dict or None
        One-hot overrides keyed by dummy column name, e.g. ``{'Region_North': 1}``.
        Every dummy column not mentioned stays 0. Pass ``{}`` or ``None`` to
        select the baseline category that has no dummy column of its own.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the assembled row.

    Raises
    ------
    ValueError
        If any supplied key is not a feature the model knows about. Such a key
        would otherwise add an extra column and fail later inside ``predict``.
    """
    # Prefer the column names stored on the fitted estimator so the row always
    # matches what the model was trained on; fall back to the global X otherwise.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    # Explicit values; later updates win, so dummy dicts take precedence.
    overrides = {
        'Price': price,
        'Discount': discount,
        'Competitor Pricing': competitor_price,
        'Holiday/Promotion': holiday
    }
    for dummy in (region_dummy, season_dummy, weather_dummy):
        overrides.update(dummy or {})

    unknown = sorted(set(overrides) - set(feature_names))
    if unknown:
        raise ValueError(
            f"Unknown feature(s) {unknown}: not among the model's features. "
            "For a baseline category, pass an empty dict instead."
        )

    # Every feature not explicitly set defaults to 0; column order follows the model.
    row = {name: overrides.get(name, 0) for name in feature_names}
    input_df = pd.DataFrame([row], columns=feature_names)
    return model.predict(input_df)[0]

# Example: a 30-unit price with a 10% discount against a competitor at 28,
# during a holiday/promotion, in the North region, in winter, on a sunny day.
predict_demand(
    price=30,
    discount=0.1,
    competitor_price=28,
    holiday=1,
    region_dummy={'Region_North': 1},
    season_dummy={'Seasonality_Winter': 1},
    weather_dummy={'Weather Condition_Sunny': 1},
)


## Conclusion
- This model helps estimate product demand based on pricing, competitor prices, and other contextual variables.
- The `predict_demand()` function lets an app pass user-supplied inputs to the model and get a demand prediction back.
- Confidence intervals from the OLS model provide insight into the reliability of the coefficient estimates.
- This notebook is ready for integration into a Streamlit app.