# 🛋️ E-commerce Furniture Dataset Project
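This notebook explores an e-commerce furniture sales dataset, engineers a discount-percentage feature, and compares Linear Regression and Random Forest models for predicting the number of items sold.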

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [None]:
# Read the furniture sales CSV into a DataFrame
df = pd.read_csv('ecommerce_furniture_dataset.csv')

# Display a preview of the leading rows together with the (rows, columns) shape
preview_rows = df.head()
table_shape = df.shape
preview_rows, table_shape


In [None]:
# Count the missing entries in every column
df.isna().sum()


In [None]:
# Discard every row that has at least one missing value, then show the new shape
df = df.dropna(axis=0, how='any')
df.shape


In [None]:
# Clean price columns: strip "$" signs and commas, then convert to float.
# The pattern is a raw string so that "\$" reaches the regex engine as written;
# in a plain string literal "\$" is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning on Python 3.12+).
for price_col in ('price', 'originalPrice'):
    df[price_col] = df[price_col].replace(r'[\$,]', '', regex=True).astype(float)

# Create discount percentage feature: share of the original price taken off.
# NOTE(review): a row whose originalPrice is 0 would yield inf/NaN here —
# confirm the data contains none before modeling.
df['discount_percent'] = ((df['originalPrice'] - df['price']) / df['originalPrice']) * 100

# Encode tagText column as integer codes.
# NOTE(review): scikit-learn documents LabelEncoder as meant for target labels;
# for an input feature, OrdinalEncoder/OneHotEncoder is the documented choice —
# consider switching if tagText has no natural order.
le = LabelEncoder()
df['tagText_encoded'] = le.fit_transform(df['tagText'])

df.head()


In [None]:
# Visualize how the discount percentage relates to the number of items sold
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='discount_percent', y='sold', ax=ax)
ax.set_title('Discount % vs Items Sold')
ax.set_xlabel('Discount Percentage')
ax.set_ylabel('Items Sold')
plt.show()


In [None]:
# Predictors (price, discount, encoded tag) and the target column ('sold')
feature_columns = ['price', 'discount_percent', 'tagText_encoded']
X = df[feature_columns]
y = df['sold']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a linear regression on the training split (fit() returns the estimator)
lr_model = LinearRegression().fit(X_train, y_train)

# Score the held-out test split
y_pred_lr = lr_model.predict(X_test)
r2_lr = r2_score(y_test, y_pred_lr)
mse_lr = mean_squared_error(y_test, y_pred_lr)

# Report mean squared error and R2, each rounded to two decimals
print(
    "Linear Regression Results:",
    f"MSE: {mse_lr:.2f}",
    f"R2 Score: {r2_lr:.2f}",
    sep="\n",
)


In [None]:
# Fit a 100-tree random forest on the training split; the seed fixes the result
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out test split
y_pred_rf = rf_model.predict(X_test)
r2_rf = r2_score(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)

# Report mean squared error and R2, each rounded to two decimals
print(
    "Random Forest Results:",
    f"MSE: {mse_rf:.2f}",
    f"R2 Score: {r2_rf:.2f}",
    sep="\n",
)
