In [24]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including deprecation notices from pandas / scikit-learn — consider narrowing it.
warnings.filterwarnings('ignore')

In [25]:
# Load the raw listings from the CSV file located next to the notebook
df = pd.read_csv(filepath_or_buffer="ecommerce_furniture_dataset_2024.csv")

In [26]:
# Preview the first five rows of the raw data
df.head(n=5)

Unnamed: 0,productTitle,originalPrice,price,sold,tagText
0,Dresser For Bedroom With 9 Fabric Drawers Ward...,,$46.79,600,Free shipping
1,Outdoor Conversation Set 4 Pieces Patio Furnit...,,$169.72,0,Free shipping
2,Desser For Bedroom With 7 Fabric Drawers Organ...,$78.4,$39.46,7,Free shipping
3,"Modern Accent Boucle Chair,Upholstered Tufted ...",,$111.99,0,Free shipping
4,Small Unit Simple Computer Desk Household Wood...,$48.82,$21.37,1,Free shipping


In [27]:
# Give every column a descriptive PascalCase name
column_names = {
    'productTitle': 'ProductTitle',
    'originalPrice': 'OriginalPrice',
    'price': 'SellingPrice',
    'sold': 'UnitsSold',
    'tagText': 'TagText',
}
df = df.rename(columns=column_names)

In [28]:
# Count the missing values in each column
df.isna().sum()

ProductTitle        0
OriginalPrice    1513
SellingPrice        0
UnitsSold           0
TagText             3
dtype: int64

In [29]:
# Discard the 'OriginalPrice' column from the working frame
df = df.drop(columns='OriginalPrice')

In [30]:
# Fill the remaining missing values without mixing types inside a column.
# 'TagText' holds strings, so its gaps get a string placeholder instead of the
# integer 0: a stray int in a text column breaks .str accessors, sorting and
# label encoding later on. Every other column keeps the original 0 fill.
# Assigning the result (rather than inplace=True) keeps the cell safe to re-run.
df['TagText'] = df['TagText'].fillna('Unknown')
df = df.fillna(0)

In [31]:
# Turn 'SellingPrice' from text such as '$46.79' into a numeric column:
# cast to str, strip the '$' and ',' characters literally (no regex), then parse.
price_text = df['SellingPrice'].astype(str)
for symbol in ('$', ','):
    price_text = price_text.str.replace(symbol, '', regex=False)

df['SellingPrice'] = pd.to_numeric(price_text)

In [32]:
# Check column dtypes and non-null counts after cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ProductTitle  2000 non-null   object 
 1   SellingPrice  2000 non-null   float64
 2   UnitsSold     2000 non-null   int64  
 3   TagText       2000 non-null   object 
dtypes: float64(1), int64(1), object(2)
memory usage: 62.6+ KB


In [33]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns
df.describe()

Unnamed: 0,SellingPrice,UnitsSold
count,2000.0,2000.0
mean,156.56002,23.4935
std,176.936735,254.094061
min,0.99,0.0
25%,48.53,1.0
50%,114.08,3.0
75%,193.49,9.0
max,2876.38,10000.0


In [34]:
# (number of rows, number of columns) of the current frame
df.shape

(2000, 4)

In [35]:
# Derive 'TotalCost' for each listing: selling price multiplied by units sold
df['TotalCost'] = df['SellingPrice'].mul(df['UnitsSold'])

In [38]:
# Listing(s) carrying the highest selling price (ties are all returned)
top_price = df['SellingPrice'].max()
df.loc[df['SellingPrice'] == top_price]

Unnamed: 0,ProductTitle,SellingPrice,UnitsSold,TagText,TotalCost
656,"Luxury Modern Tight Curved Back Velvet Sofa,Mi...",2876.38,0,Free shipping,0.0


In [37]:
# Listing(s) with the largest 'TotalCost', i.e. the highest price x units sold
df.loc[df['TotalCost'].eq(df['TotalCost'].max())]

Unnamed: 0,ProductTitle,SellingPrice,UnitsSold,TagText,TotalCost
1008,Portable round Folding Chair Accordion Chair H...,12.28,10000,Free shipping,122800.0


In [40]:
# Every listing tied at the lowest selling price
lowest_price = df['SellingPrice'].min()
df.loc[df['SellingPrice'] == lowest_price]

Unnamed: 0,ProductTitle,SellingPrice,UnitsSold,TagText,TotalCost
597,1PC Mini House Night Table Model Wooden Mini B...,0.99,7,Free shipping,6.93
958,Portable Wardrobe Closet Heavy Duty Hanger Fre...,0.99,2,Free shipping,1.98
1095,New Wholesale Simple Shoe Rack Multi-layer Spa...,0.99,43,Free shipping,42.57
1373,1 folding chair for home and outdoor use Conve...,0.99,85,Free shipping,84.15
1498,"Office Desk Mat,Non-Slip PU Leather Desk Blott...",0.99,32,Free shipping,31.68
1567,1pc Sequin Inflatable Sofa Colorfull Sequin La...,0.99,26,Free shipping,25.74
1583,Card Folding Stool Portable Outdoor Camping an...,0.99,41,Free shipping,40.59
1586,1/12 Scale mini house Miniature Round Wooden C...,0.99,23,Free shipping,22.77
