In [137]:
import pandas as pd


In [138]:
# Load the raw listings table from disk and preview its first five rows
df = pd.read_csv(filepath_or_buffer='dataset.csv')
df.head(n=5)


Unnamed: 0,Item Name,Condition,Bought Months Ago,Demand,Price
0,Apron,Good,2,4,99
1,Lab Coat,Worn Out,18,10,32
2,Apron,Worn Out,12,5,31
3,Sheet Holder,Worn Out,33,5,6
4,Drafter,Like New,12,8,132


In [139]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Item Name          2000 non-null   object
 1   Condition          2000 non-null   object
 2   Bought Months Ago  2000 non-null   int64 
 3   Demand             2000 non-null   int64 
 4   Price              2000 non-null   int64 
dtypes: int64(3), object(2)
memory usage: 78.3+ KB


In [140]:
from sklearn.model_selection import train_test_split

In [141]:
## Splitting the dataset
# Hold out 20% of the rows for testing, then take 25% of the remainder
# (20% of the whole) for validation, leaving 60% for training.
train_val_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_val_df, test_size=0.25, random_state=42)
tuple(part.shape for part in (train_df, val_df, test_df))

((1200, 5), (400, 5), (400, 5))

In [142]:
train_df.loc[train_df["Price"].lt(0)]  # Rows with a negative price; none are expected


Unnamed: 0,Item Name,Condition,Bought Months Ago,Demand,Price


In [143]:
# Input and target columns: every column except the last is a model input,
# and the last column is the value to predict.
input_cols = train_df.columns[:-1].tolist()
target_col = train_df.columns[-1]
input_cols, target_col

(['Item Name', 'Condition', 'Bought Months Ago', 'Demand'], 'Price')

In [144]:
# Take independent copies so later edits to the inputs/targets never touch the split frames
train_inputs, train_targets = train_df[input_cols].copy(), train_df[target_col].copy()
val_inputs, val_targets = val_df[input_cols].copy(), val_df[target_col].copy()
test_inputs, test_targets = test_df[input_cols].copy(), test_df[target_col].copy()

In [145]:
train_inputs

Unnamed: 0,Item Name,Condition,Bought Months Ago,Demand
940,Drafter,Good,25,8
1594,Lab Coat,Worn Out,14,9
428,Apron,Fair,4,6
1346,Apron,Good,28,6
1933,Drafter,Fair,26,6
...,...,...,...,...
19,Lab Coat,Fair,35,6
659,Drafter,Good,16,9
1172,Drafter,Worn Out,5,7
592,Drafter,Fair,36,8


In [146]:
## Separating numerical and categorical column names by dtype
import numpy as np
numerical_cols = list(train_inputs.select_dtypes(include=np.number).columns)
categorical_cols = list(train_inputs.select_dtypes(include='object').columns)
print(numerical_cols, categorical_cols, sep='\n')

['Bought Months Ago', 'Demand']
['Item Name', 'Condition']


In [147]:
train_df[numerical_cols].describe()

Unnamed: 0,Bought Months Ago,Demand
count,1200.0,1200.0
mean,18.376667,6.446667
std,10.666596,1.792996
min,1.0,3.0
25%,9.0,5.0
50%,18.0,6.0
75%,28.0,8.0
max,36.0,10.0


In [148]:
train_df[numerical_cols].nunique()

Unnamed: 0,0
Bought Months Ago,36
Demand,8


In [149]:
# One-hot encoding of the categorical columns
from sklearn.preprocessing import OneHotEncoder

In [150]:
# Fit the encoder on the training split only, so validation/test rows do not
# influence preprocessing. Categories that were not seen during fitting are
# encoded as all-zero rows because of handle_unknown='ignore'.
# train_df is used rather than train_inputs because train_inputs is later
# replaced by the encoded frame, which no longer has the categorical columns.
encoder = OneHotEncoder(handle_unknown='ignore')
encoder.fit(train_df[categorical_cols])

In [151]:
encoder.categories_

[array(['Apron', 'Drafter', 'Lab Coat', 'Sheet Holder'], dtype=object),
 array(['Fair', 'Good', 'Like New', 'Worn Out'], dtype=object)]

In [152]:
# Names of the one-hot columns, in the same order as the encoder's output
encoded_cols = list(encoder.get_feature_names_out(categorical_cols))
print(encoded_cols)


def one_hot_frame(split_df):
    """Return the one-hot encoding of split_df's categorical columns as a DataFrame.

    The result reuses split_df's index so it can be concatenated column-wise
    with the remaining input columns of the same split.
    """
    return pd.DataFrame(
        encoder.transform(split_df[categorical_cols]).toarray(),
        columns=encoded_cols,
        index=split_df.index,
    )


# Encode each split from the untouched *_df frames (not from *_inputs), so this
# cell can be re-run safely: the *_inputs frames lose their categorical columns
# below and could not be encoded a second time.
train_encoded = one_hot_frame(train_df)
val_encoded = one_hot_frame(val_df)
test_encoded = one_hot_frame(test_df)

# Model-ready inputs: the non-categorical input columns followed by the one-hot columns
train_inputs = pd.concat([train_df[input_cols].drop(columns=categorical_cols), train_encoded], axis=1)
val_inputs = pd.concat([val_df[input_cols].drop(columns=categorical_cols), val_encoded], axis=1)
test_inputs = pd.concat([test_df[input_cols].drop(columns=categorical_cols), test_encoded], axis=1)


['Item Name_Apron', 'Item Name_Drafter', 'Item Name_Lab Coat', 'Item Name_Sheet Holder', 'Condition_Fair', 'Condition_Good', 'Condition_Like New', 'Condition_Worn Out']


In [155]:
from sklearn.linear_model import LinearRegression
# Linear regression of the target on the numeric and one-hot encoded inputs
model = LinearRegression()
model.fit(X=train_inputs, y=train_targets)

In [156]:
def rmse(targets, predications):
    """Return the root-mean-squared error between targets and predictions.

    Both inputs are converted to NumPy float arrays before subtracting, so the
    comparison is purely positional. Without this, two pandas objects with
    different indexes are aligned by label and the result is silently NaN.

    Parameters:
        targets: array-like of true values.
        predications: array-like of predicted values, in the same order.

    Returns:
        The RMSE as a NumPy float.
    """
    targets = np.asarray(targets, dtype=float)
    predications = np.asarray(predications, dtype=float)
    return np.sqrt(np.mean(np.square(targets - predications)))
# Predictions for each split, computed in train / validation / test order
train_pred, val_pred, test_pred = (
    model.predict(split_inputs)
    for split_inputs in (train_inputs, val_inputs, test_inputs)
)

In [157]:
# RMSE on each split, in the same units as the target column
train_loss, val_loss, test_loss = (
    rmse(train_targets, train_pred),
    rmse(val_targets, val_pred),
    rmse(test_targets, test_pred),
)
print(train_loss, val_loss, test_loss)

9.725150338615505 10.625028100628798 10.412798617030617


In [158]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are converted to NumPy float arrays before subtracting, so the
    comparison is purely positional. Without this, two pandas objects with
    different indexes are aligned by label and the result is silently NaN.

    Each error is divided by the corresponding true value, so a true value of
    zero makes the result infinite or NaN.

    Parameters:
        y_true: array-like of true values.
        y_pred: array-like of predicted values, in the same order.

    Returns:
        The MAPE as a NumPy float (10.0 means 10%).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# MAPE on each split, in train / validation / test order
train_mape, val_mape, test_mape = (
    mape(split_targets, split_pred)
    for split_targets, split_pred in (
        (train_targets, train_pred),
        (val_targets, val_pred),
        (test_targets, test_pred),
    )
)

# "Accuracy" here is simply 100 - MAPE; it is not a classification accuracy
# and becomes negative when MAPE exceeds 100%.
train_accuracy, val_accuracy, test_accuracy = (
    100 - split_mape for split_mape in (train_mape, val_mape, test_mape)
)

print(f"Training Accuracy: {train_accuracy:.2f}%")
print(f"Validation Accuracy: {val_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")

Training Accuracy: 72.82%
Validation Accuracy: 72.23%
Test Accuracy: 77.93%


In [164]:
train_inputs.columns

Index(['Bought Months Ago', 'Demand', 'Item Name_Apron', 'Item Name_Drafter',
       'Item Name_Lab Coat', 'Item Name_Sheet Holder', 'Condition_Fair',
       'Condition_Good', 'Condition_Like New', 'Condition_Worn Out'],
      dtype='object')

In [160]:
# Wrap the sample in a DataFrame carrying the training column names: the model
# was fitted on a DataFrame, and scikit-learn warns when predict() receives a
# bare array without feature names. Values follow train_inputs.columns order:
# Bought Months Ago = 20, Demand = 8, Item Name = Apron, Condition = Fair.
model.predict(pd.DataFrame([[20, 8, 1, 0, 0, 0, 1, 0, 0, 0]], columns=train_inputs.columns))




array([35.59113481])

In [161]:
model.coef_

array([ -2.21715529,  -0.28589711,  -5.83617776,  13.0227232 ,
         7.89891524, -15.08546068,  -8.55180288,   9.90338758,
        26.62152233, -27.97310704])

In [162]:
model.intercept_

np.float64(96.6093981793995)

In [163]:
# Same sample as before (Bought Months Ago = 20, Demand = 8, Apron, Fair),
# passed as a DataFrame with the training column names so scikit-learn can
# match features by name instead of warning about a bare array.
model.predict(pd.DataFrame([[20, 8, 1, 0, 0, 0, 1, 0, 0, 0]], columns=train_inputs.columns))



array([35.59113481])

In [165]:
import joblib as jb
# Bundle everything needed to score raw rows later. The model alone is not
# enough: it expects the one-hot encoded layout, so the fitted encoder and the
# column lists used to build that layout are stored alongside it.
# 'input_cols' holds the encoded feature columns the model was fitted on.
Price_Predictor = {
    'model': model,
    'input_cols': train_inputs.columns,
    'encoder': encoder,
    'numerical_cols': numerical_cols,
    'categorical_cols': categorical_cols,
    'encoded_cols': encoded_cols,
}

In [166]:
# Persist the bundle to disk; joblib returns the list of files it wrote
jb.dump(value=Price_Predictor, filename='Price_Predictor.joblib')

['Price_Predictor.joblib']