In [105]:
# In this file we will import our dataframe for the rug sales data. We will use tensor flow to build a model that predicts if a rugs DSI will be below a certain threshold
# For this project, we will want to see if a rug will sale in the first 60 days after purchase, or DSI <= 60

In [106]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [107]:
# import the data from the csv file
df = pd.read_csv(r'C:\Users\dakot\OneDrive\Desktop\Rug Gallery\RugAnalytics\RandomRugData.csv')

In [108]:
df.columns

Index(['Unnamed: 0', 'Invoice#', 'Date', 'Vendor', 'Collection', 'StyleNumber',
       'SizeExact', 'SizeGroup', 'RedPrice', 'Discount', 'SalePrice',
       'StyleGroup', 'StyleGroup2', 'DomColor', 'MinColor', 'ConstructionType',
       'Material', 'Pad', 'CustomerID', 'S/O', 'InvoiceTotal', 'DSI'],
      dtype='object')

In [109]:
# drop columns with na

df = df.dropna()

In [110]:
#encode the target variable

df['DSI'] = df['DSI'].apply(lambda x: 1 if x <= 60 else 0)

In [111]:
# check to see if target variable is encoded
df.tail()

Unnamed: 0.1,Unnamed: 0,Invoice#,Date,Vendor,Collection,StyleNumber,SizeExact,SizeGroup,RedPrice,Discount,...,StyleGroup2,DomColor,MinColor,ConstructionType,Material,Pad,CustomerID,S/O,InvoiceTotal,DSI
795,795,14985,2022-12-13,Couristan,Algiers,308532,6'7x9'6,6,521.79,0.0,...,Solid,Brown,Beige,tufted,Wool,0,2969884941,1,521.79,0
796,796,14987,2023-07-15,Artisan,Damascus,374425,4x6,4,324.05,0.0,...,Ornate,Gold,Gray,MM,Polyester,0,2519053852,0,324.05,0
797,797,14988,2022-06-04,Kas,Guatemala City,231758,10x14,10,1216.78,235.98,...,Ornate,Taupe,Brown,HM,Wool,0,4842602723,1,980.8,0
798,798,14994,2023-03-26,OrientalWeavers,Ulaanbaatar,398581,10x14,10,1203.45,240.63,...,Abstract,Brown,Gray,tufted,Wool,0,9160685619,1,962.82,0
799,799,14998,2022-02-01,DynamicRugs,Santo Domingo,592319,6'7x9'6,6,470.96,0.0,...,Distressed,Gray,Navy,HM,Wool,1,7339045135,1,470.96,0


In [112]:
#split the data into train and test, we will use 80% of the data for training and 20% for testing

from sklearn.model_selection import train_test_split

X = df.drop('DSI', axis=1)
y = df['DSI']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [113]:
#we need to encode the categorical and numeric features
numeric_features = ['RedPrice', 'Discount', 'SalePrice', 'InvoiceTotal']
categorical_features = ['Invoice#', 'Date','Vendor', 'Collection', 'StyleNumber', 'SizeExact', 'SizeGroup', 'StyleGroup', 'StyleGroup2', 'DomColor', 'MinColor', 'ConstructionType', 'Material', 'Pad', 'S/O']

In [114]:
# We will import the proper preprocessing libraries

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Create the preprocessor
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

In [115]:
#lets build the model

from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline

mlp = make_pipeline(
    preprocessor,
    MLPClassifier(hidden_layer_sizes=(50,), max_iter=50, random_state=42)
)
mlp.fit(X_train, y_train)

#print the accuracy
y_pred = mlp.predict(X_test)
accuracy = mlp.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.8375




In [118]:
#lets create a new sample to represent a potential rug line to bring in to the store
#we will predict the probablility that is will sell in the first 60 days

new_sample = pd.DataFrame({
    'Invoice#': ['12345'],
    'Date': ['2023-04-01'],
    'Vendor': ['Karastan'],
    'Collection': ['Sarajevo'],
    'StyleNumber': ['12345'],
    'SizeExact': ['8x10'],
    'SizeGroup': ['8'],
    'RedPrice': [800.0],
    'Discount': [100.0],
    'SalePrice': [700.0],
    'InvoiceTotal': [700.0],
    'StyleGroup': ['Contemporary'],
    'StyleGroup2': ['Modern'],
    'DomColor': ['Red'],
    'MinColor': ['Blue'],
    'ConstructionType': ['MM'],
    'Material': ['polyester'],
    'Pad': [0],
    'S/O': ['S'],
})

In [119]:
probability = mlp.predict_proba(new_sample)[:, 1][0]
print(f'Predicted probability of sale in the first 60 days: {probability * 100:.2f}%')

Predicted probability of sale in the first 60 days: 17.88%
