In [1]:
# dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Load the raw sales records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')
# Bare last expression so the notebook renders the loaded frame.
df

Unnamed: 0,product_key,sale_date,sale_price
0,12345,1/1/2021,100
1,12345,1/2/2021,100
2,12345,1/3/2021,100
3,12345,1/3/2021,100
4,12345,1/5/2021,100
...,...,...,...
488,12345,12/19/2022,100
489,12345,12/21/2022,100
490,12345,12/24/2022,100
491,12345,12/27/2022,100


In [3]:
# Parse the sale_date column into datetimes. The result is a Series
# (not a DataFrame) that the date-part features are derived from.
X_df = df['sale_date'].pipe(pd.to_datetime)

In [4]:
# Build the feature matrix: one calendar-part column per sale date.
# The frame is constructed in a single step, so there is no temporary
# 'sale_date' column to drop afterwards.
X = pd.DataFrame({
    'Year': X_df.dt.year,
    'Month': X_df.dt.month,
    'Day': X_df.dt.day,
    'Quarter': X_df.dt.quarter,
})

In [5]:
# Threshold separating discounted from regular prices: the midpoint between
# the lowest and highest observed sale_price.
discount_price = df['sale_price'].agg(['min', 'max']).sum() / 2
discount_price

93.5

In [6]:
# Build the target frame: the raw price next to a two-class label.
#   '1' ('buy now')           -> 0 < price <= discount_price
#   '0' ('wait for discount') -> discount_price < price <= max price
# The labels are strings, not integers.
y = df[['sale_price']].assign(**{
    'class': pd.cut(
        df['sale_price'],
        bins=[0, discount_price, df['sale_price'].max()],
        labels=['1', '0'],
    ),
})

In [7]:
# Sanity-check the labels: count rows per (sale_price, class) combination,
# most frequent first.
y.value_counts(sort=True, ascending=False)

sale_price  class
100         0        334
99          0        132
89          1         21
95          0          5
87          1          1
dtype: int64

In [8]:
# Hold out a test set. test_size is left at the library default and
# random_state pins the shuffle so the split is repeatable.
# NOTE(review): the classes are imbalanced and the split is not stratified;
# consider stratify=y['class'] if class proportions should be preserved.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y.loc[:, 'class'],
    random_state=1,
)

In [9]:
# Standardise the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)  # fitted scaler, kept under its original name

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [10]:
# Confirm the split sizes: feature matrices first, then the label vectors.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)), sep="\n")

(369, 4)
(124, 4)
(369,)
(124,)


In [11]:
# Instantiate the random forest classifier: 128 trees, with a fixed seed.
rf_model = RandomForestClassifier(
    n_estimators=128,
    random_state=1,
)

In [12]:
# Train the forest on the standardised training features and their labels.
rf_model = rf_model.fit(X=X_train_scaled, y=y_train)

In [13]:
# Predict a class label for every row of the standardised test features.
predictions = rf_model.predict(X=X_test_scaled)
predictions

array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '1', '0', '0'], dtype=object)

In [14]:
# Class labels in the order used for the confusion-matrix rows and columns.
# Passing them explicitly keeps the matrix 2x2 and aligned with the
# "Actual"/"Predicted" headings even if one class is missing from the test
# split; with inferred labels the matrix would shrink and the DataFrame
# construction below would fail on a shape mismatch.
class_labels = ["0", "1"]

# Confusion matrix: rows are the actual classes, columns the predicted ones.
cm = confusion_matrix(y_test, predictions, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

# Fraction of test rows whose predicted class equals the actual class.
acc_score = accuracy_score(y_test, predictions)

In [15]:
# Report the evaluation: confusion-matrix table, overall accuracy, and the
# per-class precision / recall / F1 summary.
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report", classification_report(y_test, predictions), sep="\n")

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,117,3
Actual 1,0,4


Accuracy Score : 0.9758064516129032
Classification Report
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       120
           1       0.57      1.00      0.73         4

    accuracy                           0.98       124
   macro avg       0.79      0.99      0.86       124
weighted avg       0.99      0.98      0.98       124

