In [75]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, mean_absolute_error

In [76]:
sales_data = pd.read_csv('dataset/Project Dataset/sales_data.csv')
amount_data = pd.read_csv('dataset/Project Dataset/amount_data.csv')

In [77]:
sales_data['Date'] = pd.to_datetime(sales_data['Date'], dayfirst = True)
sales_data = sales_data.sort_values(by = 'Date')
sales_data = sales_data.reset_index().drop(columns=['index'])
sales_data

Unnamed: 0,Date,Model,Quantity
0,2022-04-01,18 F AC,0.0
1,2022-04-01,3X7NEXX,0.0
2,2022-04-01,2T5GDEL,0.0
3,2022-04-01,2T5RRLX-GX,0.0
4,2022-04-01,2T5RRLX-XX,0.0
...,...,...,...
93598,2024-04-30,40 inch LED,0.0
93599,2024-04-30,32 inch LED,0.0
93600,2024-04-30,Multiplug,0.0
93601,2024-04-30,1D4GDEH,1.0


In [78]:
amount_data['Date'] = pd.to_datetime(amount_data['Date'], dayfirst = True)
amount_data = amount_data.sort_values(by = 'Date')
amount_data = amount_data.reset_index().drop(columns=['index'])
amount_data

Unnamed: 0,Date,Amount
0,2022-04-01,0
1,2022-04-02,239400
2,2022-04-03,274140
3,2022-04-04,177000
4,2022-04-05,106000
...,...,...
756,2024-04-26,0
757,2024-04-27,582650
758,2024-04-28,367700
759,2024-04-29,451000


In [79]:
sales_data_matrix = sales_data.pivot_table(index = 'Date', columns = 'Model', values = 'Quantity', aggfunc = 'sum', fill_value = 0).reset_index()
sales_data_matrix
data = pd.merge(sales_data_matrix, amount_data, on = 'Date', how = 'inner')

In [80]:
data['day_of_year'] = data['Date'].dt.dayofyear
data['month'] = data['Date'].dt.month
data['day_of_week'] = data['Date'].dt.dayofweek
data = data.drop(columns='Date')
data

Unnamed: 0,12 C AC,12 inch Fan,14 inch Fan,17 inch Fan,18 A AC,18 C AC,18 F AC,1B3GDEL,1B6GDEH,1B6GDEL,...,Stand Fan,Tornedo Fan,Wall Move,Washing Machine,Water Filter,Weight Scale,Amount,day_of_year,month,day_of_week
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,91,4,4
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,239400,92,4,5
2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,274140,93,4,6
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,177000,94,4,0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,106000,95,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
756,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,117,4,4
757,0.0,0.0,3.0,11.0,1.0,4.0,0.0,0.0,0.0,1.0,...,2.0,0.0,0.0,1.0,0.0,0.0,582650,118,4,5
758,1.0,0.0,2.0,20.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,367700,119,4,6
759,0.0,0.0,5.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,451000,120,4,0


In [81]:
products = data.columns.to_list()[:-4]
for product in products:
    print(product)
    classification_data = data[['day_of_year', 'month', 'day_of_week', product]].copy()

    # Defining conditions here
    conditions = [
        classification_data[product] > 2,
        classification_data[product] == 2,
        classification_data[product] == 1,  
        classification_data[product] == 0  
    ]

    # Defining the corresponding outputs for each condition
    choices = [3, 2, 1, 0]

    # Apply np.select to assign values based on the conditions
    classification_data.loc[:, 'Sale'] = np.select(conditions, choices, default=0)

    X = classification_data[['day_of_year', 'month', 'day_of_week']]
    y_class = classification_data['Sale']  # Classification target
    y_reg = classification_data[product]  # Regression target

    X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X, y_class, test_size = 0.2, random_state = 42)

    clf = RandomForestClassifier(n_estimators = 100, random_state = 42)
    clf.fit(X_train_class, y_train_class)

    y_pred_class = clf.predict(X_test_class)
    # print("Classification Report:\n", classification_report(y_test_class, y_pred_class))

    y_pred_class_all = clf.predict(X)
    # print("Classification Report on Full Data:\n", classification_report(y_class, y_pred_class_all))

    X_train_reg = X[y_class == 3]
    y_train_reg = y_reg[y_class == 3]

    if (len(X_train_reg) > 0):
        # Initialize the SGDRegressor with a small learning rate
        reg = SGDRegressor(max_iter=100, tol=1e-3, random_state=42)

        # Scaling the data for regression
        scaler = StandardScaler()
        X_train_reg_scaled = scaler.fit_transform(X_train_reg)

        # Train the regression model with SGDRegressor
        reg.fit(X_train_reg_scaled, y_train_reg)

        for i in range(len(y_pred_class_all)):
            if y_pred_class_all[i] == 3:  # If a sale is predicted
                y_pred_class_all[i] = reg.predict([X.iloc[i]])[0]  # Predict the quantity

    print(mean_absolute_error(y_reg, y_pred_class_all))

12 C AC
0.009198423127463863
12 inch Fan




0.4021024967148489
14 inch Fan
5.043363994743758
17 inch Fan
4.421813403416557
18 A AC
0.006570302233902759
18 C AC




0.03679369250985545
18 F AC
0.002628120893561104
1B3GDEL
0.003942181340341655
1B6GDEH
0.003942181340341655
1B6GDEL
0.03679369250985545
1B6RXXX
0.00788436268068331
1D4CRXX




0.005256241787122208
1D4GDEH
0.006570302233902759
1D4GDEL




0.06176084099868594
1D4GDSH




0.005256241787122208
1D5GDEL




0.018396846254927726
1D5RXXX
0.001314060446780552
1F3GDEH




0.18396846254927726
1F3GDEL




1.0880420499342969
1F3GDSH




0.06701708278580815
1F3RXXX




0.05913272010512484
1H5ELXX
0.001314060446780552
1H5GDEL
0.011826544021024968
1H5GDSH
0.0
1X1RXXX
0.003942181340341655
24 B AC




0.010512483574244415
24 C AC
0.002628120893561104
24 inch LED




0.11826544021024968
2A3ELXX
0.0
2A3GDEH
0.013140604467805518
2A3GDEL




0.2588699080157687
2A3GDSH




0.00788436268068331
2A3GDXX




0.19710906701708278
2A3RLXX
0.001314060446780552
2A7GDEL
0.003942181340341655
2A8GDXX
0.003942181340341655
2B0GDXX
0.010512483574244415
2B3GDEL
0.026281208935611037
2B3GDSH
0.006570302233902759
2B3GDXX




0.035479632063074903
2B6GDEL
0.002628120893561104
2B6RXXX
0.002628120893561104
2D4GDEL
0.013140604467805518
2D4RXXX
0.003942181340341655
2E0GDEL
0.03153745072273324
2E0GDXX




0.024967148488830485
2E4GDEH
0.002628120893561104
2E4GDXX
0.001314060446780552
2E5EHLX
0.0
2E5GDEL
0.013140604467805518
2F0GDEL
0.0
2F0GDXX
0.005256241787122208
2G0CGXX
0.00788436268068331
2H2GDEL
0.001314060446780552
2H2GDXX (Inv)




0.018396846254927726
2N5CRXX
0.003942181340341655
2N5GDEL
0.003942181340341655
2N5GDEL (Inv)
0.002628120893561104
2N5GDXX
0.003942181340341655
2T5GDEL
0.02890932982917214
2T5RRLX-GX




0.003942181340341655
2T5RRLX-XX
0.0
32 inch LED
0.21156373193166886
3A2GDEL
0.0
3A2GDEN
0.002628120893561104
3A2GDXX
0.002628120893561104
3A7GDXX
0.005256241787122208
3A7NXXX
0.002628120893561104
3B0GDEL
0.003942181340341655
3B0GDXX
0.003942181340341655
3B0GDXX (inv)
0.0
3D8GD-DD
0.0
3D8GDEH-DD
0.009198423127463863
3D8GDEL
0.011826544021024968
3D8GDEL (inv)
0.00788436268068331
3D8GDSH
0.0
3D8GDXX
0.001314060446780552
3E8GDXX
0.0
3F5GDSH
0.001314060446780552
3F5GDXX
0.003942181340341655
3J0GDEL
0.005256241787122208
3J0GDEL DD
0.001314060446780552
3J0RXXX
0.002628120893561104
3X7GDEL




0.022339027595269383
3X7NEXX
0.005256241787122208
3X9GDEL
0.001314060446780552
3X9GDXX
0.0
40 inch LED
0.018396846254927726
43 inch 4K
0.026281208935611037
5F3GDEL
0.003942181340341655
5F3GDEL-DD
0.003942181340341655
Air Cooler
0.002628120893561104
Blender




0.045992115637319315
Ceiling Fan
0.10775295663600526
Fry Pan




0.02759526938239159
Gas Stove




0.11300919842312747
Glyser
0.08278580814717477
Grinder




0.24310118265440211
Induction Cooker
0.005256241787122208
Infered Cooker




0.030223390275952694
Iron




0.3180026281208936
Kettle
0.17608409986859397
Light Bulb




0.006570302233902759
Mobile H10
0.0
Mobile N5
0.006570302233902759
Mobile P16
0.009198423127463863
Multi Cooker
0.009198423127463863
Multiplug
0.006570302233902759
Oven
0.017082785808147174
Pressure Cooker
0.030223390275952694
Protector




0.011826544021024968
Remote




0.01971090670170828
Rice Cooker
0.24441524310118265
Room Heater




0.022339027595269383
Ruti Tawa
0.005256241787122208
Speaker
0.001314060446780552
Stabilizer
0.00788436268068331
Stand Fan




0.06438896189224705
Tornedo Fan
0.006570302233902759
Wall Move




0.026281208935611037
Washing Machine
0.03153745072273324
Water Filter
0.003942181340341655
Weight Scale
0.005256241787122208
