# **Floral Sales Prediction Project**

Joe Lardie

May 2023

# **Imports**

In [1]:
#Standard library
import os
import warnings
from pathlib import Path

#Numpy
import numpy as np

#Pandas
import pandas as pd

#Seaborn
import seaborn as sns

#matplotlib
import matplotlib.pyplot as plt
import plotly

#Sklearn preprocessing
from sklearn import preprocessing, set_config
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, StandardScaler, LabelEncoder

#Sklearn Models
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_selector, make_column_transformer
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore")
set_config(display = 'diagram')

#Sklearn Metrics
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    f1_score,
    confusion_matrix,
    roc_auc_score,
    roc_curve,
    classification_report,
)


# **Loading Data**

- Because the sales exports are large, the data is stored as one CSV file per year (the 2023 file covers January–May only), and each file is loaded into its own DataFrame.

In [2]:
# Floral sales data for 2018.
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df18 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2018_to_Dec_31_2018_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2018 report.
df18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8055 entries, 0 to 8054
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              8055 non-null   object
 1   Transaction Type     8055 non-null   object
 2   Order Time           8055 non-null   object
 3   Order Date           8055 non-null   object
 4   Delivery Date        8048 non-null   object
 5   Sender               5020 non-null   object
 6   Recipient            4022 non-null   object
 7   Product Total        8048 non-null   object
 8   Delivery             8048 non-null   object
 9   Nontaxable Delivery  8048 non-null   object
 10  Wire Out Fee         8020 non-null   object
 11  Discount             8020 non-null   object
 12  Gift Cards           8020 non-null   object
 13  Tax                  8048 non-null   object
 14  Tips                 8020 non-null   object
 15  Grand Total          8055 non-null   object
 16  Paymen

In [3]:
# Preview the first five rows of the 2018 report.
df18.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,100017453,Sale,03:28:16PM Sat,"Dec 29, 2018","Dec 29, 2018",Candyce Williams Glaser,,$815.00,$0.00,$0.00,$0.00,$0.00,$0.00,$79.46,$0.00,$894.46,Credit Card,Taken,Phone
1,100017452,Sale,03:16:50PM Sat,"Dec 29, 2018","Dec 29, 2018",,,$249.90,$0.00,$0.00,$0.00,-$124.96,$0.00,$12.19,$0.00,$137.13,Credit Card,Taken,Walk-In
2,100017451,Sale,02:58:53PM Sat,"Dec 29, 2018","Dec 29, 2018",,,$24.95,$0.00,$0.00,$0.00,$0.00,$0.00,$2.43,$0.00,$27.38,Credit Card,Taken,Walk-In
3,100017450,Sale,02:54:45PM Sat,"Dec 29, 2018","Dec 29, 2018",Tracie Hamilton,,$635.00,$0.00,$0.00,$0.00,$0.00,$0.00,$61.91,$0.00,$696.91,Credit Card,Taken,Phone
4,100017202,Sale,02:48:15PM Sat,"Dec 29, 2018","Dec 23, 2018",Candyce Williams Glaser,Candyce Williams Glaser,$702.00,$0.00,$25.00,$0.00,$0.00,$0.00,$68.45,$0.00,$795.45,Credit Card,Delivery,Walk-In


In [4]:
# Floral sales data for 2019.
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df19 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2019_to_Dec_31_2019_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2019 report.
df19.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8865 entries, 0 to 8864
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              8865 non-null   object
 1   Transaction Type     8865 non-null   object
 2   Order Time           8865 non-null   object
 3   Order Date           8865 non-null   object
 4   Delivery Date        8861 non-null   object
 5   Sender               5404 non-null   object
 6   Recipient            4347 non-null   object
 7   Product Total        8865 non-null   object
 8   Delivery             8861 non-null   object
 9   Nontaxable Delivery  8861 non-null   object
 10  Wire Out Fee         8830 non-null   object
 11  Discount             8830 non-null   object
 12  Gift Cards           8830 non-null   object
 13  Tax                  8861 non-null   object
 14  Tips                 8830 non-null   object
 15  Grand Total          8865 non-null   object
 16  Paymen

In [5]:
# Preview the first five rows of the 2019 report.
df19.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,100026051,Sale,04:42:11PM Tue,31-Dec-19,31-Dec-19,,,$138.00,$0.00,$0.00,$0.00,($69.00),$0.00,$6.73,$0.00,$75.73,Credit Card,Taken,Walk-In
1,100026050,Sale,04:29:19PM Tue,31-Dec-19,31-Dec-19,,,$450.35,$0.00,$0.00,$0.00,($225.21),$0.00,$21.96,$0.00,$247.10,Credit Card,Taken,Walk-In
2,100026049,Sale,02:22:44PM Tue,31-Dec-19,31-Dec-19,,,$97.00,$0.00,$0.00,$0.00,($48.50),$0.00,$4.73,$0.00,$53.23,Credit Card,Taken,Walk-In
3,100026048,Sale,02:14:10PM Tue,31-Dec-19,2-Jan-20,LEIGH MEBEL,JO HORNE,$75.00,$0.00,$12.00,$0.00,$0.00,$0.00,$7.31,$0.00,$94.31,Credit Card,Delivery,Walk-In
4,100026047,Sale,02:00:52PM Tue,31-Dec-19,31-Dec-19,,,$58.85,$0.00,$0.00,$0.00,($29.43),$0.00,$2.87,$0.00,$32.29,Credit Card,Taken,Walk-In


In [6]:
# Floral sales data for 2020.
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df20 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2020_to_Dec_31_2020_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2020 report.
df20.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7607 entries, 0 to 7606
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              7607 non-null   object
 1   Transaction Type     7607 non-null   object
 2   Order Time           7607 non-null   object
 3   Order Date           7607 non-null   object
 4   Delivery Date        7607 non-null   object
 5   Sender               4726 non-null   object
 6   Recipient            3883 non-null   object
 7   Product Total        7607 non-null   object
 8   Delivery             7607 non-null   object
 9   Nontaxable Delivery  7607 non-null   object
 10  Wire Out Fee         7581 non-null   object
 11  Discount             7581 non-null   object
 12  Gift Cards           7581 non-null   object
 13  Tax                  7607 non-null   object
 14  Tips                 7581 non-null   object
 15  Grand Total          7607 non-null   object
 16  Paymen

In [7]:
# Preview the first five rows of the 2020 report.
df20.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,100033474,Sale,03:58:51PM Thu,31-Dec-20,31-Dec-20,,,$199.80,$0.00,$0.00,$0.00,($99.90),$0.00,$9.74,$0.00,$109.64,Credit Card,Taken,Walk-In
1,100033473,Sale,03:52:37PM Thu,31-Dec-20,31-Dec-20,,,$81.70,$0.00,$0.00,$0.00,($40.85),$0.00,$3.99,$0.00,$44.84,Credit Card,Taken,Walk-In
2,100033472,Sale,03:41:53PM Thu,31-Dec-20,31-Dec-20,,,$53.80,$0.00,$0.00,$0.00,($26.90),$0.00,$2.63,$0.00,$29.53,Credit Card,Taken,Walk-In
3,100033471,Sale,03:39:05PM Thu,31-Dec-20,31-Dec-20,,,$100.00,$0.00,$0.00,$0.00,$0.00,$0.00,$9.75,$0.00,$109.75,Credit Card,Taken,Walk-In
4,100033470,Sale,03:35:51PM Thu,31-Dec-20,31-Dec-20,,,$38.85,$0.00,$0.00,$0.00,($19.43),$0.00,$1.89,$0.00,$21.31,Credit Card,Taken,Walk-In


In [8]:
# Floral sales data for 2021.
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df21 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2021_to_Dec_31_2021_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2021 report.
df21.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9670 entries, 0 to 9669
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              9670 non-null   object
 1   Transaction Type     9670 non-null   object
 2   Order Time           9670 non-null   object
 3   Order Date           9670 non-null   object
 4   Delivery Date        9670 non-null   object
 5   Sender               5559 non-null   object
 6   Recipient            4216 non-null   object
 7   Product Total        9670 non-null   object
 8   Delivery             9670 non-null   object
 9   Nontaxable Delivery  9670 non-null   object
 10  Wire Out Fee         9658 non-null   object
 11  Discount             9658 non-null   object
 12  Gift Cards           9658 non-null   object
 13  Tax                  9670 non-null   object
 14  Tips                 9658 non-null   object
 15  Grand Total          9670 non-null   object
 16  Paymen

In [9]:
# Preview the first five rows of the 2021 report.
df21.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,100042953,Sale,05:02:50PM Thu,30-Dec-21,30-Dec-21,,,$265.65,$0.00,$0.00,$0.00,($132.83),$0.00,$12.95,$0.00,$145.77,Credit Card,Taken,Walk-In
1,100042952,Sale,04:44:01PM Thu,30-Dec-21,30-Dec-21,,,$24.95,$0.00,$0.00,$0.00,($12.48),$0.00,$1.21,$0.00,$13.68,Credit Card,Taken,Walk-In
2,100042951,Sale,04:42:00PM Thu,30-Dec-21,30-Dec-21,,,$147.90,$0.00,$0.00,$0.00,($73.95),$0.00,$7.21,$0.00,$81.16,Credit Card,Taken,Walk-In
3,100042950,Sale,04:39:06PM Thu,30-Dec-21,30-Dec-21,,,$129.90,$0.00,$0.00,$0.00,($64.95),$0.00,$6.33,$0.00,$71.28,Credit Card,Taken,Walk-In
4,100042949,Sale,04:37:22PM Thu,30-Dec-21,30-Dec-21,,,$10.00,$0.00,$0.00,$0.00,$0.00,$0.00,$0.98,$0.00,$10.98,Credit Card,Taken,Walk-In


In [10]:
# Floral sales data for 2022.
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df22 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2022_to_Dec_31_2022_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2022 report.
df22.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10640 entries, 0 to 10639
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              10640 non-null  object
 1   Transaction Type     10640 non-null  object
 2   Order Time           10640 non-null  object
 3   Order Date           10640 non-null  object
 4   Delivery Date        10639 non-null  object
 5   Sender               6491 non-null   object
 6   Recipient            5062 non-null   object
 7   Product Total        10640 non-null  object
 8   Delivery             10639 non-null  object
 9   Nontaxable Delivery  10639 non-null  object
 10  Wire Out Fee         10626 non-null  object
 11  Discount             10626 non-null  object
 12  Gift Cards           10626 non-null  object
 13  Tax                  10639 non-null  object
 14  Tips                 10626 non-null  object
 15  Grand Total          10640 non-null  object
 16  Paym

In [11]:
# Preview the first five rows of the 2022 report.
df22.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,100053349,Sale,12:33:14PM Sat,31-Dec-22,31-Dec-22,,,$78.50,$0.00,$0.00,$0.00,$0.00,$0.00,$7.65,$0.00,$86.15,Credit Card,Taken,Walk-In
1,100053348,Sale,12:14:48PM Sat,31-Dec-22,31-Dec-22,,,$34.90,$0.00,$0.00,$0.00,($17.45),$0.00,$1.70,$0.00,$19.15,Credit Card,Taken,Walk-In
2,100053347,Sale,11:38:24AM Sat,31-Dec-22,31-Dec-22,,,$185.00,$0.00,$0.00,$0.00,($92.50),$0.00,$9.02,$0.00,$101.52,Credit Card,Taken,Walk-In
3,100053346,Sale,11:33:39AM Sat,31-Dec-22,31-Dec-22,,,$134.85,$0.00,$0.00,$0.00,($67.43),$0.00,$6.57,$0.00,$73.99,Credit Card,Taken,Walk-In
4,100053345,Sale,11:16:07AM Sat,31-Dec-22,31-Dec-22,,,$44.50,$0.00,$0.00,$0.00,$0.00,$0.00,$4.34,$0.00,$48.84,Credit Card,Taken,Walk-In


In [12]:
# Floral sales data for 2023 (partial year: the export covers January 1 - May 31).
# The export folder defaults to the original location; set the FLORAL_DATA_DIR
# environment variable to point the notebook at a different folder.
DATA_DIR = Path(os.environ.get('FLORAL_DATA_DIR', '/Users/davyd/OneDrive/Desktop'))
df23 = pd.read_csv(DATA_DIR / 'Sales_Report_Jan_01_2023_to_May_31_2023_Garden_Delights_Fine_Florist.csv')
# Column names, non-null counts and dtypes of the 2023 report.
df23.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4539 entries, 0 to 4538
Data columns (total 19 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Order #              4539 non-null   object
 1   Transaction Type     4539 non-null   object
 2   Order Time           4539 non-null   object
 3   Order Date           4539 non-null   object
 4   Delivery Date        4530 non-null   object
 5   Sender               2839 non-null   object
 6   Recipient            2040 non-null   object
 7   Product Total        4539 non-null   object
 8   Delivery             4539 non-null   object
 9   Nontaxable Delivery  4539 non-null   object
 10  Wire Out Fee         4535 non-null   object
 11  Discount             4535 non-null   object
 12  Gift Cards           4535 non-null   object
 13  Tax                  4539 non-null   object
 14  Tips                 4535 non-null   object
 15  Grand Total          4539 non-null   object
 16  Paymen

In [13]:
# Preview the first five rows of the 2023 report.
df23.head(n=5)

Unnamed: 0,Order #,Transaction Type,Order Time,Order Date,Delivery Date,Sender,Recipient,Product Total,Delivery,Nontaxable Delivery,Wire Out Fee,Discount,Gift Cards,Tax,Tips,Grand Total,Payment Method,Order Type,Order Method
0,1000057895,Sale,04:36:16PM Wed,31-May-23,31-May-23,,,$29.95,$0.00,$0.00,$0.00,$0.00,$0.00,$2.92,$0.00,$32.87,Credit Card,Taken,Walk-In
1,1000057894,Sale,04:24:33PM Wed,31-May-23,31-May-23,,,$8.00,$0.00,$0.00,$0.00,$0.00,$0.00,$0.78,$0.00,$8.78,Credit Card,Taken,Walk-In
2,1000057893,Sale,04:18:43PM Wed,31-May-23,31-May-23,,,$65.75,$0.00,$0.00,$0.00,$0.00,$0.00,$6.41,$0.00,$72.16,Credit Card,Taken,Walk-In
3,1000057892,Sale,04:07:47PM Wed,31-May-23,31-May-23,,,$65.90,$0.00,$0.00,$0.00,$0.00,$0.00,$6.43,$0.00,$72.33,Credit Card,Taken,Walk-In
4,1000057891,Adjustment,03:12:30PM Wed,31-May-23,31-May-23,,,$7.95,$0.00,$0.00,$0.00,$0.00,$0.00,$0.78,$0.00,$8.73,Credit Card,Taken,Walk-In


- Drop columns: Sender, Recipient, Wire Out Fee, Order #, Nontaxable Delivery, Tips, Payment Method
- Handle NaN values
- Drop duplicates
- Convert data types to numeric
- Combine the data frames
- Preprocess the data
- Build prediction models
- Tune the models
- Gain insights about trends across the months of the year
- Forecast future sales data

# **Combining Dataframes Together**

# **Data Cleaning**

# **Something important**

# **hopefully something else important**

# **Who even knows at this point**