### Load Datasets

In [4]:
import pandas as pd

# Load datasets
def _read_raw_csv(path):
    """Read one semicolon-delimited CSV file into a DataFrame."""
    return pd.read_csv(path, delimiter=';')


# One DataFrame per CSV file under ../../data/raw
sales_by_day = _read_raw_csv('../../data/raw/LastWeekSalesByDay.csv')
top_customers = _read_raw_csv('../../data/raw/LastWeekTopCustomers.csv')
top_products = _read_raw_csv('../../data/raw/LastWeekTopProducts.csv')
order_statistics = _read_raw_csv('../../data/raw/LastWeekOrderStatistics.csv')
cash_flow_report = _read_raw_csv('../../data/raw/CashFlowReport.csv')
understocked_products = _read_raw_csv('../../data/raw/UnderstockedProducts.csv')

### Sales By Day

In [5]:
# Peek at the first five rows of the daily sales data
print(sales_by_day.head(n=5))

         Date   Product category  Revenue
0  15/04/2024     Acoustic Drums   195870
1  15/04/2024   Acoustic Guitars    90500
2  15/04/2024   Audio Interfaces    18100
3  15/04/2024       Bass Guitars   100150
4  15/04/2024  Classical Guitars    48280


In [6]:
# Total "Revenue" per "Product category", summed over every row in sales_by_day
revenue_per_category = sales_by_day.groupby('Product category')['Revenue'].sum()

# Turn the category index back into a regular column
grouped_df_sales = revenue_per_category.reset_index()

# Print the full aggregated table without the positional index
print(grouped_df_sales.to_string(index=False))

         Product category  Revenue
           Acoustic Drums  1019960
         Acoustic Guitars   483050
         Audio Interfaces   139730
             Bass Guitars   634400
        Classical Guitars   258250
             DJ Equipment   526550
           Digital Pianos   871850
                Drum Kits   539600
         Electric Guitars   742960
                   Flutes   485500
            Guitar Pedals   130160
               Harmonicas    31232
                Keyboards  1165950
     Live Sound Equipment   657550
         MIDI Controllers   396930
              Microphones   240130
Music Production Software   209326
       Orchestral Strings    99110
   Percussion Accessories    96390
                   Pianos        0
               Saxophones  1509000
          Studio Monitors   161010
             Synthesizers  1133790
                 Trumpets   628000
                 Ukuleles   417210
                  Violins   191930


In [7]:
# Count fully duplicated rows in the sales_by_day data (0 means none)
sales_duplicate_mask = sales_by_day.duplicated()
print(sales_duplicate_mask.sum())

0


### Top Customers

In [8]:
# Peek at the first five rows of the top-customers data
print(top_customers.head(n=5))

            Customer Name  Revenue
0                Web Site   284790
1   Crescendo Corner Ltd.   277990
2  Symphony Supplies Inc.   272393
3    Tempo Treasures Ltd.   252129
4      Symphony Shop Ltd.   245906


In [9]:
# Total "Revenue" per "Customer Name", summed over every row in top_customers
revenue_per_customer = top_customers.groupby('Customer Name')['Revenue'].sum()

# Turn the customer-name index back into a regular column
grouped_df_customers = revenue_per_customer.reset_index()

# Print the full aggregated table without the positional index
print(grouped_df_customers.to_string(index=False))

              Customer Name  Revenue
    Acoustic Artifacts Inc.    95680
     Acoustic Artistry Ltd.   120340
          Acoustic Aura LLC   116550
      Acoustic Avenues Inc.   127430
      Acoustic Avenues Ltd.   166390
        Allegro Arcade Inc.   158486
       Allegro Artisan Inc.   105710
       Allegro Artisan Ltd.    92625
        Allegro Attune Inc.   124470
        Allegro Avenue Inc.    75350
        Allegro Avenue Ltd.   144070
       Crescendo Cache Inc.   136850
Crescendo Collectibles Inc.   142060
  Crescendo Collection Inc.   194755
 Crescendo Collections Ltd.   114436
      Crescendo Corner Inc.   112990
      Crescendo Corner Ltd.   277990
 Crescendo Cornerstone Inc.   164590
 Crescendo Cornerstone Ltd.   142140
      Crescendo Crafts Inc.   212580
      Crescendo Crafts Ltd.    95555
   Crescendo Creations Inc.   132010
    Crescendo Creations LLC   133845
 Crescendo Instruments Ltd.   153120
         Echo Elegance Ltd.   127430
         Echo Emporium Ltd.   156874
 

In [10]:
# (rows, columns) of the per-customer revenue table
customers_table_shape = grouped_df_customers.shape
print(customers_table_shape)


(92, 2)


In [11]:
# Count fully duplicated rows in the top_customers data (0 means none)
customers_duplicate_mask = top_customers.duplicated()
print(customers_duplicate_mask.sum())

0


### Top Products

In [12]:
# Peek at the first five rows of the top-products data (rendered as the cell's output)
top_products.head(n=5)

Unnamed: 0,Product Description,Revenue
0,Selmer Paris Reference 54 Alto,409500
1,Keilwerth SX90R,264000
2,Roland Fantom-8,248500
3,Tama Starclassic Bubinga 5-Piece Shell Pack,245000
4,Music Man John Petrucci Majesty,240000


In [13]:
# Total "Revenue" per "Product Description", summed over every row in top_products
revenue_per_product = top_products.groupby('Product Description')['Revenue'].sum()

# Turn the product-description index back into a regular column
grouped_df_products = revenue_per_product.reset_index()

# Print the full aggregated table without the positional index
print(grouped_df_products.to_string(index=False))

                                          Product Description  Revenue
                                                AKG C414 XLII    71500
                                        Ableton Live 11 Suite    50250
                                               Adam Audio A7X    21000
                                          Akai MPK Mini MKIII     6700
                                                  Akai MPK249    20800
                                           Alesis Recital Pro    17150
                                                  Alesis VI49    11500
                                                  Alhambra 1C    21000
                           Allen & Heath QU-16C Digital Mixer   151300
                                       Allen & Heath Xone:PX5    98000
              Alvarez Artist Series AF30 Folk Acoustic Guitar    19800
                                Amahi UK660 Pineapple Ukulele     4760
                              Anuenue aNN-900 Soprano Ukulele    76000
      

In [14]:
# (rows, columns) of the per-product revenue table
products_table_shape = grouped_df_products.shape
print(products_table_shape)

(281, 2)


In [15]:
# Count fully duplicated rows in the top_products data (0 means none)
products_duplicate_mask = top_products.duplicated()
print(products_duplicate_mask.sum())

0


### Order Statistics

In [16]:
# Peek at the first rows (up to five) of the order statistics data
print(order_statistics.head(n=5))

            Channel  Orders Count  Order Lines  Quantity in pieces  \
0  Online Retailers           520         3683                5160   
1     Retail Stores          1170         4150                4632   
2          Web Site           270          350                 396   
3       Wholesalers           355         5100                6783   

   Quantity in boxes   Amount  Percentage of fulfillment  
0                746  3642673                         98  
1                667  3513708                         97  
2                 56   284790                         99  
3                956  5328397                         96  


### Understocked Products

In [17]:
# Peek at the first five rows of the understocked-products data
print(understocked_products.head(n=5))

                         Product Description  Minimum stock  Current stock  \
0                           Apple GarageBand              8              3   
1  Aquila New Nylgut Soprano Ukulele Strings              5              1   
2           Bosendorfer Imperial Grand Piano              9              0   
3       Casio CDP-S150 Compact Digital Piano              7              2   
4                              EastTop T008K              6              2   

   Expected Qty  Required stock  
0            15               1  
1            13               0  
2            10               0  
3            18               0  
4             1               1  


### Cashflow

In [18]:
# Peek at the first rows (up to five) of the cash flow report
print(cash_flow_report.head(n=5))

    Debit  Credit  Balance  Inflows Forecast  Outflows Forecast  \
0  300000  450000   150000          15000000            9000000   

   Projected Balance  
0           23850000  


In [27]:
# Total revenue over all sales_by_day rows dated 15/04/2024.
# 'Date' is compared as dd/mm/yyyy text, exactly as it appears in the CSV;
# a boolean mask replaces the row-by-row iterrows() loop.
is_date_m = sales_by_day['Date'] == '15/04/2024'

# int(...) keeps the result a plain Python int; it is 0 when no row matches.
total_revenue_M = int(sales_by_day.loc[is_date_m, 'Revenue'].sum())

# Print the total revenue
print(total_revenue_M)

2134744


In [28]:
# Total revenue over all sales_by_day rows dated 20/04/2024.
# 'Date' is compared as dd/mm/yyyy text, exactly as it appears in the CSV;
# a boolean mask replaces the row-by-row iterrows() loop.
is_date_y = sales_by_day['Date'] == '20/04/2024'

# int(...) keeps the result a plain Python int; it is 0 when no row matches.
total_revenue_Y = int(sales_by_day.loc[is_date_y, 'Revenue'].sum())

# Print the total revenue
print(total_revenue_Y)

2083229


In [29]:
# Difference between the two daily totals (positive when total_revenue_M is larger)
revenue_difference = total_revenue_M - total_revenue_Y
print(revenue_difference)

51515


In [25]:
# Number of sales_by_day rows dated 15/04/2024.
# Summing the boolean mask counts the matching rows without a Python-level loop;
# int(...) keeps the result a plain Python int (0 when no row matches).
count = int((sales_by_day['Date'] == '15/04/2024').sum())

# Print the row count
print(count)

26


In [26]:
# Number of sales_by_day rows dated 20/04/2024.
# Summing the boolean mask counts the matching rows without a Python-level loop;
# int(...) keeps the result a plain Python int (0 when no row matches).
count = int((sales_by_day['Date'] == '20/04/2024').sum())

# Print the row count
print(count)

26
