## Hands-on - Matrices, DataFrames, and Time-Series Data

In [1]:
# Import necessary libraries
import pandas as pd  # pandas is used for handling tabular datasets (dataframes) and performing operations such as reading CSV files
import numpy as np  # numpy is used for numerical computations such as working with arrays and applying mathematical operations

# Load dataset from GitHub URL
file_path = "https://raw.githubusercontent.com/Hamed-Ahmadinia/DASP-2025/main/Bike%20Sales.csv"  # URL link to the dataset stored on GitHub

# Read the dataset into a pandas dataframe
df = pd.read_csv(file_path)  # Load the dataset as a pandas DataFrame

# Display the first few rows of the dataframe to confirm the data has been loaded correctly
print("Dataset Preview:")  # Print a label for context
print(df.head(5))  # Display the first 5 rows of the dataset

Dataset Preview:
         Date  Day     Month  Year  Customer_Age       Age_Group  \
0  2013-11-26   26  November  2013            19     Youth (<25)   
1  2015-11-26   26  November  2015            19     Youth (<25)   
2  2014-03-23   23     March  2014            49  Adults (35-64)   
3  2016-03-23   23     March  2016            49  Adults (35-64)   
4  2014-05-15   15       May  2014            47  Adults (35-64)   

  Customer_Gender    Country             State Product_Category Sub_Category  \
0               M     Canada  British Columbia      Accessories   Bike Racks   
1               M     Canada  British Columbia      Accessories   Bike Racks   
2               M  Australia   New South Wales      Accessories   Bike Racks   
3               M  Australia   New South Wales      Accessories   Bike Racks   
4               F  Australia   New South Wales      Accessories   Bike Racks   

               Product  Order_Quantity  Unit_Cost  Unit_Price  Profit  Cost  \
0  Hitch Rack 

### **Exercise 1: Convert the "Date" column to datetime format**
**Question:** Convert the "Date" column to pandas datetime format.

In [3]:
df['Date'] = pd.to_datetime(df['Date'])  # Convert the "Date" column to datetime format
print(df[['Date']].head(5))  # Display the first few rows to confirm
df.info()

        Date
0 2013-11-26
1 2015-11-26
2 2014-03-23
3 2016-03-23
4 2014-05-15
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113036 entries, 0 to 113035
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   Date              113036 non-null  datetime64[ns]
 1   Day               113036 non-null  int64         
 2   Month             113036 non-null  object        
 3   Year              113036 non-null  int64         
 4   Customer_Age      113036 non-null  int64         
 5   Age_Group         113036 non-null  object        
 6   Customer_Gender   113036 non-null  object        
 7   Country           113036 non-null  object        
 8   State             113036 non-null  object        
 9   Product_Category  113036 non-null  object        
 10  Sub_Category      113036 non-null  object        
 11  Product           113036 non-null  object        
 12  Order_Quantity    113036 non-null  i

### **Exercise 2: Set the "Date" column as the index**
**Question:** Set the "Date" column as the index of the DataFrame.

In [30]:
df.set_index('Date', inplace=True)  # Set "Date" as the index
print(df.head(5))  # Display the first few rows to confirm index

            Day     Month  Year  Customer_Age       Age_Group Customer_Gender  \
Date                                                                            
2013-11-26   26  November  2013            19     Youth (<25)               M   
2015-11-26   26  November  2015            19     Youth (<25)               M   
2014-03-23   23     March  2014            49  Adults (35-64)               M   
2016-03-23   23     March  2016            49  Adults (35-64)               M   
2014-05-15   15       May  2014            47  Adults (35-64)               F   

              Country             State Product_Category Sub_Category  \
Date                                                                    
2013-11-26     Canada  British Columbia      Accessories   Bike Racks   
2015-11-26     Canada  British Columbia      Accessories   Bike Racks   
2014-03-23  Australia   New South Wales      Accessories   Bike Racks   
2016-03-23  Australia   New South Wales      Accessories   Bike Rac

### **Exercise 3: Slice the data from '2013-01-01' to '2013-12-31'**
**Question:** Slice the DataFrame to show data for the year 2013.

In [21]:
# Sort the index by date before slicing
df = df.sort_index()

# Now perform the slicing
sliced_df = df['2013-01-01':'2013-12-31']  # Slice the data for the specified date range
print("Sliced Data for 2013:")
print(sliced_df.head(5))  # Display the first few rows of the sliced data

Sliced Data for 2013:
            Day    Month  Year  Customer_Age             Age_Group  \
Date                                                                 
2013-01-01    1  January  2013            29  Young Adults (25-34)   
2013-01-01    1  January  2013            29  Young Adults (25-34)   
2013-01-01    1  January  2013            19           Youth (<25)   
2013-01-01    1  January  2013            53        Adults (35-64)   
2013-01-01    1  January  2013            42        Adults (35-64)   

           Customer_Gender        Country          State Product_Category  \
Date                                                                        
2013-01-01               F  United States        Florida            Bikes   
2013-01-01               M  United States         Oregon            Bikes   
2013-01-01               F  United States     Washington            Bikes   
2013-01-01               F         France  Seine (Paris)            Bikes   
2013-01-01               

### **Exercise 4: Calculate cumulative revenue**
**Question:** Add a new column 'Cumulative_Revenue' that shows the cumulative sum of the revenue.

In [32]:
df['Cumulative_Revenue'] = df['Revenue'].cumsum()  # Calculate the cumulative sum of the "Revenue" column
print(df[['Revenue', 'Cumulative_Revenue']].head(5))  # Display the first few rows to confirm

            Revenue  Cumulative_Revenue
Date                                   
2013-11-26      950                 950
2015-11-26      950                1900
2014-03-23     2401                4301
2016-03-23     2088                6389
2014-05-15      418                6807


### **Exercise 5: Downsample to show monthly total revenue (Hint: Use resampling)**
**Question:** Resample the data to calculate total monthly revenue.

In [34]:
monthly_revenue = df.resample('ME').sum()  # Resample data by month and sum the "Revenue"
print(monthly_revenue[['Revenue']].head())  # Display the first few rows of the monthly revenue summary

            Revenue
Date               
2011-01-31   675193
2011-02-28   637598
2011-03-31   708517
2011-04-30   698782
2011-05-31   734537
