# Imports all sales data into a usable format.

In [None]:
import pandas as pd
import os

# List of uploaded file paths
file_paths = [
    '/content/data_1.csv',
    '/content/data_2.csv',
    '/content/data_3.csv',
    '/content/data_4.csv'
]

# List to hold DataFrames for each file
dataframes = []

# Load each file into a DataFrame and append to the list
for file_path in file_paths:
    df = pd.read_csv(file_path)
    dataframes.append(df)

# Concatenate all DataFrames into one
all_sales_data = pd.concat(dataframes, ignore_index=True)

# Display the first few rows of the combined DataFrame
print(all_sales_data.head())


         Date   Product     Category  Quantity  Price  Discount
0  2024-08-01  ProductA  Electronics      10.0  200.0      0.05
1  2024-08-01  ProductB         Home       5.0  150.0      0.10
2  2024-08-02  ProductA  Electronics       7.0  200.0       NaN
3  2024-08-02  ProductC         Home       3.0  100.0      0.00
4  2024-08-03  ProductD    Furniture       4.0  300.0      0.15


# Cleans the data by removing errors and filling in missing values.

In [None]:
all_sales_data.drop_duplicates(inplace=True)
print(all_sales_data.head())

         Date   Product     Category  Quantity  Price  Discount
0  2024-08-01  ProductA  Electronics      10.0  200.0      0.05
1  2024-08-01  ProductB         Home       5.0  150.0      0.10
2  2024-08-02  ProductA  Electronics       7.0  200.0       NaN
3  2024-08-02  ProductC         Home       3.0  100.0      0.00
4  2024-08-03  ProductD    Furniture       4.0  300.0      0.15


In [None]:
all_sales_data.fillna({
    'Date': 0,
    'Product': 'Unknown',
    'Category':'Unknown',
    'Quantity': 0,
    'Price': 0,
    'Discount':0

}, inplace=True)
print(all_sales_data.head())

         Date   Product     Category  Quantity  Price  Discount
0  2024-08-01  ProductA  Electronics      10.0  200.0      0.05
1  2024-08-01  ProductB         Home       5.0  150.0      0.10
2  2024-08-02  ProductA  Electronics       7.0  200.0      0.00
3  2024-08-02  ProductC         Home       3.0  100.0      0.00
4  2024-08-03  ProductD    Furniture       4.0  300.0      0.15


# Calculates additional information like net price (Net_Price = Price × (1 - Discount)) and
# total revenue (Total_Revenue = Quantity × Net_Price) for each sale.

In [None]:
# (Net_Price = Price × (1 - Discount))
all_sales_data['Net_Price'] = all_sales_data['Price'] * (1 - all_sales_data['Discount'])

# (Total_Revenue = Quantity × Net_Price)
all_sales_data['Total_Revenue'] = all_sales_data['Quantity'] * all_sales_data['Net_Price']

print(all_sales_data.head())


         Date   Product     Category  Quantity  Price  Discount  Net_Price  \
0  2024-08-01  ProductA  Electronics      10.0  200.0      0.05      190.0   
1  2024-08-01  ProductB         Home       5.0  150.0      0.10      135.0   
2  2024-08-02  ProductA  Electronics       7.0  200.0      0.00      200.0   
3  2024-08-02  ProductC         Home       3.0  100.0      0.00      100.0   
4  2024-08-03  ProductD    Furniture       4.0  300.0      0.15      255.0   

   Total_Revenue  
0         1900.0  
1          675.0  
2         1400.0  
3          300.0  
4         1020.0  


# Combines data from all stores into a single overview

In [None]:
summary = all_sales_data.groupby('Product').agg({
    'Quantity': 'sum',
    'Total_Revenue': 'sum'
}).reset_index()
print(all_sales_data.head())

         Date   Product     Category  Quantity  Price  Discount  Net_Price  \
0  2024-08-01  ProductA  Electronics      10.0  200.0      0.05      190.0   
1  2024-08-01  ProductB         Home       5.0  150.0      0.10      135.0   
2  2024-08-02  ProductA  Electronics       7.0  200.0      0.00      200.0   
3  2024-08-02  ProductC         Home       3.0  100.0      0.00      100.0   
4  2024-08-03  ProductD    Furniture       4.0  300.0      0.15      255.0   

   Total_Revenue  
0         1900.0  
1          675.0  
2         1400.0  
3          300.0  
4         1020.0  


# Saves the summary results to a new CSV file.

In [19]:
output_path = os.path.join('/content/data_1.csv', '/content/data_2.csv','/content/data_3.csv','/content/summary_sales _data.csv')
summary.to_csv(output_path, index=False)