In [9]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [10]:
# Path of the source CSV that holds the order-level records.
DATA_PATH = '/content/Business_Analytics_Dataset_10000_Rows.csv'

# Load the orders table; the cells below read from `df`.
df = pd.read_csv(DATA_PATH)

In [11]:
# Preview the first ten rows to sanity-check the columns and their values.
df.head(n=10)

Unnamed: 0,Order_ID,Customer_ID,Order_Date,Region,Product_Category,Customer_Segment,Quantity,Unit_Price,Discount_Rate,Revenue,Cost,Profit,Payment_Method
0,1,CUST3818,2024-08-18,North,Clothing,Corporate,5,300.68,0.27,1097.48,768.29,329.19,Credit Card
1,2,CUST9689,2024-06-19,South,Beauty,Home Office,9,32.89,0.02,290.09,179.33,110.76,Debit Card
2,3,CUST9147,2024-11-21,West,Sports,Corporate,5,345.61,0.25,1296.04,1022.6,273.44,Credit Card
3,4,CUST7938,2024-07-19,North,Clothing,Consumer,1,444.5,0.06,417.83,280.99,136.84,UPI
4,5,CUST5127,2024-10-28,South,Home & Kitchen,Consumer,5,65.13,0.21,257.26,151.9,105.36,Credit Card
5,6,CUST4834,2024-10-22,North,Sports,Consumer,1,253.38,0.01,250.85,164.5,86.35,Credit Card
6,7,CUST4649,2024-05-01,West,Electronics,Home Office,8,338.17,0.11,2407.77,1564.72,843.05,Credit Card
7,8,CUST3844,2024-08-11,South,Clothing,Corporate,9,270.42,0.24,1849.67,1413.48,436.19,UPI
8,9,CUST1425,2024-05-12,West,Electronics,Corporate,9,32.71,0.18,241.4,174.37,67.03,Credit Card
9,10,CUST3261,2024-05-07,South,Beauty,Consumer,10,15.32,0.18,125.62,67.54,58.08,Debit Card


In [12]:
# Print the frame summary: row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Order_ID          10000 non-null  int64  
 1   Customer_ID       10000 non-null  object 
 2   Order_Date        10000 non-null  object 
 3   Region            10000 non-null  object 
 4   Product_Category  10000 non-null  object 
 5   Customer_Segment  10000 non-null  object 
 6   Quantity          10000 non-null  int64  
 7   Unit_Price        10000 non-null  float64
 8   Discount_Rate     10000 non-null  float64
 9   Revenue           10000 non-null  float64
 10  Cost              10000 non-null  float64
 11  Profit            10000 non-null  float64
 12  Payment_Method    10000 non-null  object 
dtypes: float64(5), int64(2), object(6)
memory usage: 1015.8+ KB


In [13]:
# Average Order Value: total revenue divided by the number of distinct orders.
total_revenue = df['Revenue'].sum()
distinct_orders = df['Order_ID'].nunique()
average_order_value = total_revenue / distinct_orders
print(f"The Average Order Value is: {average_order_value:.2f}")

The Average Order Value is: 1195.24


In [14]:
# Parse the order dates once, then tag every row with its calendar month
# (a monthly Period) so revenue can be rolled up per month.
order_dates = pd.to_datetime(df['Order_Date'])
df['Order_Date'] = order_dates
df['YearMonth'] = order_dates.dt.to_period('M')

# Total revenue per month plus the percentage change versus the previous row
# (i.e. the previous month present in the data). The first month has no
# predecessor, so its Growth_Rate is NaN.
monthly_revenue = (
    df.groupby('YearMonth')['Revenue']
    .sum()
    .reset_index()
    .assign(Growth_Rate=lambda monthly: monthly['Revenue'].pct_change() * 100)
)

display(monthly_revenue)

Unnamed: 0,YearMonth,Revenue,Growth_Rate
0,2024-01,1030685.81,
1,2024-02,953256.37,-7.512419
2,2024-03,992355.74,4.101664
3,2024-04,979309.98,-1.314625
4,2024-05,1014429.28,3.586127
5,2024-06,987704.04,-2.63451
6,2024-07,1049945.0,6.30158
7,2024-08,996244.72,-5.11458
8,2024-09,1046701.7,5.064717
9,2024-10,974671.96,-6.881592


In [15]:
# Sum the Quantity column to get the number of units sold across all rows.
units_per_order = df['Quantity']
Total_units_sold = units_per_order.sum()
print(f"The Total Units Sold is: {Total_units_sold}")

The Total Units Sold is: 55143


In [16]:
# Number of rows (orders) recorded for each Customer_ID.
customer_purchase_counts = df['Customer_ID'].value_counts()

# A repeat customer is one that appears in more than one row.
is_repeat = customer_purchase_counts.gt(1)
repeat_customers = customer_purchase_counts[is_repeat]

total_unique_customers = customer_purchase_counts.shape[0]
num_repeat_customers = repeat_customers.shape[0]

# Share of customers with more than one order, expressed in percent.
repeat_purchase_rate = num_repeat_customers / total_unique_customers * 100

print(f"Total Unique Customers: {total_unique_customers}")
print(f"Number of Repeat Customers: {num_repeat_customers}")
print(f"Repeat Purchase Rate: {repeat_purchase_rate:.2f}%")

Total Unique Customers: 5994
Number of Repeat Customers: 2746
Repeat Purchase Rate: 45.81%


In [17]:
# Re-inspect the first five rows; the frame now carries the YearMonth column.
df.head(n=5)

Unnamed: 0,Order_ID,Customer_ID,Order_Date,Region,Product_Category,Customer_Segment,Quantity,Unit_Price,Discount_Rate,Revenue,Cost,Profit,Payment_Method,YearMonth
0,1,CUST3818,2024-08-18,North,Clothing,Corporate,5,300.68,0.27,1097.48,768.29,329.19,Credit Card,2024-08
1,2,CUST9689,2024-06-19,South,Beauty,Home Office,9,32.89,0.02,290.09,179.33,110.76,Debit Card,2024-06
2,3,CUST9147,2024-11-21,West,Sports,Corporate,5,345.61,0.25,1296.04,1022.6,273.44,Credit Card,2024-11
3,4,CUST7938,2024-07-19,North,Clothing,Consumer,1,444.5,0.06,417.83,280.99,136.84,UPI,2024-07
4,5,CUST5127,2024-10-28,South,Home & Kitchen,Consumer,5,65.13,0.21,257.26,151.9,105.36,Credit Card,2024-10


In [19]:
# NOTE(review): this re-reads the CSV and rebinds `df`, so the datetime
# conversion of Order_Date and the YearMonth column added earlier in the
# notebook are discarded from this cell onward. Confirm whether the reload is
# intentional; if not, drop it and keep working on the existing frame.
df = pd.read_csv('/content/Business_Analytics_Dataset_10000_Rows.csv')

# Overall profit margin: total profit as a percentage of total revenue.
profit_margin = (df['Profit'].sum() / df['Revenue'].sum()) * 100
print(f"The overall Profit Margin is: {profit_margin:.2f}%")

The overall Profit Margin is: 35.05%


In [20]:
# Mean of the Discount_Rate column (stored as a fraction), scaled to percent.
mean_discount_rate = df['Discount_Rate'].mean()
average_discount_percentage = mean_discount_rate * 100
print(f"The Average Discount Percentage is: {average_discount_percentage:.2f}%")

The Average Discount Percentage is: 15.03%
