In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  

In [None]:
# Location of the Aerofit case-study CSV under the Kaggle input directory.
# A single leading slash is used on purpose: POSIX leaves the meaning of a
# path that begins with exactly two slashes implementation-defined.
DATA_PATH = "/kaggle/input/health-equipment-manufacturer-data/Aerofit_Case_Study.csv"

# `df` is the raw frame that every later cell in this notebook reads from.
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded frame (head() shows 5 rows by default).
df.head()

In [None]:
# Report the frame's dimensions: the raw (rows, columns) tuple first,
# then each component with a label.
row_count, column_count = df.shape
print((row_count, column_count))
print(f"No of rows:{row_count}")
print(f"No of columns:{column_count}")

In [None]:
# Summary statistics for every column: include='all' reports count/unique/top/freq
# for the non-numeric columns alongside the numeric mean/std/quartiles.
df.describe(include='all')

In [None]:
# ⭐From above information we can interpret that:
#-->Out of 180 customers, males are 104 in number while female are (180-104=76).
#-->There are 3 unique products, and "KP281" is the top with frequency 80.
#-->Marital status of most of the customers is "Partnered" with frequency 107.
#-->Age of the customers ranges from min=18 to max=50, with a mean of about 28.79;
#   75% of the customers are aged <=33,
#   50% of the customers are aged <=26,
#   25% of the customers are aged <=24.
#-->Range of education of the customers is (min=12,max=21), 
#   while 75% customer's education is <=16, 
#   50% customer's education is <=16,
#   25% customer's education is <=14.
#-->Standard deviation of Income and Miles is very high, hence these variables might have
#   outliers in it.

## Marginal Probability 



In [None]:
# Marginal distribution of each categorical column, as a percentage of all rows.
# melt() stacks the three columns into long form: `variable` holds the source
# column name and `value` holds the category label.
df1 = df[['Product', 'Gender', 'MaritalStatus']].melt()

# Rows per (column, category) pair.
level_counts = df1.groupby(['variable', 'value'])[['value']].count()

# Convert the counts to a percentage of the customer rows, rounded to 2 decimals.
(level_counts / len(df) * 100).round(2)

In [None]:
# Print each column's dtype and non-null count; used to check for missing values.
df.info()

In [None]:
#⭐ There are no NULL values in the data.

In [None]:
# Number of rows for each Product value, most frequent first.
df['Product'].value_counts() 

## Univariate_Analysis

## Understanding the distribution of data for the quantitative attributes:
## ['Age', 'Education', 'Usage','Fitness', 'Income', 'Miles']

In [None]:
# Histogram with a KDE overlay for each quantitative column, on a 3x2 grid.
fig, axis = plt.subplots(3, 2, figsize=(10, 5))
# `top` is a fraction of the figure height, so 2 places the grid's top edge
# above the nominal canvas and makes the panels taller.
fig.subplots_adjust(top=2)

quantitative_columns = ["Age", "Education", "Usage", "Fitness", "Income", "Miles"]
# axis.flat walks the grid row by row, so the columns fill it
# left-to-right, top-to-bottom.
for column_name, panel in zip(quantitative_columns, axis.flat):
    sns.histplot(data=df, x=column_name, kde=True, ax=panel)
plt.show()

In [None]:
# Outlier check: one boxplot per quantitative column, on a 3x2 grid.

fig, axis = plt.subplots(3, 2, figsize=(10, 5))
# `top` is a fraction of the figure height, so 2 places the grid's top edge
# above the nominal canvas and makes the panels taller.
fig.subplots_adjust(top=2)

quantitative_columns = ["Age", "Education", "Usage", "Fitness", "Income", "Miles"]
# axis.flat walks the grid row by row, so the columns fill it
# left-to-right, top-to-bottom.
for column_name, panel in zip(quantitative_columns, axis.flat):
    sns.boxplot(data=df, x=column_name, ax=panel)
plt.show()

In [None]:
#⭐ From the boxplots it is evident that
#-->Income and Miles have more outliers than the other attributes.

## Understanding the distribution of data for the Categorical  attributes:
## ['Product', 'Gender', 'MaritalStatus']

In [None]:
# Frequency of each category for the three categorical columns, side by side.
fig, axis = plt.subplots(1, 3, figsize=(15, 5))
categorical_columns = ("Product", "Gender", "MaritalStatus")
for column_name, panel in zip(categorical_columns, axis):
    sns.countplot(data=df, x=column_name, ax=panel)
plt.show()

In [None]:
## ⭐ From the above count plots of the data set we can conclude that:
#--> In the Product variable, "KP281" is the most frequent.
#--> In the Gender variable, "Male" is the most frequent.
#--> In the MaritalStatus variable, "Partnered" is the most frequent.

## Bivariate Analysis

## Checking whether the variables 'Gender' or 'MaritalStatus' have any effect on the product purchased.

In [None]:
# Product counts split by Gender (left panel) and by MaritalStatus (right panel).
fig, axis = plt.subplots(1, 2, figsize=(10, 5))
for hue_column, panel in zip(('Gender', 'MaritalStatus'), axis):
    sns.countplot(data=df, x='Product', hue=hue_column, ax=panel)
    panel.set_title(f'Product V/S {hue_column}')
plt.show()

In [None]:
## ⭐Observation for Product V/S Gender:
# --> For product 'KP781', males have purchased more units than females.
# --> For product 'KP481', females have purchased almost as many units as males.
# --> For product 'KP281', males and females have purchased exactly the same number of units.

In [None]:
## ⭐Observation for Product V/S MaritalStatus:
# --> For product 'KP781', 'Partnered' have purchased more than 'Single'.
# --> for product 'KP481', 'Partnered' have purchased more than 'Single'.
# --> For product 'KP281', 'Partnered' have purchased more than 'Single'.

In [None]:
# Distribution of each quantitative column per product, on a 2x3 grid.
fig, axis = plt.subplots(2, 3, figsize=(18, 12))
measures = ['Age', 'Education', 'Usage', 'Fitness', 'Income', 'Miles']
# axis.flat walks the grid row by row: Age/Education/Usage on the first row,
# Fitness/Income/Miles on the second.
for measure, panel in zip(measures, axis.flat):
    # Only the Age panel names the orientation explicitly; the other panels
    # leave it to seaborn to work out from x/y.
    orientation = {'orient': 'v'} if measure == 'Age' else {}
    sns.boxplot(data=df, x='Product', y=measure, ax=panel, **orientation)
    panel.set_title(f'Product V/S {measure}')
plt.show()

In [None]:
## Observation
#-->For "Product V/S Age" 
#1-Customers purchasing products KP281 & KP481 have the same median age.
#2-Customers whose age lies between 25-30 are more likely to buy the KP781 product.

#-->For "Product V/S Education"
#1-Customers whose Education is greater than 16, have more chances to purchase the KP781 product.
#2-While the customers with Education less than 16 have equal chances of purchasing KP281 or KP481.

#-->For "Product V/S Usage"
#1-Customers who plan to use the treadmill more than 4 times a week are more likely to purchase the KP781 product.
#2-The other customers are more likely to purchase KP281 or KP481.

#-->For "Product V/S Fitness"
#1-The more the customer is fit (fitness >= 3), higher the chances of the customer to purchase the KP781 product.

#-->For "Product V/S Income"
#1- Higher the Income of the customer (Income >= 60000), higher the chances of the customer to purchase the KP781 product.

#-->For "Product V/S Miles"
#1- Customers walking more than 120 miles per week are more likely to buy the KP781 product.

## Conditional Probability 

In [None]:
def conditional_product_probability(data, product, condition_column, condition_value):
    """Return P(Product == product | condition_column == condition_value).

    The probability is estimated as a ratio of row counts:
    rows matching both the product and the condition, divided by rows
    matching the condition.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Product' column and the column named by `condition_column`.
    product : str
        Product label to compute the probability for (e.g. 'KP281').
    condition_column : str
        Name of the column that is conditioned on (e.g. 'Gender').
    condition_value : object
        Value of `condition_column` that defines the condition (e.g. 'Male').

    Returns
    -------
    float
        The conditional probability, or NaN when no row satisfies the
        condition (the probability is undefined in that case).
    """
    meets_condition = data[condition_column] == condition_value
    condition_count = int(meets_condition.sum())
    if condition_count == 0:
        # Avoid ZeroDivisionError on a condition that never occurs in the data.
        return float("nan")
    joint_count = int(((data["Product"] == product) & meets_condition).sum())
    return joint_count / condition_count


# Print P(product | gender) for every combination: all products for Male
# first, then all products for Female.
for gender in ("Male", "Female"):
    for product in ("KP281", "KP481", "KP781"):
        Conditional_probability = conditional_product_probability(df, product, "Gender", gender)
        print(f"Conditional Probability ({product}|{gender}) = {Conditional_probability:.2f}")

## Based on the analysis of the dataset and the insights gained, here are some recommendations and insights for AeroFit to improve their marketing strategies and target specific demographics for each product:

# Product Differentiation and Targeting:

KP281 (Entry-Level Treadmill): This is the most frequently purchased product. Its buyers are relatively young, with a median age around 26 — the same median as KP481 buyers in the Product V/S Age boxplot — and partnered customers bought more KP281 units than singles. It's important to target this demographic by emphasizing affordability, ease of use, and features suitable for beginners. Advertise on platforms frequented by younger individuals, both single and partnered.

KP481 (Mid-Level Treadmill): KP481 appears to attract a slightly older demographic, with a median age around 26, and is preferred by both singles and partnered customers. Emphasize its features that cater to moderate runners and the value it offers for its price point. Target both single and partnered individuals in this age group.

KP781 (Advanced Treadmill): KP781 is popular among customers in their late twenties to early thirties. It's essential to highlight its advanced features, durability, and suitability for serious runners. Target financially stable individuals, both singles and partnered, within this age range.

# Gender-Based Targeting:

Males are slightly more inclined towards KP781, which is the advanced treadmill. Consider marketing KP781 as a high-performance option for fitness enthusiasts and athletes, emphasizing its durability and advanced features to appeal to male customers.

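Among female customers, KP281 is the most common purchase (40 of 76, about 53%), while KP481 is bought by females almost as often as by males. Promote KP481 as a versatile treadmill suitable for various fitness levels and lifestyles. Highlight its value proposition and user-friendly features.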

# Marital Status Influence:

Both single and partnered individuals purchase AeroFit treadmills, but the choice of the product may vary. Singles tend to opt for the entry-level KP281, while partnered customers are more evenly distributed between KP481 and KP781. Consider tailoring marketing messages accordingly.


# Age Matters:

Younger customers (early twenties) prefer KP281, which aligns with their fitness needs and budget. Target this demographic through online platforms, social media, and fitness forums.

As customers get older (mid-twenties to early thirties), their preferences shift toward mid-level and advanced treadmills (KP481 and KP781). Use targeted advertising on fitness websites, email marketing, and fitness events to reach this age group.

# Income Levels:

Income levels vary across different age groups. Consider offering financing options or discounts for younger customers who may have a lower income but still want access to better fitness equipment.


# Fitness Enthusiast Segment:


Identify customers who rate their fitness level as 4 or 5. These are potential advocates for AeroFit products. Consider creating a loyalty program or referral program to encourage them to promote your products within their fitness communities.