In [1]:
import pandas as pd

# Reference ("standard") dataset, downloaded from Kaggle; the metadata for the
# CSV file is kept in the same folder as this notebook.
standardDB = pd.read_csv(filepath_or_buffer='StandardDb.csv')
# Preview the first five rows (the default for head()).
print(standardDB.head(n=5))

   Gender  Height  Weight  Index
0    Male     174      96      4
1    Male     189      87      2
2  Female     185     110      4
3  Female     195     104      3
4    Male     149      61      3


In [75]:
# Basic info from standard database

# Split the standard dataset into one frame per gender.
standardDB_male = standardDB.loc[standardDB["Gender"] == "Male"]
standardDB_female = standardDB.loc[standardDB["Gender"] == "Female"]

# Column-wise means of the numeric columns for the male rows.
Male_means = standardDB_male.mean(numeric_only=True)
average_male_height = Male_means["Height"]
average_male_weight = Male_means["Weight"]

# Column-wise means of the numeric columns for the female rows.
Female_means = standardDB_female.mean(numeric_only=True)
average_female_height = Female_means["Height"]
average_female_weight = Female_means["Weight"]

print("Average Male Weight = ", average_male_weight)
print("Average Male Height = ", average_male_height)
print("Average Female Weight = ", average_female_weight)
print("Average Female Height = ", average_female_height)

def calculate_average_bmi(df, Col, min_bmi=5, max_bmi=50):
    """Return the mean BMI over the rows of ``df``, skipping out-of-range values.

    BMI is computed per row as ``weight / (height / 100) ** 2``; the height
    column is divided by 100 before squaring (presumably centimetres converted
    to metres -- confirm against the data source).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per person. The frame is only read; no column is added to it,
        so callers do not need to pass a copy.
    Col : sequence of str
        Column names in the order ``[weight_column, height_column]``.
    min_bmi, max_bmi : float, optional
        Exclusive bounds; only BMI values strictly between them contribute to
        the mean. The defaults (5 and 50) are the bounds that were previously
        hard-coded.

    Returns
    -------
    float or None
        ``None`` if any name in ``Col`` is not a column of ``df``; otherwise the
        mean of the in-range BMI values (NaN when no value is in range).
    """
    # Bail out early when a requested column does not exist.
    if any(name not in df.columns for name in Col):
        return None

    weight_col, height_col = Col[0], Col[1]

    # Work on a local Series instead of writing a 'BMI' column into ``df``.
    bmi = df[weight_col] / ((df[height_col] / 100) ** 2)

    # NaN and +/-inf (e.g. from a zero height) fail both comparisons and are
    # therefore excluded together with the implausible values.
    in_range = (bmi > min_bmi) & (bmi < max_bmi)
    return bmi[in_range].mean()

# Mean BMI per gender for the standard dataset. Each call receives its own copy
# of the gender-specific frame, so the frames defined above are left untouched.
male_bmi_average = calculate_average_bmi(df=standardDB_male.copy(), Col=["Weight", "Height"])
female_bmi_average = calculate_average_bmi(df=standardDB_female.copy(), Col=["Weight", "Height"])

print("Average Male BMI = ", male_bmi_average)

print("Average Female BMI = ", female_bmi_average)

Average Male Weight =  106.31428571428572
Average Male Height =  169.64897959183673
Average Female Weight =  105.69803921568628
Average Female Height =  170.22745098039215
Average Male BMI =  33.25715463576634
Average Female BMI =  32.479909012171284


In [86]:
# Data from users database

# Import the collected user data from MongoDB.
import os

from pymongo import MongoClient

# SECURITY: the connection string embeds a username and password, so it must
# not be stored in the notebook. It is read from the MONGO_URI environment
# variable instead.
# NOTE(review): the credential that used to be hard-coded here has been
# committed in plain text and should be rotated.
Mongo_uri = os.environ.get("MONGO_URI")
if not Mongo_uri:
    raise RuntimeError(
        "Set the MONGO_URI environment variable to the MongoDB connection "
        "string before running this cell."
    )

client = MongoClient(Mongo_uri)
db = client["connectDB"]
user_collection = db["users"]

# Projection: exclude the _id, __v, age, state and city fields from each document.
all_users_cursor = user_collection.find(
    {}, {"_id": 0, "__v": 0, "age": 0, "state": 0, "city": 0}
)

# List of all users; the cursor is closed even if reading from it fails.
try:
    all_users = list(all_users_cursor)
finally:
    all_users_cursor.close()

# Divide the list between males and females. ``.get`` is used so a document
# without a "gender" field is skipped instead of raising KeyError.
all_users_male = [user for user in all_users if user.get("gender") == "male"]
all_users_female = [user for user in all_users if user.get("gender") == "female"]
print(all_users_male)
# The data could also be interpreted state-wise, but because too little data is
# available the study is limited to the country level (India) only.

[{'weight': 66, 'gender': 'male', 'height': 176}, {'weight': 70, 'gender': 'male', 'height': 180}, {'weight': 69, 'gender': 'male', 'height': 180}, {'weight': 69, 'gender': 'male', 'height': 180}, {'weight': 67, 'gender': 'male', 'height': 168}, {'weight': 82, 'gender': 'male', 'height': 188}, {'weight': 66, 'gender': 'male', 'height': 176}, {'weight': 94, 'gender': 'male', 'height': 24}, {'weight': 64, 'gender': 'male', 'height': 171}]


In [116]:
# Data Filtering 

#A normal person has bmi in range [10-50]
def is_normal_person(data):
    """Return True when the record's BMI lies in the inclusive range [10, 50].

    ``data`` is indexed with the keys ``"weight"`` and ``"height"``; BMI is
    computed as ``weight / (height / 100) ** 2``.

    Records that cannot yield a meaningful BMI -- a missing key, a None or
    non-numeric value, or a height that is zero or negative -- are reported as
    not normal (False) instead of raising, so one malformed record cannot abort
    the filtering of the whole list.
    """
    try:
        weight, height = data["weight"], data["height"]
        # A zero height would divide by zero; a negative one would square to a
        # plausible-looking BMI. Reject both.
        if height <= 0:
            return False
        bmi = weight / (height / 100) ** 2
    except (KeyError, TypeError):
        return False
    return 10 <= bmi <= 50
    
# Keep only the records that pass the BMI plausibility check.
all_users_male = list(filter(is_normal_person, all_users_male))
all_users_female = list(filter(is_normal_person, all_users_female))
# print(all_users_female)

In [98]:
# Basic info from users database

# Column-wise means of the numeric fields for each gender.
average_indian_male_stats = pd.DataFrame(all_users_male).mean(numeric_only=True)
average_indian_female_stats = pd.DataFrame(all_users_female).mean(numeric_only=True)

# Mean BMI for each gender; a fresh DataFrame is built for every call.
average_users_male_bmi = calculate_average_bmi(
    df=pd.DataFrame(all_users_male), Col=["weight", "height"]
)
average_users_female_bmi = calculate_average_bmi(
    df=pd.DataFrame(all_users_female), Col=["weight", "height"]
)

print("Average Indian Male Stats")
print("bmi\t", average_users_male_bmi)
print(average_indian_male_stats, "\n\n")

print("Average Indian Female Stats")
print("bmi\t", average_users_female_bmi)
print(average_indian_female_stats)

Average Indian Male Stats
bmi	 21.95468111962029
weight     69.125
height    177.375
dtype: float64 


Average Indian Female Stats
bmi	 22.496314406808697
weight     54.285714
height    154.428571
dtype: float64


In [99]:
# information about bmi

def bmi_category(bmi):
    """Map a BMI value to its weight-class label.

    The upper bounds are exclusive and checked in ascending order:
    below 18.5 -> "Underweight", below 25 -> "Normal weight",
    below 30 -> "Overweight"; any other value -> "Obese".
    """
    upper_bounds = (
        (18.5, "Underweight"),
        (25, "Normal weight"),
        (30, "Overweight"),
    )
    for limit, label in upper_bounds:
        if bmi < limit:
            return label
    # Nothing matched: the value is at or above the last bound.
    return "Obese"

In [115]:
# Comparisons with regionless data

def _print_summary(heading, male_height, male_weight, female_height,
                   female_weight, male_bmi, female_bmi):
    """Print ``heading`` followed by the rounded averages and BMI classes."""
    print(heading)
    print(f"Average Height Male : {round(male_height, 2)}")
    print(f"Average Weight Male : {round(male_weight, 2)}")
    print(f"Average Height Female : {round(female_height, 2)}")
    print(f"Average Weight Female : {round(female_weight, 2)}")
    print(f"Average BMI Male : {round(male_bmi, 2)}")
    print(f"BMI class: {bmi_category(male_bmi)}")
    print(f"Average BMI Female : {round(female_bmi, 2)}")
    print(f"BMI class: {bmi_category(female_bmi)}")


# Averages computed from the standard dataset.
_print_summary(
    "Regionless Data",
    average_male_height,
    average_male_weight,
    average_female_height,
    average_female_weight,
    male_bmi_average,
    female_bmi_average,
)


# Averages computed from the collected user records.
_print_summary(
    "\n\nRegionial Data",
    average_indian_male_stats.height,
    average_indian_male_stats.weight,
    average_indian_female_stats.height,
    average_indian_female_stats.weight,
    average_users_male_bmi,
    average_users_female_bmi,
)


Regionless Data
Average Height Male : 169.65
Average Weight Male : 106.31
Average Height Female : 170.23
Average Weight Female : 105.7
Average BMI Male : 33.26
BMI class: Obese
Average BMI Female : 32.48
BMI class: Obese


Regionial Data
Average Height Male : 177.38
Average Weight Male : 69.12
Average Height Female : 154.43
Average Weight Female : 54.29
Average BMI Male : 21.95
BMI class: Normal weight
Average BMI Female : 22.5
BMI class: Normal weight


In [None]:
# Outcomes 
"""
## Analysis of Body Mass Index (BMI) Data

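This report presents the findings of a BMI analysis for two datasets: Regionless Data (the standard dataset imported from Kaggle, which carries no regional information) and Regional Data (the user records collected in our own database, limited to India).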

**Regionless Data:**

* **Males:**
    * Average Height: 169.65 cm
    * Average Weight: 106.31 kg
    * Average BMI: 33.26 (Obese)
* **Females:**
    * Average Height: 170.23 cm
    * Average Weight: 105.7 kg
    * Average BMI: 32.48 (Obese)

**Observations:**

* Both males and females in the Regionless Data set fall under the "Obese" category according to their average BMI.
* This indicates a potential public health concern as obesity can lead to various health risks.

**Regional Data:**

* **Males:**
    * Average Height: 177.38 cm
    * Average Weight: 69.12 kg
    * Average BMI: 21.95 (Normal weight)
* **Females:**
    * Average Height: 154.43 cm
    * Average Weight: 54.29 kg
    * Average BMI: 22.5  (Normal weight)

**Observations:**

* Both males and females in the Regional Data set fall under the "Normal weight" category according to their average BMI.
* This suggests a significant difference in weight management compared to the Regionless Data.

**Possible Explanations for the Discrepancy:**

* **Regional factors:** Diet, exercise habits, and access to healthcare might vary across regions.
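* **Sample size:** The Regional Data is a very small sample (only eight male records remain after filtering out implausible entries), so it might not represent the entire population of the region.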
* **Missing information:** The cause for the lack of regional information in the first dataset is unclear.

**Recommendations:**

* Further investigation is needed to understand the reasons behind the observed differences.
* Strategies to promote healthy weight management across all regions could be beneficial.

**Limitations:**

* This analysis only considers average values and may not reflect individual variations.
* The lack of information about the origin of the Regionless Data limits further conclusions.

**Conclusion:**

The provided data highlights a potential public health concern in the Regionless Data set due to high average BMI. While the Regional Data suggests a healthier weight range, further investigation is needed to understand the contributing factors and explore potential interventions for overall population health improvement.

"""