In [None]:
import zipfile

zip_file_path = '/content/national+poll+on+healthy+aging+(npha).zip'  # Update with the correct file path
extract_path = 'desired_extraction_directory'  # Update with the directory where you want to extract

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Extraction complete. Files extracted to:", extract_path)


Extraction complete. Files extracted to: desired_extraction_directory


In [14]:
import pandas as pd

# Load the CSV file
file_path = '/content/NPHA-doctor-visits.csv'  # Update with the correct path
data = pd.read_csv(file_path)

# Correct column names based on the dataset
columns_of_interest = ['Phyiscal Health', 'Mental Health', 'Dental Health',
                        'Employment', 'Stress Keeps Patient from Sleeping']

# Calculate the mean of each factor
factor_means = data[columns_of_interest].mean()

# Identify the top 5 factors
top_5_reasons = factor_means.sort_values(ascending=False).head(5)

# Display the results
print("Top 5 reasons for healthy aging:")
print(top_5_reasons)


Top 5 reasons for healthy aging:
Dental Health                         3.009804
Employment                            2.806723
Phyiscal Health                       2.794118
Mental Health                         1.988796
Stress Keeps Patient from Sleeping    0.247899
dtype: float64


In [15]:
# Group by 'Race' (or another community-related column) and calculate the average of health-related columns
community_health = data.groupby('Race')[['Phyiscal Health', 'Mental Health', 'Dental Health',
                                         'Employment', 'Stress Keeps Patient from Sleeping']].mean()

# Calculate a composite score for each community (average of all health factors)
community_health['Aging Issues Score'] = community_health.mean(axis=1)

# Sort by the 'Aging Issues Score' to identify the community with the highest aging issues
highest_aging_community = community_health['Aging Issues Score'].idxmin()  # Community with the lowest score
highest_aging_score = community_health['Aging Issues Score'].min()

print(f"The community with the highest aging issues is {highest_aging_community} with a score of {highest_aging_score}.")


The community with the highest aging issues is 1 with a score of 2.1491349480968855.


In [16]:
# Group by 'Gender' and calculate the average of health-related columns
gender_health = data.groupby('Gender')[['Phyiscal Health', 'Mental Health', 'Dental Health',
                                         'Employment', 'Stress Keeps Patient from Sleeping']].mean()

# Calculate a composite aging issues score for each gender
gender_health['Aging Issues Score'] = gender_health.mean(axis=1)

# Sort by Aging Issues Score to identify the gender with the most and least aging issues
most_aging_gender = gender_health['Aging Issues Score'].idxmin()  # Gender with the lowest score (most aging issues)
least_aging_gender = gender_health['Aging Issues Score'].idxmax()  # Gender with the highest score (least aging issues)

most_aging_score = gender_health['Aging Issues Score'].min()
least_aging_score = gender_health['Aging Issues Score'].max()

print(f"The gender with the most aging issues is {most_aging_gender} with a score of {most_aging_score}.")
print(f"The gender with the least aging issues is {least_aging_gender} with a score of {least_aging_score}.")


The gender with the most aging issues is 1 with a score of 2.1551401869158875.
The gender with the least aging issues is 2 with a score of 2.18117048346056.


In [17]:
# Correlation of Age, Employment, and Mental Stress with health factors
correlations = data[['Age', 'Employment', 'Stress Keeps Patient from Sleeping',
                     'Phyiscal Health', 'Mental Health', 'Dental Health']].corr()

# Display correlation matrix
print("Correlation between Age, Employment, Mental Stress and Health Aging Factors:")
print(correlations)

# Specifically, we'll look at the correlations between Age, Employment, Stress, and health factors
age_correlation = correlations['Age'][['Phyiscal Health', 'Mental Health', 'Dental Health']]
employment_correlation = correlations['Employment'][['Phyiscal Health', 'Mental Health', 'Dental Health']]
stress_correlation = correlations['Stress Keeps Patient from Sleeping'][['Phyiscal Health', 'Mental Health', 'Dental Health']]

print("\nAge correlation with health factors:")
print(age_correlation)

print("\nEmployment correlation with health factors:")
print(employment_correlation)

print("\nMental Stress correlation with health factors:")
print(stress_correlation)


Correlation between Age, Employment, Mental Stress and Health Aging Factors:
                                    Age  Employment  \
Age                                 NaN         NaN   
Employment                          NaN    1.000000   
Stress Keeps Patient from Sleeping  NaN   -0.043106   
Phyiscal Health                     NaN    0.147526   
Mental Health                       NaN    0.077469   
Dental Health                       NaN    0.076156   

                                    Stress Keeps Patient from Sleeping  \
Age                                                                NaN   
Employment                                                   -0.043106   
Stress Keeps Patient from Sleeping                            1.000000   
Phyiscal Health                                               0.034014   
Mental Health                                                 0.138074   
Dental Health                                                -0.018446   

                  

In [18]:
# Group by a relevant category (e.g., 'Gender', 'Race', or 'Age') and calculate the average number of doctor visits
doctor_visits_by_category = data.groupby('Gender')['Number of Doctors Visited'].mean()

# Find the category with the most number of doctor visits
category_with_most_visits = doctor_visits_by_category.idxmax()  # Category with the highest mean visits
most_visits = doctor_visits_by_category.max()

print(f"The category with the most number of doctor visits is {category_with_most_visits} with an average of {most_visits} visits.")


The category with the most number of doctor visits is 1 with an average of 2.1121495327102804 visits.
