In [3]:
import pandas as pd

# Load the cleaned dataset from cleaned.csv
df = pd.read_csv('cleaned.csv')

# Keep only the rows whose Disability_indicator equals 1
has_disability = df['Disability_indicator'].eq(1)
filtered_df = df.loc[has_disability]

# Report how many rows survived the filter
num_rows = filtered_df.shape[0]
print(f"Number of rows with Disability_indicator as 1: {num_rows}")

Number of rows with Disability_indicator as 1: 2876


In [4]:
# Keep rows where at least one of Difficulty_walking, Difficulty_hearing or
# Difficulty_seeing holds a value of 2, 3 or 4
difficulty_columns = ['Difficulty_walking', 'Difficulty_hearing', 'Difficulty_seeing']
has_difficulty = filtered_df[difficulty_columns].isin([2, 3, 4]).any(axis=1)
filtered_df = filtered_df.loc[has_difficulty]

# Report how many rows remain after the difficulty filter
num_filtered_rows = filtered_df.shape[0]
print(f"Number of rows after filtering: {num_filtered_rows}")

# Narrow the frame to 'pid', the disability indicator and the three difficulty columns
filtered_df = filtered_df[['pid', 'Disability_indicator', *difficulty_columns]]

# Write the narrowed frame to a new CSV file (row index omitted)
filtered_df.to_csv('filtered_disability_data.csv', index=False)

Number of rows after filtering: 2722


In [5]:
import pandas as pd

# Load filtered_disability_data.csv and attach an empty 'issue' column;
# the column is a placeholder that is overwritten once determine_issue
# has been applied to every row further down in this cell
filtered_df = pd.read_csv('filtered_disability_data.csv').assign(issue='')

# Define a function to determine the issue based on difficulty attributes
def determine_issue(row):
    """Build the comma-separated issue label for a single row.

    Each difficulty column maps to one label:
    'Difficulty_walking' -> 'mobility', 'Difficulty_hearing' -> 'dhh',
    'Difficulty_seeing' -> 'blv'.

    Case 1: every column whose value is 3 or 4 contributes its label.
    Case 2: only when no column holds a 3 or 4, every column whose value
    is 2 contributes its label instead.

    Parameters
    ----------
    row : mapping indexable by column name (e.g. a pandas Series or a dict)
        Must provide 'Difficulty_walking', 'Difficulty_hearing' and
        'Difficulty_seeing'.

    Returns
    -------
    str
        The labels joined with ', ' in walking, hearing, seeing order,
        or an empty string when no column matches either case.
    """
    label_by_column = (
        ('Difficulty_walking', 'mobility'),
        ('Difficulty_hearing', 'dhh'),
        ('Difficulty_seeing', 'blv'),
    )

    # Case 1: When 3 or 4 exists, append the respective issue
    issues = [label for column, label in label_by_column if row[column] in (3, 4)]

    # Case 2: When no 3 or 4 exists, append any 2.
    # The fallback is keyed on "Case 1 found nothing" rather than on
    # "some value is > 2": a value above 2 that is neither 3 nor 4 adds no
    # label in Case 1, so it must not suppress the level-2 labels either.
    if not issues:
        issues = [label for column, label in label_by_column if row[column] == 2]

    return ', '.join(issues)

# Compute the issue label row by row
filtered_df['issue'] = filtered_df.apply(determine_issue, axis=1)

# Break the comma-separated label into one column per entry and name the
# resulting columns issue_1, issue_2, issue_3, ... (1-based position)
issues_split = (
    filtered_df['issue']
    .str.split(', ', expand=True)
    .rename(columns=lambda position: f'issue_{position + 1}')
)

# Attach the per-entry columns to the right of the existing columns
filtered_df = pd.concat([filtered_df, issues_split], axis=1)

# Write the frame, including 'issue' and the issue_N columns, to a new CSV file
filtered_df.to_csv('filtered_disability_data_with_issues.csv', index=False)