In [1]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the raw user records from the working directory.
df = pd.read_csv('user_data.csv')

# Validate every column this script reads before doing any work, so a
# malformed CSV fails here with one clear message instead of a KeyError
# part-way through, after the first chart has already been written.
required_columns = [
    'age', 'total_income', 'gender',
    'utilities', 'entertainment', 'school_fees', 'shopping', 'healthcare',
]
if df.empty:
    raise ValueError("DataFrame is empty. Check 'user_data.csv'.")
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"DataFrame is missing required columns {missing_columns}. Check 'user_data.csv'."
    )

# Unparseable income values become NaN; rows lacking an age or a usable
# income are then dropped so they cannot skew the per-age averages.
df['total_income'] = pd.to_numeric(df['total_income'], errors='coerce')
df = df.dropna(subset=['age', 'total_income'])

# Visualization 1: the five ages with the highest mean income
mean_income_by_age = df.groupby('age')['total_income'].mean()
high_income_ages = mean_income_by_age.sort_values(ascending=False).head(5)

# Refuse to draw unless there is at least one numeric value to plot
has_plottable_values = (
    not high_income_ages.empty
    and pd.api.types.is_numeric_dtype(high_income_ages)
)
if not has_plottable_values:
    raise ValueError("No valid numeric data for 'high_income_ages'. Check data integrity.")

# One colour per bar; draw on an explicitly created 10x6 figure
bar_colors = ['#00796b', '#d81b60', '#ffca28', '#0288d1', '#7b1fa2']
fig, ax = plt.subplots(figsize=(10, 6))
high_income_ages.plot(kind='bar', color=bar_colors, ax=ax)
ax.set_title('Top 5 Ages with Highest Average Income', fontsize=14, color='#424242')
ax.set_xlabel('Age', fontsize=12)
ax.set_ylabel('Average Income ($)', fontsize=12)
fig.tight_layout()
fig.savefig('high_income_ages.png')
plt.close(fig)

# Visualization 2: total spending per category, grouped by gender
spending_categories = ['utilities', 'entertainment', 'school_fees', 'shopping', 'healthcare']
spending_by_gender = df.groupby('gender')[spending_categories].sum()

# Every summed column must be numeric, otherwise the bars are meaningless
if spending_by_gender.empty or not all(
    pd.api.types.is_numeric_dtype(dtype) for dtype in spending_by_gender.dtypes
):
    raise ValueError("No valid numeric data for 'spending_by_gender'. Check data integrity.")

# DataFrame.plot opens a brand-new figure unless it is handed an Axes. Creating
# the figure with plt.figure() and then plotting left that figure empty and
# never closed (the stray "<Figure size 1000x600 with 0 Axes>"), while the
# saved chart silently used the default size. Build the figure and axes
# explicitly and draw on them so figsize applies and nothing is leaked.
fig, ax = plt.subplots(figsize=(10, 6))
spending_by_gender.plot(
    kind='bar',
    ax=ax,
    color=['#00796b', '#d81b60', '#ffca28', '#0288d1', '#7b1fa2'],
)
ax.set_title('Gender Distribution Across Spending Categories', fontsize=14, color='#424242')
ax.set_xlabel('Gender', fontsize=12)
ax.set_ylabel('Total Spending ($)', fontsize=12)
ax.legend(title='Categories')
fig.tight_layout()
fig.savefig('spending_by_gender.png')
plt.close(fig)  # close the figure that was actually drawn on

print("Visualizations saved as PNG files.")


Visualizations saved as PNG files.


<Figure size 1000x600 with 0 Axes>