# In [None]:
# %% [markdown]
"""
# India Census 2011 Analysis

This notebook analyzes the 2011 India Census data across all districts
and states. The dataset contains:
- Population statistics
- Demographic breakdowns
- Literacy rates
- Religious composition
- Worker classifications
"""

# %%
import pandas as pd
import numpy as np

# Configure pandas display settings for wide census tables:
# - display.max_columns=None removes the column limit so no columns are elided
# - display.width=1000 allows printed output up to 1000 characters wide
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# %% [markdown]
"""
## Data Loading and Initial Inspection
"""
# %%
def load_census_data(filepath):
    """Load the census CSV and verify that the core columns are present.

    This is a best-effort loader for notebook use: every failure is reported
    with ``print`` and signalled by returning ``None`` rather than raising.
    The success message is printed only after the column check has passed,
    so an invalid file is never reported as "loaded successfully".

    Args:
        filepath: Location of the census CSV, passed straight to
            ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` if the file is missing, cannot be
        read, or lacks any of the required columns.
    """
    # Only these columns are checked here; other columns used by later
    # analysis steps are not validated by this function.
    required_columns = ['State_name', 'District_name', 'Population',
                        'Male_Workers', 'Female_Workers', 'Literate']

    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print("Error: File not found. Please check the file path.")
        return None
    except Exception as e:
        # Deliberately broad: any read/parse failure is reported to the
        # notebook user instead of aborting the cell with a traceback.
        print(f"An error occurred while loading the data: {str(e)}")
        return None

    # Validate before announcing success.
    missing_cols = [col for col in required_columns if col not in df.columns]
    if missing_cols:
        print("An error occurred while loading the data: "
              f"Missing required columns: {missing_cols}")
        return None

    print(f"Dataset loaded successfully with {df.shape[0]} districts and {df.shape[1]} features")
    return df

# Load the dataset from a path relative to the working directory.
# load_census_data reports problems by printing and returns None on failure,
# so census_df is None if the file could not be loaded.
census_df = load_census_data("Census_2011_Dataset.csv")

# %% [markdown]
"""
## Data Presentation Enhancements
"""
# %%
def enhance_census_display(df):
    """Improve dataframe display for analysis"""
    
    # 1. Hide indexes in display
    print("\n=== First 5 Districts (Index Hidden) ===")
    display(df.head().style.hide(axis='index'))
    
    # 2. Add caption to dataframe
    print("\n=== Dataset with Descriptive Caption ===")
    display(df.head().style.set_caption('India Census 2011 Dataset - District Level Data'))
    
    return df

# Render the styled previews. enhance_census_display returns its argument,
# so census_df is rebound to the same DataFrame.
census_df = enhance_census_display(census_df)

# %% [markdown]
"""
## District-Level Analysis
"""
# %%
def analyze_districts(df):
    """Display district-level views of the census data.

    Shows the rows whose ``District_name`` is one of a fixed list of major
    cities, then a five-row preview of the data indexed by
    ``District_code``. Output goes through ``display``, which this file does
    not import (presumably the notebook-provided IPython helper).

    Args:
        df: Census DataFrame with ``District_name`` and ``District_code``
            columns.

    Returns:
        The same ``df`` object; ``set_index`` is applied to a copy for the
        preview only, so the input keeps its original index.
    """
    # 3. Rows for a fixed set of major-city districts
    major_cities = ['New Delhi', 'Lucknow', 'Jaipur']
    city_label = ', '.join(major_cities)
    print(f"\n=== Census Data for {city_label} ===")
    is_major_city = df['District_name'].isin(major_cities)
    display(df.loc[is_major_city])

    # 6. Demonstrate using the district code as the index (not persisted)
    print("\n=== District Code as Index (Demonstration) ===")
    indexed_preview = df.set_index('District_code').head()
    display(indexed_preview)

    return df

# Run the district-level views. analyze_districts returns its argument,
# so census_df is rebound to the same DataFrame.
census_df = analyze_districts(census_df)

# %% [markdown]
"""
## State-Level Analysis
"""
# %%
def analyze_states(df):
    """Perform comprehensive state-level analyses"""
    
    # 4A. State-wise population totals
    print("\n=== State-wise Population Totals ===")
    state_population = df.groupby('State_name')['Population'].sum().sort_values(ascending=False)
    display(state_population.head(10).to_frame('Total Population'))
    
    # 4B. Religious composition by state
    religions = ['Hindus', 'Muslims', 'Christians', 'Sikhs', 'Buddhists', 'Jains']
    print("\n=== Religious Composition by State ===")
    religion_by_state = df.groupby('State_name')[religions].sum().sort_values('Hindus', ascending=False)
    display(religion_by_state.head(10).style.format("{:,.0f}"))
    
    # 5. Male workers in Maharashtra
    print("\n=== Male Workers in Maharashtra ===")
    maharashtra_male_workers = df[df['State_name'] == 'MAHARASHTRA']['Male_Workers'].sum()
    print(f"Total Male Workers in Maharashtra: {maharashtra_male_workers:,}")
    
    return df

# Run the state-level aggregates. analyze_states returns its argument,
# so census_df is rebound to the same DataFrame.
census_df = analyze_states(census_df)

# %% [markdown]
"""
## Data Transformation
"""
# %%
def transform_census_data(df):
    """Demonstrate column renaming with a suffix and a prefix.

    Displays two-row previews of the data with ``_2011`` appended to every
    column name and with ``Census_`` prepended to every column name. Both
    previews are copies; the input keeps its original column names. Output
    goes through ``display``, which this file does not import (presumably
    the notebook-provided IPython helper).

    Args:
        df: Census DataFrame to preview.

    Returns:
        The same ``df`` object; it is not modified.
    """
    # 7A. Column names with a year suffix
    print("\n=== Adding Suffix to Columns (Demonstration) ===")
    suffixed_preview = df.head(2).add_suffix('_2011')
    display(suffixed_preview)

    # 7B. Column names with a dataset prefix
    print("\n=== Adding Prefix to Columns (Demonstration) ===")
    prefixed_preview = df.head(2).add_prefix('Census_')
    display(prefixed_preview)

    # The renamed frames are for demonstration only and are discarded
    print("\nRecommendation: Keep original column names for clarity")
    return df

# Show the suffix/prefix demonstrations. transform_census_data returns its
# argument with the original column names, so census_df is unchanged.
census_df = transform_census_data(census_df)

# %% [markdown]
"""
## Key Findings Summary
1. **Population Distribution**:
   - Most populous state: [STATE1] with [X] people
   - Least populous state: [STATE2] with [Y] people

2. **Religious Composition**:
   - [STATE3] has the highest Hindu population ([Z]%)
   - [STATE4] has the highest Muslim population ([A]%)

3. **Workforce Statistics**:
   - Maharashtra has [B] million male workers
   - [Other significant workforce findings]

4. **Literacy Patterns**:
   - Highest literacy in [DISTRICT1] at [C]%
   - Lowest literacy in [DISTRICT2] at [D]%
"""

# %% [markdown]
"""
## Additional Recommended Analyses
"""
# %%
def additional_analyses(df):
    """Add district-level literacy and sex-ratio columns and show state figures.

    Two columns are added to ``df`` in place:

    * ``Literacy_Rate``: ``Literate / Population * 100`` per row.
    * ``Gender_Ratio``: ``Female / Male * 1000`` per row.

    The state-level tables are computed from state totals (sum of the
    numerator divided by sum of the denominator), not by averaging the
    district ratios. Averaging district ratios gives a tiny district the
    same weight as a very large one and therefore does not yield the
    state's actual rate.

    Tables go through ``display``, which this file does not import
    (presumably the notebook-provided IPython helper).

    Args:
        df: Census DataFrame with ``State_name``, ``Literate``,
            ``Population``, ``Female`` and ``Male`` columns.

    Returns:
        The same ``df`` object, now carrying the two added columns.
    """
    # Literacy rate: per-district column, population-weighted per state
    print("\n=== Literacy Rates by State ===")
    df['Literacy_Rate'] = (df['Literate'] / df['Population']) * 100
    literacy_totals = df.groupby('State_name')[['Literate', 'Population']].sum()
    literacy_by_state = (
        literacy_totals['Literate'] / literacy_totals['Population'] * 100
    ).sort_values(ascending=False)
    display(literacy_by_state.head(10).to_frame('Literacy Rate (%)'))

    # Sex ratio: per-district column, computed from state totals per state
    print("\n=== Gender Ratios by State ===")
    df['Gender_Ratio'] = (df['Female'] / df['Male']) * 1000
    gender_totals = df.groupby('State_name')[['Female', 'Male']].sum()
    gender_ratio_by_state = (
        gender_totals['Female'] / gender_totals['Male'] * 1000
    ).sort_values(ascending=False)
    display(gender_ratio_by_state.head(10).to_frame('Females per 1000 Males'))

    return df

# Run the extra analyses. additional_analyses adds the Literacy_Rate and
# Gender_Ratio columns to the DataFrame in place and returns it.
census_df = additional_analyses(census_df)