In [20]:
import pandas as pd

# Location of the province-level CSV that this cell summarises.
file_path = r"C:\Users\felic\Desktop\GELO-BOT\Province.csv"

# Columns to summarise. Names are matched after leading/trailing whitespace
# has been stripped from the CSV header; columns that previously caused
# errors are deliberately left out of this list.
columns_of_interest = [
    'Poverty Incidence among Families Estimates (%) 2015u',
    'Poverty Incidence among Families Estimates (%) 2018u',
    'Magnitude of Poor Families Estimate (\'000) 2015u',
    'Magnitude of Poor Families Estimate (\'000) 2018u',
    'Poverty Incidence among Families Coefficient of Variation 2015u',
    'Poverty Incidence among Families Coefficient of Variation 2018u',
    'Poverty Incidence among Families Standard Error 2015u',
    'Poverty Incidence among Families Standard Error 2018u',
]


def find_missing_columns(frame, columns):
    """Return the entries of *columns* that are not columns of *frame*.

    Args:
        frame: DataFrame whose header is checked.
        columns: Iterable of column labels that are expected to exist.

    Returns:
        A list of the missing labels, in the order given (empty when every
        label is present).
    """
    return [name for name in columns if name not in frame.columns]


def build_summary_stats(selected):
    """Build a per-column table of summary statistics.

    Args:
        selected: DataFrame of numeric columns (NaN entries are skipped by
            the pandas reductions used here).

    Returns:
        A DataFrame indexed by the column names of *selected*, with the
        columns 'Mean', 'Median', 'Standard Deviation', 'Range',
        'Interquartile Range (IQR)', 'Min' and 'Max'.
    """
    lowest = selected.min()
    highest = selected.max()
    return pd.DataFrame({
        'Mean': selected.mean(),
        'Median': selected.median(),
        'Standard Deviation': selected.std(),
        'Range': highest - lowest,
        'Interquartile Range (IQR)': (
            selected.quantile(0.75) - selected.quantile(0.25)
        ),
        'Min': lowest,
        'Max': highest,
    })


# The guard is also true when this code runs as a notebook cell; it only
# prevents the file read from firing when the helpers above are imported.
if __name__ == "__main__":
    # Load the data
    data = pd.read_csv(file_path)

    # Clean up column names by stripping any leading/trailing spaces
    data.columns = data.columns.str.strip()

    # Check for missing columns *before* touching them: indexing with a
    # missing label raises KeyError, and the original conversion step did
    # that outside its try block, so the friendly message was never shown.
    missing_columns = find_missing_columns(data, columns_of_interest)
    if missing_columns:
        print(
            "Error: The following columns are not found in the dataset: "
            f"{missing_columns}"
        )
    else:
        # Convert columns to numeric, coercing errors to NaN
        data[columns_of_interest] = data[columns_of_interest].apply(
            pd.to_numeric, errors='coerce'
        )
        selected_data = data[columns_of_interest]

        # Descriptive Statistics
        descriptive_stats = selected_data.describe()

        # Mean, median, spread and extremes combined into a single DataFrame
        summary_stats = build_summary_stats(selected_data)

        # Same names the cell defined before, in case later cells use them.
        medians = summary_stats['Median']
        range_values = summary_stats['Range']
        iqr = summary_stats['Interquartile Range (IQR)']

        # Print the summary statistics
        print("\nSummary Statistics:")
        print(summary_stats)



Summary Statistics:
                                                    Mean  Median  \
Poverty Incidence among Families Estimates (%) ... 24.21   21.50   
Poverty Incidence among Families Estimates (%) ... 17.12   14.40   
Magnitude of Poor Families Estimate ('000) 2015u   48.13   31.55   
Magnitude of Poor Families Estimate ('000) 2018u   34.54   26.60   
Poverty Incidence among Families Coefficient of... 18.53   14.40   
Poverty Incidence among Families Coefficient of... 11.43   10.20   
Poverty Incidence among Families Standard Error...  3.61    3.35   
Poverty Incidence among Families Standard Error...  1.46    1.40   

                                                    Standard Deviation  Range  \
Poverty Incidence among Families Estimates (%) ...               15.16  71.60   
Poverty Incidence among Families Estimates (%) ...               13.91  74.60   
Magnitude of Poor Families Estimate ('000) 2015u                 42.63 203.50   
Magnitude of Poor Families Estimate ('000)