In [1]:
import pandas as pd
import numpy as np

# Read the CSV file
df = pd.read_csv('Country-data.csv')

# Calculate basic statistics for all numerical columns
print("Basic Statistics for Numerical Columns:")
print(df.describe())

# Let's create some interesting analyses by grouping countries based on income levels
# First, create income categories
df['income_category'] = pd.cut(df['income'], 
                              bins=[0, 5000, 20000, 50000, float('inf')],
                              labels=['Low Income', 'Middle Income', 'High Income', 'Very High Income'])

# Calculate mean values for each income category
print("\nMean Values by Income Category:")
group_means = df.groupby('income_category')[['life_expec', 'child_mort', 'health', 'total_fer']].mean()
print(group_means)

# Find correlations between numerical columns
print("\nCorrelation Matrix:")
correlation_matrix = df.select_dtypes(include=[np.number]).corr()
print(correlation_matrix.round(2))

# Interesting findings for high child mortality countries
print("\nTop 5 Countries with Highest Child Mortality:")
print(df.nlargest(5, 'child_mort')[['country', 'child_mort', 'life_expec', 'income']])

Basic Statistics for Numerical Columns:
       child_mort     exports      health     imports         income  \
count  167.000000  167.000000  167.000000  167.000000     167.000000   
mean    38.270060   41.108976    6.815689   46.890215   17144.688623   
std     40.328931   27.412010    2.746837   24.209589   19278.067698   
min      2.600000    0.109000    1.810000    0.065900     609.000000   
25%      8.250000   23.800000    4.920000   30.200000    3355.000000   
50%     19.300000   35.000000    6.320000   43.300000    9960.000000   
75%     62.100000   51.350000    8.600000   58.750000   22800.000000   
max    208.000000  200.000000   17.900000  174.000000  125000.000000   

        inflation  life_expec   total_fer           gdpp  
count  167.000000  167.000000  167.000000     167.000000  
mean     7.781832   70.555689    2.947964   12964.155689  
std     10.570704    8.893172    1.513848   18328.704809  
min     -4.210000   32.100000    1.150000     231.000000  
25%      1.81000

  group_means = df.groupby('income_category')[['life_expec', 'child_mort', 'health', 'total_fer']].mean()
