# GDP per Capita Analysis
Mount Google Drive and set the working directory.

In [None]:
import os

from google.colab import drive

# Expose the Google Drive contents under /content/drive
drive.mount('/content/drive')

# Switch into the project's notebook folder on the mounted drive
os.chdir('/content/drive/MyDrive/python-data-analysis-bootcamp/notebooks')

# Echo the working directory as the cell output to confirm the change
os.getcwd()

# GDP per Capita Analysis with Pandas
We explore and visualise GDP per capita data.

## 1. Load the dataset

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the GDP per capita CSV (path is relative to the working directory)
df = pd.read_csv('../data/gdp_per_capita.csv')

# Preview the first ten rows as the cell output
df.head(n=10)

## 2. Basic inspection

In [None]:
# Print a concise summary of the dataframe: index range, column names,
# non-null counts per column, dtypes and memory usage
df.info()

In [None]:
# Summary statistics for every column; include='all' covers non-numeric
# columns as well, with NaN shown where a statistic does not apply
df.describe(include='all')

## 3. Select relevant columns

In [None]:
# Columns kept for the rest of the analysis
cols = [
    'Country/Territory',
    'UN_Region',
    'GDP_per_capita',
]

# Narrow the dataframe down to just those columns
df = df[cols]

# Show the first five rows of the trimmed dataframe
df.head(5)

## 4. Handle missing values

In [None]:
# Flag missing cells, then total the flags down each column
df.isna().sum(axis=0)

In [None]:
# Discard every row that has a missing value in any of the key columns
df_clean = df.dropna(
    axis=0,
    how='any',
    subset=['Country/Territory', 'UN_Region', 'GDP_per_capita'],
)

# Show the first five rows of the cleaned dataframe
df_clean.head(5)

## 5. GDP per capita by UN region

In [None]:
# Mean GDP per capita within each UN region
region_gdp = df_clean.groupby(by='UN_Region')['GDP_per_capita'].mean()

# Order the regions from highest to lowest average
region_gdp = region_gdp.sort_values(ascending=False)

# Show the regional averages as the cell output
region_gdp

### 5.1 Visualise regional averages

In [None]:
# Bar chart of the regional averages on a fresh 10x6 inch figure
plt.figure(figsize=(10, 6))
region_gdp.plot.bar()

# Faint horizontal gridlines to help compare bar heights
plt.grid(axis='y', alpha=0.3)

# Tilt the region names so they stay legible
plt.xticks(rotation=45, ha='right')

# Title and axis label
plt.title('GDP per Capita by Region')
plt.ylabel('Average GDP per capita')

# Fit everything inside the figure, then render it
plt.tight_layout()
plt.show()

## 6. Top and bottom countries

In [None]:
# Rank rows from highest to lowest GDP per capita, then keep the first ten
top10 = df_clean.sort_values('GDP_per_capita', ascending=False)
top10 = top10.iloc[:10]
top10

In [None]:
# Rank rows from lowest to highest GDP per capita (ascending is the
# default order), then keep the first ten
bottom10 = df_clean.sort_values('GDP_per_capita')
bottom10 = bottom10.iloc[:10]
bottom10

### 6.1 Visualise top 10 countries

In [None]:
# Horizontal bars on a fresh 10x6 inch figure: one bar per country,
# bar length given by GDP per capita
plt.figure(figsize=(10, 6))
plt.barh(y=top10['Country/Territory'], width=top10['GDP_per_capita'])

# Flip the y-axis so the first (highest) entry is drawn at the top
plt.gca().invert_yaxis()

# Faint vertical gridlines to help compare bar lengths
plt.grid(axis='x', alpha=0.3)

# Title and axis label
plt.title('Top 10 Countries')
plt.xlabel('GDP per capita')

# Fit everything inside the figure, then render it
plt.tight_layout()
plt.show()

## 7. Distribution of GDP per capita

In [None]:
# Histogram of GDP per capita with 30 bins on a fresh 8x5 inch figure
plt.figure(figsize=(8, 5))
df_clean['GDP_per_capita'].plot.hist(bins=30)

# Faint horizontal gridlines to help read the bin counts
plt.grid(axis='y', alpha=0.3)

# Title and axis labels
plt.title('GDP per Capita Distribution')
plt.xlabel('GDP per capita')
plt.ylabel('Frequency')

# Fit everything inside the figure, then render it
plt.tight_layout()
plt.show()

## 8. Conclusions
Summary of findings from the analysis of global GDP per capita data.