In [None]:
# GDP Growth Analysis Project
## Day 1: Data Fetching

**Project Goal:** Analyze GDP growth across continents and countries

**Data Sources:**
- REST Countries API: Country information and continent mapping
- World Bank API: GDP data from 2000-2023

---

In [None]:
# Import libraries (these are tools we'll use)
import pandas as pd  # For working with data tables
import requests      # For getting data from websites
import json         # For working with JSON data format
import time         # For adding delays between requests

# Announce the start of the run, followed by a 50-character divider line.
print("🚀 Starting GDP Growth Analysis Project!", "=" * 50, sep="\n")

# Step 1: Get data about countries and their continents
def fetch_country_data():
    """
    Fetch basic information about every country from the REST Countries API.

    Only the fields this project uses are requested: name, region,
    subregion, continents, and the cca2/cca3 country codes.

    Returns:
        The decoded JSON payload on success (the rest of this notebook
        treats it as a list with one dict per country), or None when the
        request fails, times out, returns a non-200 status, or the body is
        not valid JSON. Problems are reported with print() rather than
        raised.
    """
    print("📍 Getting country information...")

    url = "https://restcountries.com/v3.1/all"
    params = {
        'fields': 'name,region,subregion,continents,cca2,cca3'
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection and the notebook cell would never finish.
        response = requests.get(url, params=params, timeout=30)

        if response.status_code != 200:
            print(f"❌ Error: {response.status_code}")
            print(f"Response: {response.text}")
            return None

        # Decode before announcing success so a malformed body is reported
        # as a failure instead of "success" followed by an error.
        countries = response.json()

    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout problems; ValueError
        # covers a body that is not valid JSON.
        print(f"❌ Something went wrong: {e}")
        return None

    print("✅ Successfully got country data!")
    return countries

# Step 2: Get GDP data from World Bank
def fetch_gdp_data():
    """
    Fetch GDP records for all countries from the World Bank API, 2000-2023.

    GDP = Gross Domestic Product (how much money a country makes in a year).
    The indicator requested is NY.GDP.MKTP.CD.

    The API answers with a two-element JSON list: paging metadata first,
    the records second. Every page reported by the metadata is fetched, so
    no records are lost if the result ever exceeds one page.

    Returns:
        A list of record dicts collected from all pages, or None when a
        request fails, times out, returns a non-200 status, or a response
        carries no record list. Problems are reported with print() rather
        than raised.
    """
    print("💰 Getting GDP data from World Bank...")

    url = "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.MKTP.CD"
    params = {
        'format': 'json',           # We want JSON format
        'date': '2000:2023',        # Years 2000 to 2023
        'per_page': 20000,          # Get lots of data at once
        'page': 1,                  # Advanced below if more pages exist
    }
    records = []

    try:
        while True:
            # Without a timeout, requests waits indefinitely on a stalled
            # connection; this response is large, so allow a generous limit.
            response = requests.get(url, params=params, timeout=60)

            if response.status_code != 200:
                print(f"❌ Error: {response.status_code}")
                print(f"Response: {response.text}")
                return None

            data = response.json()

            # The actual data is in the second element [1]; anything else
            # (for example a one-element error payload) means no data.
            has_rows = isinstance(data, list) and len(data) > 1 and data[1] is not None
            if not has_rows:
                print("❌ No GDP data found")
                return None

            records.extend(data[1])

            # Stop once the page just read is the last one the API reports.
            meta = data[0] if isinstance(data[0], dict) else {}
            if params['page'] >= int(meta.get('pages') or 1):
                break
            params['page'] += 1

    except (requests.RequestException, ValueError, TypeError) as e:
        # RequestException: connection/timeout problems.
        # ValueError/TypeError: invalid JSON or an unusable page count.
        print(f"❌ Something went wrong: {e}")
        return None

    print("✅ Successfully got GDP data!")
    print(f"📊 Retrieved {len(records)} data points")
    return records

# Step 3: Simple API test function (for troubleshooting)
def test_apis():
    """
    Run a quick smoke test against both APIs and print the outcome.

    One lightweight request is sent to each service and its HTTP status is
    reported. Connection problems (including timeouts) are printed rather
    than raised, so a failing service never stops the notebook. Nothing is
    returned.
    """
    print("🧪 Testing APIs...")

    # (heading marker, API name, URL, query parameters) for each probe
    checks = [
        (
            "1️⃣",
            "REST Countries",
            "https://restcountries.com/v3.1/all",
            {'fields': 'name'},
        ),
        (
            "2️⃣",
            "World Bank",
            "https://api.worldbank.org/v2/country/US/indicator/NY.GDP.MKTP.CD",
            {'format': 'json', 'date': '2022', 'per_page': 1},
        ),
    ]

    for marker, api_name, url, params in checks:
        print(f"\n{marker} Testing {api_name} API...")
        try:
            # A timeout keeps an unresponsive service from hanging the test.
            response = requests.get(url, params=params, timeout=15)
        except requests.RequestException as e:
            print(f"   ❌ Connection error: {e}")
            continue

        print(f"   Status: {response.status_code}")
        if response.status_code == 200:
            print(f"   ✅ {api_name} API is working!")
        else:
            # Only the first 100 characters of the body are shown.
            print(f"   ❌ Error: {response.text[:100]}...")

    print("\n🧪 API tests complete!")

# Step 4: Run the API smoke test, then download both datasets
print("\n🔧 First, let's test if the APIs are working...")
test_apis()

print("\n🔄 Now starting full data collection...")

# Country data: report how many entries arrived, or that nothing did
countries_data = fetch_country_data()
print(
    f"📝 Found information for {len(countries_data)} countries"
    if countries_data
    else "⚠️ Could not get country data. Check your internet connection."
)

# Pause for two seconds between the two downloads to be nice to the APIs
time.sleep(2)

# GDP data: same reporting pattern as above
gdp_data = fetch_gdp_data()
print(
    f"💹 Found GDP data with {len(gdp_data)} records"
    if gdp_data
    else "⚠️ Could not get GDP data. This might be a temporary issue."
)

print("\n✨ Data fetching complete!")

# Step 5: Print a short preview of whatever was downloaded
if not countries_data:
    print("\n❌ No country data to display")
else:
    print("\n🔍 Sample country data (first country):")
    first_country = countries_data[0]
    print(f"   Country: {first_country.get('name', {}).get('common', 'Unknown')}")
    print(f"   Region: {first_country.get('region', 'Unknown')}")
    # Show the first listed continent, or "Unknown" when none is listed
    first_continents = first_country.get('continents')
    shown_continent = first_continents[0] if first_continents else "Unknown"
    print(f"   Continent: {shown_continent}")

    # Numbered one-line preview of up to five entries
    print("\n📋 Quick preview of first 5 countries:")
    for position, entry in enumerate(countries_data[:5], start=1):
        label = entry.get('name', {}).get('common', 'Unknown')
        area = entry.get('region', 'Unknown')
        listed = entry.get('continents')
        landmass = listed[0] if listed else 'Unknown'
        print(f"   {position}. {label} - {area} ({landmass})")

if not gdp_data:
    print("\n❌ No GDP data to display")
else:
    print("\n🔍 Sample GDP data (first few records):")
    for position, row in enumerate(gdp_data[:3], start=1):
        who = row.get('country', {}).get('value', 'Unknown')
        when = row.get('date', 'Unknown')
        amount = row.get('value')

        # A record may exist for a country/year without carrying a value
        shown_amount = "No data" if amount is None else f"${amount:,.0f}"
        print(f"   {position}. {who} ({when}): {shown_amount}")

    # Count how many records have actual GDP values
    records_with_data = len([row for row in gdp_data if row.get('value') is not None])
    print(f"   📊 {records_with_data} out of {len(gdp_data)} records have GDP values")

# Step 6: Summary of what we collected
print("\n" + "="*50)
print("📊 DATA COLLECTION SUMMARY:")
print("="*50)

if countries_data:
    print(f"✅ Countries: {len(countries_data)} countries collected")
    # Each entry carries a list of continents, so merge them all into one
    # set to count the distinct ones.
    continents = set()
    for country in countries_data:
        continents.update(country.get('continents') or [])
    print(f"🌍 Continents covered: {len(continents)} ({', '.join(sorted(continents))})")
else:
    print("❌ Countries: No data collected")

if gdp_data:
    # Distinct country names and years present in the GDP records
    countries_in_gdp = {
        record.get('country', {}).get('value')
        for record in gdp_data
        if record.get('country')
    }
    years_in_gdp = {record.get('date') for record in gdp_data if record.get('date')}
    records_with_values = sum(1 for record in gdp_data if record.get('value') is not None)

    print(f"✅ GDP Data: {len(gdp_data)} total records")
    print(f"📈 Countries with GDP data: {len(countries_in_gdp)}")
    if years_in_gdp:
        print(f"📅 Years covered: {min(years_in_gdp)} to {max(years_in_gdp)}")
    else:
        # min()/max() raise ValueError on an empty set, which would abort
        # the summary if no record carried a date.
        print("📅 Years covered: Unknown")
    print(f"💰 Records with values: {records_with_values}")
else:
    print("❌ GDP Data: No data collected")
