# Connect with the database

In [2]:
%%capture
%load_ext sql
%sql sqlite:///factbook.db

# View available tables from database

In [3]:
%%sql
SELECT * FROM sqlite_master
WHERE type = 'table';

 * sqlite:///factbook.db
Done.


type,name,tbl_name,rootpage,sql
table,sqlite_sequence,sqlite_sequence,3,"CREATE TABLE sqlite_sequence(name,seq)"
table,facts,facts,47,"CREATE TABLE ""facts"" (""id"" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, ""code"" varchar(255) NOT NULL, ""name"" varchar(255) NOT NULL, ""area"" integer, ""area_land"" integer, ""area_water"" integer, ""population"" integer, ""population_growth"" float, ""birth_rate"" float, ""death_rate"" float, ""migration_rate"" float)"


# Check the data from "facts" table

In [4]:
%%sql
SELECT * 
    FROM facts 
LIMIT 5;

 * sqlite:///factbook.db
Done.


id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
1,af,Afghanistan,652230,652230,0,32564342,2.32,38.57,13.89,1.51
2,al,Albania,28748,27398,1350,3029278,0.3,12.92,6.58,3.3
3,ag,Algeria,2381741,2381741,0,39542166,1.84,23.67,4.31,0.92
4,an,Andorra,468,468,0,85580,0.12,8.13,6.96,0.0
5,ao,Angola,1246700,1246700,0,19625353,2.78,38.78,11.49,0.46


# Description of available columns
    Population Growth: Percentage indicating annual population change.

    Birth Rate: Annual births per 1,000 people.

    Death Rate: Annual deaths per 1,000 people.

    Geographical Area: Total land and water area.

    Land Area: Exclusive land area in square kilometers.

    Water Area: Exclusive water area in square kilometers.

    Country Name: Name of the country.

# Create basic summary statistics

In [5]:
%%sql
SELECT 
    MIN(population) AS min_population,
    MAX(population) AS max_population,
    MIN(population_growth) AS min_population_growth,
    MAX(population_growth) AS max_population_growth
FROM facts;

 * sqlite:///factbook.db
Done.


min_population,max_population,min_population_growth,max_population_growth
0,7256490011,0.0,4.02


# Diving into suspicious data
There are countires with population 0 and over 7 billion, I will write new query to view name of this records

In [6]:
%%sql
SELECT *
FROM facts
WHERE population == (SELECT MIN(population)
                        FROM facts);

 * sqlite:///factbook.db
Done.


id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
250,ay,Antarctica,,280000,,0,,,,


In [7]:
%%sql
SELECT *
FROM facts
WHERE population == (SELECT MAX(population)
                        FROM facts);

 * sqlite:///factbook.db
Done.


id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
261,xx,World,,,,7256490011,1.08,18.6,7.8,



The table appears to have a row for the entire world with a population of over 7.2 billion and a separate row for Antarctica with a population of 0, I will now exclude World for my calculation

In [67]:
%%sql
SELECT 
    MIN(population) AS min_population,
    MAX(population) AS max_population,
    MIN(population_growth) AS min_population_growth,
    MAX(population_growth) AS max_population_growth
FROM facts
    WHERE name != 'World';

 * sqlite:///factbook.db
Done.


min_population,max_population,min_population_growth,max_population_growth
0,1367485388,0.0,4.02


# Average Population and Average Area
I will again exclude World for calculation

In [9]:
%%sql
SELECT AVG(population), AVG(area)
FROM facts
WHERE name != 'World';

 * sqlite:///factbook.db
Done.


AVG(population),AVG(area)
32242666.56846473,555093.546184739


We can see that average population is close to 33 million people and average area is around 555 thousand square km

# Create New View "facts_countries"
To avoid removing world from analysis in every query I will create new view without row World

In [29]:
%%sql
DROP VIEW IF EXISTS facts_countries;
CREATE VIEW facts_countries AS
SELECT * FROM facts WHERE name != 'World';

 * sqlite:///factbook.db
Done.
Done.


[]

In [31]:
%%sql
SELECT *
FROM facts_countries
WHERE population > (SELECT AVG(population)
    FROM facts_countries
    )
AND area < (SELECT AVG(area)
    FROM facts_countries
    );

 * sqlite:///factbook.db
Done.


id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
14,bg,Bangladesh,148460,130170,18290,168957745,1.6,21.14,5.61,0.46
65,gm,Germany,357022,348672,8350,80854408,0.17,8.47,11.42,1.24
80,iz,Iraq,438317,437367,950,37056169,2.93,31.45,3.77,1.62
83,it,Italy,301340,294140,7200,61855120,0.27,8.74,10.19,4.1
85,ja,Japan,377915,364485,13430,126919659,0.16,7.93,9.51,0.0
91,ks,"Korea, South",99720,96920,2800,49115196,0.14,8.19,6.75,0.0
120,mo,Morocco,446550,446300,250,33322699,1.0,18.2,4.81,3.36
138,rp,Philippines,300000,298170,1830,100998376,1.61,24.27,6.11,2.09
139,pl,Poland,312685,304255,8430,38562189,0.09,9.74,10.19,0.46
163,sp,Spain,505370,498980,6390,48146134,0.89,9.64,9.04,8.31


# View which country has the most people and which country has the highest growth rate

In [32]:
%%sql
SELECT 
  (SELECT name 
   FROM facts_countries
   WHERE population = (SELECT MAX(population) FROM facts_countries)) 
      AS CountryWithMostPeople,
  (SELECT name 
   FROM facts_countries
   WHERE population_growth = (SELECT MAX(population_growth) FROM facts_countries)) 
      AS CountryWithHighestGrowthRate;

 * sqlite:///factbook.db
Done.


CountryWithMostPeople,CountryWithHighestGrowthRate
China,South Sudan


# View which countries have the highest ratios of water to land

In [33]:
%%sql
SELECT 
    name,
    area_water,
    area_land, 
    ROUND(CAST(area_water AS float)/area_land,3) AS water_land_ratio 
FROM facts_countries 
    ORDER BY water_land_ratio DESC
    LIMIT 5;

 * sqlite:///factbook.db
Done.


name,area_water,area_land,water_land_ratio
British Indian Ocean Territory,54340,60,905.667
Virgin Islands,1564,346,4.52
Puerto Rico,4921,8870,0.555
"Bahamas, The",3870,10010,0.387
Guinea-Bissau,8005,28120,0.285


# View which countries have more water than land

In [34]:
%%sql
SELECT 
    name,
    area_water,
    area_land
FROM facts_countries
    WHERE area_water>area_land
    LIMIT 5;

 * sqlite:///factbook.db
Done.


name,area_water,area_land
British Indian Ocean Territory,54340,60
Virgin Islands,1564,346


It could be also done in previous query with water_land_ratio over 1

# View which countries will add the most people to their populations next year

In [53]:
%%sql
SELECT
    name,
    population,
    population_growth * population AS population_next_year,
    population_growth,
    (population_growth * population) - population AS new_people
FROM facts_countries 
    ORDER BY new_people DESC
    LIMIT 1;

 * sqlite:///factbook.db
Done.


name,population,population_next_year,population_growth,new_people
India,1251695584,1527068612.48,1.22,275373028.48


# Which countries have a higher death rate than birth rate?

In [64]:
%%sql
SELECT 
    name,
    birth_rate,
    death_rate 
FROM facts_countries 
    WHERE death_rate > birth_rate
    ORDER BY death_rate DESC 
    LIMIT 10;

 * sqlite:///factbook.db
Done.


name,birth_rate,death_rate
Ukraine,10.72,14.46
Bulgaria,8.92,14.44
Latvia,10.0,14.31
Lithuania,10.1,14.27
Russia,11.6,13.69
Serbia,9.08,13.66
Belarus,10.7,13.36
Hungary,9.16,12.73
Moldova,12.0,12.59
Estonia,10.51,12.4


# Which countries have the highest population/area ratio

In [69]:
%%sql
SELECT 
    name,
    population,
    area_land,
    population/area_land AS population_area_ratio 
FROM facts_countries 
    ORDER BY population_area_ratio DESC
    LIMIT 10;

 * sqlite:///factbook.db
Done.


name,population,area_land,population_area_ratio
Macau,592731,28,21168
Monaco,30535,2,15267
Singapore,5674472,687,8259
Hong Kong,7141106,1073,6655
Gaza Strip,1869055,360,5191
Gibraltar,29258,6,4876
Bahrain,1346613,760,1771
Maldives,393253,298,1319
Malta,413965,316,1310
Bermuda,70196,54,1299
