In [None]:
# tidyverse
library(ggplot2)
library(readr)
library(tibble)
library(glue)

# others
library(latex2exp)
library(scales)


# Exercise 1 - Vectors and data frames



In [None]:
# Import the table of Scottish lochs. read.csv() already assumes a header row
# and comma-separated fields, so no extra arguments are required.
df_lakes <- read.csv(file = "../data/lochs_of_Scotland.csv")

# Display the imported data frame.
df_lakes


In [None]:
# Keep only columns 1, 2 and 4 and give them short names in one step.
# Per the original note, the dropped columns are those that do not hold a
# volume or an area, or that are expressed in miles -- TODO confirm against
# the CSV header.
df_lakes <- setNames(
  df_lakes[, c(1, 2, 4)],
  c("Loch", "Volume", "Area")
)

# Display the trimmed data frame.
df_lakes


## 1. Evaluate the highest and lowest volume and area lake



In [None]:
# Row positions of the lochs with the extreme volume and area.
# which.max()/which.min() return the first matching position and skip NAs.
idx_max_vol <- which.max(df_lakes$Volume)
idx_min_vol <- which.min(df_lakes$Volume)
idx_max_area <- which.max(df_lakes$Area)
idx_min_area <- which.min(df_lakes$Area)

# Report the results, one line per quantity.
# Each line is built from an explicit template: cat()'s default sep = " "
# would otherwise insert a space before every comma, a double space before
# every number and a leading space at the start of lines 2-4.
# format() keeps the default 7-significant-digit number rendering that cat()
# itself would use; as.character() guards against the name column being a
# factor. Units are the ones stated in the original messages.
cat(
  sprintf(
    "Highest volume lake: %s, with volume %s km^3",
    as.character(df_lakes$Loch[idx_max_vol]),
    format(df_lakes$Volume[idx_max_vol])
  ),
  sprintf(
    "Lowest volume lake: %s, with volume %s km^3",
    as.character(df_lakes$Loch[idx_min_vol]),
    format(df_lakes$Volume[idx_min_vol])
  ),
  sprintf(
    "Highest area lake: %s, with area %s km^2",
    as.character(df_lakes$Loch[idx_max_area]),
    format(df_lakes$Area[idx_max_area])
  ),
  sprintf(
    "Lowest area lake: %s, with area %s km^2",
    as.character(df_lakes$Loch[idx_min_area]),
    format(df_lakes$Area[idx_min_area])
  ),
  sep = "\n"
)


## 2. Order the frame with respect to the area and determine the two largest area lakes

Ordered dataframe:


In [None]:
# Sort the lochs from the largest to the smallest area.
lakes_byarea <- df_lakes[order(df_lakes[["Area"]], decreasing = TRUE), ]

# Display the sorted data frame.
lakes_byarea


In [None]:
# Names in the first two rows of the area-sorted table, i.e. the two lochs
# with the largest area.
largest_area_2 <- lakes_byarea[["Loch"]][c(1, 2)]

glue("2 largest area lakes: {largest_area_2[1]} and {largest_area_2[2]}")


## 3. By summing up the areas occupied by the lakes, determine the area of Scotland covered by water



In [None]:
# Total area of all lochs listed in the table (sum of the Area column).
area_water <- sum(df_lakes[["Area"]])

glue("Area of Scotland covered by water: {area_water} km^2")


Reference and data: <https://en.wikipedia.org/wiki/List_of_lochs_of_Scotland>

# Exercise 2 - Crude Oil Production

## 1. Write R code that is able to read the file and import it in a data frame structure

The last column of the data frame contains data on crude oil prices from 1861 to 2020, measured in US dollars per barrel.


In [None]:
# Import the crude oil price table (header row and comma separator are the
# read.csv() defaults).
df_oilprices <- read.csv(file = "../data/crude-oil-prices.csv")

# Keep the first three column names as read and call the fourth one "Price".
names(df_oilprices) <- c(names(df_oilprices[1:3]), "Price")

# Display the imported data frame.
df_oilprices


## 2. Produce a plot with the Oil price as a function of the year



In [None]:
# Scatter plot of the oil price against the year.
# Axis breaks are set by hand and minor grid lines are switched off on both
# axes; the x axis is limited to 1850-2025.
gg <- ggplot(data = df_oilprices, mapping = aes(x = Year, y = Price)) +
  geom_point(colour = "navyblue") +
  scale_x_continuous(
    limits = c(1850, 2025),
    breaks = seq(from = 1850, to = 2050, by = 25),
    minor_breaks = NULL
  ) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 120, by = 15),
    minor_breaks = NULL
  ) +
  labs(
    title = "Crude Oil Prices from 1861 to 2020 ($/barrel)",
    x = "Year",
    y = "Price",
    caption = "Source: https://ourworldindata.org/grapher/crude-oil-prices"
  ) +
  theme_bw()

# Render the figure.
plot(gg)


## 3. What is the highest price in history? When did it occur?



In [None]:
# Locate the record price once and read both the price and its year from the
# same row. max() returns NA as soon as the column contains an NA, whereas
# which.max() skips NAs, so computing them separately could report a price
# and a year that do not belong together.
idx_highest_price <- which.max(df_oilprices$Price)
highest_price <- df_oilprices$Price[idx_highest_price]
highest_price_year <- df_oilprices$Year[idx_highest_price]

# The price is shown with 5 significant digits.
glue('Highest price in history: {format(highest_price, digits=5)} $/barrel.\n
  It occurred in {highest_price_year}.')


## 4. Plot the derivative of the curve, simply evaluated with the finite difference formula

$$
\frac{\partial price}{\partial year} \approx price_{j+1}-price_{j}
$$


In [None]:
# Forward finite difference of the price series: price[j + 1] - price[j].
# NOTE(review): this equals the derivative with respect to the year only if
# the rows are sorted by year with a one-year step -- confirm against the data.
prices <- df_oilprices$Price
years <- df_oilprices$Year

# diff() returns the lagged differences directly. The previous indexing,
# `1:length(prices)-1`, parses as `(1:n) - 1` = 0:(n - 1) because `:` binds
# tighter than `-`, and only worked because R silently drops index 0.
derivatives <- diff(prices)

# Each difference is attached to the earlier of its two years, so the last
# year has no entry.
df_derivatives <- data.frame(
  Year = head(years, -1),
  Derivative = derivatives
)

head(df_derivatives)


In [None]:
# Scatter plot of the year-to-year price variation.
# Axis breaks are set by hand and minor grid lines are switched off on both
# axes; the x axis is limited to 1850-2025.
gg <- ggplot(data = df_derivatives, mapping = aes(x = Year, y = Derivative)) +
  geom_point(colour = "navyblue") +
  labs(
    title = "Annual Variation of Crude Oil Prices from 1861 to 2020 ($/barrel)",
    y = TeX("$\\Delta$Price"),
    x = "Year",
    caption = "Source: https://ourworldindata.org/grapher/crude-oil-prices"
  ) +
  scale_x_continuous(
    breaks = seq(1850, 2050, 25),
    minor_breaks = NULL,
    limits = c(1850, 2025)
  ) +
  # The stray trailing comma after `minor_breaks = NULL` passed an empty
  # argument to the scale; it has been removed.
  scale_y_continuous(
    breaks = seq(-45, 45, 15),
    minor_breaks = NULL
  ) +
  theme_bw()

# Render the figure.
plot(gg)


Reference and data: <https://ourworldindata.org/grapher/crude-oil-prices>

# Exercise 3 - World Coal Production

## 1. Write R code that is able to read the file and import it in a tibble structure


In [None]:
# Import the coal production table with readr; the first row of the file is
# used for the column names, which is the read_csv() default.
coal_prod <- read_csv(file = "../data/coal-production-by-country.csv")

# Keep the first three column names as read and call the fourth one
# "Production".
names(coal_prod) <- c(names(coal_prod[1:3]), "Production")

# Display the imported table.
coal_prod


In [None]:
# Check that the imported object is a tibble.
tibble::is_tibble(coal_prod)



## 2. Count the number of countries available in the file and produce a barplot with the number of entries for each country



In [None]:
# Distinct values of the Entity column.
# NOTE(review): Entity may also contain aggregates (regions, "World") that
# are not countries -- confirm against the data.
countries <- unique(coal_prod[["Entity"]])

cat("Number of countries:", length(countries))


In [None]:
# Horizontal bar chart of the number of rows available for each entity.
# geom_bar() counts rows per x value by default; the discrete axis is
# reversed before the flip.
gg <- ggplot(coal_prod, mapping = aes(x = Entity)) +
  geom_bar(fill = "steelblue", width = 0.7) +
  scale_x_discrete(limits = rev) +
  coord_flip() +
  labs(
    title = "Number of entries for each country",
    x = "Country",
    y = "Count",
    caption = "Source: https://ourworldindata.org/grapher/coal-production-by-country"
  ) +
  theme_bw()

# Display the figure.
gg


## 3. Selecting only the years after 1970, determine the total integrated production for each country and print the top 5 countries with the highest coal production

## 4. For the 5 top Countries, create a plot of production as a function of time

## 5. Generate a plot with the cumulative sum of the World's coal production over the years

Reference and data: <https://ourworldindata.org/grapher/coal-production-by-country>

# Exercise 4 - Covid19 Vaccine data

## File 'vaccinations-by-manufacturer.csv'

### 1. Filter() the original tibble by selecting the following countries: Italy

### 2. Plot the number of vaccines given as a function of time for the different vaccine manufacturer

### 3. From the same tibble plot the total number of vaccines shot per day in Italy

### 4. Do the same exercise for the following countries: Germany and United States of America

Data: <https://github.com/owid/covid-19-data/blob/master/public/data/vaccinations/vaccinations-by-manufacturer.csv>

## File 'vaccinations.csv'

### 1. Selecting all the European countries in the tibble, plot the number of daily vaccinations per million as a function of date

### 2. Study the data structure and produce a few relevant plots of your choice

Data: <https://github.com/owid/covid-19-data/blob/master/public/data/vaccinations/vaccinations.csv>

## 
