In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [3]:
# Exercício 1
owid_energy_data = pd.read_csv('owid-energy-data.csv')

countries = ['United States', 'Canada', 'Brazil', 'Mexico']
filtered_index = owid_energy_data.country.isin(countries)
energy_data = owid_energy_data[filtered_index]
energy_data.to_csv('filtered-energy-data.csv')

In [None]:
# Exercício 2
plt.figure()
colors = ['blue', 'red', 'green', 'yellow']

for country, color in zip(countries, colors):
    data = energy_data[(energy_data.country == country)
                       & (energy_data['oil_electricity'].notnull())]
    plt.plot(data.year, data['oil_electricity'], '-', color=color, label=country)

plt.xlabel('year')
plt.ylabel('oil electricity')
plt.title('Electricity production from oil in certain countries')
plt.legend()
plt.show()

In [None]:
# Exercício 3
usa_energy_data = energy_data[(energy_data.country == 'United States')
                              & (energy_data.year == 2010)]

elec_sources = ['coal_electricity', 'biofuel_electricity',
                'fossil_electricity', 'gas_electricity',
                'hydro_electricity', 'nuclear_electricity',
                'oil_electricity']

usa_electricity = usa_energy_data[elec_sources].values.flatten().tolist()

plt.figure()
plt.title('Electricity production from the United States in 2010')
elec_sources = [s.replace('_', ' ').capitalize() for s in elec_sources]
plt.pie(usa_electricity, labels=elec_sources)
plt.show()

In [None]:
# Exercício 4
def highest_nuclear_consumption(country: str) -> None:
    country_data: pd.DataFrame = energy_data.loc[(energy_data.country == country),
                                                 ['year', 'nuclear_consumption']]
    i = country_data['nuclear_consumption'].idxmax()
    year = country_data.loc[i, 'year']
    consumption = country_data.loc[i, 'nuclear_consumption']
    print(f'Nuclear consumption of {country} in {year} was {consumption}')

In [None]:
print('Highest nuclear consumption year per country:')
[highest_nuclear_consumption(country) for country in countries]

In [None]:
# Exercício 5
sns.regplot(data=owid_energy_data, x='gas_consumption', y='nuclear_consumption')

plt.xlabel('Gas consumption')
plt.ylabel('Nuclear consumption')
plt.title(f'Gas consumption / Nuclear consumption')

grid = sns.FacetGrid(energy_data, col="country", hue="country", col_wrap=2,
                     sharex=False, sharey=False)
grid.map(sns.regplot, 'gas_consumption', 'nuclear_consumption')
grid.add_legend()

plt.show()

In [None]:
# Exercício 6
from warnings import simplefilter
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

simplefilter("ignore")

# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(11, 4))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
)
%config InlineBackend.figure_format = 'retina'


def plot_multistep(y, every=1, ax=None, palette_kwargs=None):
    palette_kwargs_ = dict(palette='husl', n_colors=16, desat=None)
    if palette_kwargs is not None:
        palette_kwargs_.update(palette_kwargs)
    palette = sns.color_palette(**palette_kwargs_)
    if ax is None:
        fig, ax = plt.subplots()
    ax.set_prop_cycle(plt.cycler('color', palette))
    for date, preds in y[::every].iterrows():
        preds.index = pd.period_range(start=date, periods=len(preds))
        preds.plot(ax=ax)
    return ax

In [4]:
# Exercício 6
solar_data: pd.DataFrame = energy_data.loc[(energy_data.country == countries[0])
                                           & (energy_data['solar_consumption'].notnull()),
                                           ['year', 'solar_consumption']]
solar_data.set_index('year', drop=True)
print(solar_data.head())

       year  solar_consumption
16126  1965                0.0
16127  1966                0.0
16128  1967                0.0
16129  1968                0.0
16130  1969                0.0
