In [None]:
# %%
import os

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker
import pandas as pd

In [None]:
# %%
# Load the drivers table from its CSV file (comma-separated, "." as the
# decimal mark, no thousands separator) and preview the first rows.
drivers = pd.read_csv(
    filepath_or_buffer="../files/input/drivers.csv",
    decimal=".",
    thousands=None,
    sep=",",
)
drivers.head()

In [None]:
# %%
# Load the timesheet records from their CSV file using the same parsing
# options as the drivers table, then preview the first rows.
timesheet = pd.read_csv(
    filepath_or_buffer="../files/input/timesheet.csv",
    decimal=".",
    thousands=None,
    sep=",",
)
timesheet.head()


In [None]:
# %%
# Per-driver average of every non-key column in the timesheet.
# The result is indexed by driverId.
mean_timesheet = timesheet.groupby(by="driverId").agg("mean")
mean_timesheet.head()

In [None]:
# %%
# Remove the 'week' column from the per-driver means.
# drop() returns a new frame instead of mutating in place, and
# errors="ignore" keeps this cell re-runnable: a second execution finds the
# column already gone and is a no-op instead of raising KeyError.
mean_timesheet = mean_timesheet.drop(columns="week", errors="ignore")
mean_timesheet.head()

In [None]:
#%%
# For every timesheet row, compute the mean of 'hours-logged' over all rows
# belonging to the same driver. transform() keeps the original row index, so
# the result lines up row-for-row with `timesheet`.
mean_hours_logged_by_driver = (
    timesheet["hours-logged"].groupby(timesheet["driverId"]).transform("mean")
)
mean_hours_logged_by_driver.head(15)

In [None]:
# %%
# Build a copy of the timesheet with the driver's mean hours attached as an
# extra column; the original `timesheet` frame is left untouched.
timesheet_with_means = timesheet.assign(
    **{"mean_hours-logged": mean_hours_logged_by_driver}
)
timesheet_with_means.head()

In [None]:
# %%
# Keep only the rows where the hours logged fall strictly below that
# driver's own mean, then show both ends of the filtered table.
is_below_driver_mean = timesheet_with_means["hours-logged"].lt(
    timesheet_with_means["mean_hours-logged"]
)
timesheet_below = timesheet_with_means.loc[is_below_driver_mean]
display(timesheet_below.head(), timesheet_below.tail())

In [None]:
# %%
# Per-driver totals of every non-key column in the timesheet.
# The result is indexed by driverId.
sum_timesheet = timesheet.groupby(by="driverId").agg("sum")
sum_timesheet.head(10)

In [None]:
# %%
# Restrict the totals to the two logged measures; any other summed column
# is discarded.
sum_timesheet = sum_timesheet.loc[:, ["hours-logged", "miles-logged"]]
sum_timesheet.head()

In [None]:
# Smallest and largest 'hours-logged' value recorded for each driver.
timesheet.groupby(by="driverId")["hours-logged"].aggregate(["min", "max"])

In [None]:
# Attach each driver's name to the per-driver totals by joining on driverId.
# Only the 'driverId' and 'name' columns of `drivers` take part in the join.
driver_names = drivers.loc[:, ["driverId", "name"]]
summary = sum_timesheet.merge(driver_names, on="driverId")
summary

In [None]:
# Persist the per-driver summary as a CSV file.
# exist_ok=True creates the output directory when it is missing and does
# nothing when it is already there, avoiding a separate existence check.
os.makedirs("../files/output", exist_ok=True)

# Write a header row and omit the DataFrame index from the file.
summary.to_csv(
    "../files/output/summary.csv",
    sep=",",
    header=True,
    index=False,
)

In [None]:
# Rank the summary by total miles logged, highest first, and keep the first
# ten rows.
summary_by_miles = summary.sort_values("miles-logged", ascending=False)
top10 = summary_by_miles.iloc[:10]
top10

In [None]:
# Horizontal bar chart of miles logged for the drivers in `top10`, saved as
# a PNG file.

# Use the 'name' column as the row labels so the names label the bars.
# The guard keeps the cell re-runnable: once 'name' is already the index
# there is no such column left and set_index() would raise KeyError.
if "name" in top10.columns:
    top10 = top10.set_index("name")

# Work on an explicit figure/axes pair instead of the implicit pyplot state.
fig, ax = plt.subplots()

# Color palette:
#   tab:blue     tab:red       tab:pink
#   tab:orange   tab:purple    tab:gray
#   tab:green    tab:brown     tab:olive
#   tab:cyan
top10["miles-logged"].plot.barh(ax=ax, color="tab:orange", alpha=0.6)

# barh draws the first row at the bottom; flip the axis so the first row of
# `top10` appears at the top of the chart.
ax.invert_yaxis()

# Show x tick values as integers with thousands separators (e.g. 10,000).
ax.xaxis.set_major_formatter(
    matplotlib.ticker.FuncFormatter(lambda value, _pos: format(int(value), ","))
)

# Rotate the x tick labels to vertical.
ax.tick_params(axis="x", labelrotation=90)

# Soften the left/bottom spines and hide the top/right ones.
ax.spines["left"].set_color("lightgray")
ax.spines["bottom"].set_color("gray")
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)

# Create the plots directory if needed; a no-op when it already exists.
os.makedirs("../files/plots", exist_ok=True)

fig.savefig("../files/plots/top10_drivers.png", bbox_inches="tight")