In [None]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw user-count data from CSV into a pandas DataFrame
df_raw = pd.read_csv(filepath_or_buffer="CSVs/question2.csv")

In [None]:
# Print a concise summary of df_raw (columns, dtypes, non-null counts) as a sanity check
df_raw.info()

In [None]:
# Add a column with the user count expressed in millions, rounded to one decimal
df_raw["users_count_million"] = df_raw["users_count"].div(1_000_000).round(1)

In [None]:
# Keep only the rows with at least one million users
df = df_raw.loc[df_raw["users_count"].ge(1_000_000)]

In [None]:
# Horizontal bar chart: number of users (in millions) per country.

# Configure the seaborn theme *before* the figure exists, so the figure is
# created under the themed rcParams on a fresh kernel as well as on re-runs.
sns.set_theme(style="darkgrid", context="talk")

fig, ax = plt.subplots(figsize=(16, 10))

# The bars use one explicit colour, so no palette is involved here. (A bare
# sns.color_palette(...) call only *returns* a palette and changes nothing.)
sns.barplot(data=df, x="users_count_million", y="name", orient="y",
            estimator="sum", errorbar=None, color="green", ax=ax)

# Print the value next to each bar
ax.bar_label(ax.containers[0])
# Fixed x-range; assumes no bar is longer than 16 (million) -- TODO confirm against the data
ax.set(xlim=(0, 16))

ax.set_xlabel("Number of users in million", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
ax.set_ylabel("Country", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
ax.set_title("Countries with the most users", y=1.05, fontweight='bold')

plt.show()

In [None]:
# Load the grapes CSV and keep only the grapes used in more than one country
grapes = (
    pd.read_csv("CSVs/top_grapes.csv")
    .loc[lambda frame: frame["countries_count"] > 1]
)

In [None]:
# Horizontal bar chart: number of countries in which each grape is used.

# Configure the seaborn theme *before* the figure exists, so the figure is
# created under the themed rcParams on a fresh kernel as well as on re-runs.
sns.set_theme(style="darkgrid", context="talk")

fig, ax = plt.subplots(figsize=(16, 10))

# No palette is set explicitly: the bars keep the theme's default colour.
# (A bare sns.color_palette(...) call only *returns* a palette; to really
# switch palettes, pass palette=... to sns.set_theme instead.)
sns.barplot(data=grapes, x="countries_count", y="grape_name", orient="y",
            estimator="sum", errorbar=None, ax=ax)

# Print the value next to each bar
ax.bar_label(ax.containers[0])
# Fixed x-range; assumes no grape is used in more than 16 countries -- TODO confirm against the data
ax.set(xlim=(0, 16))

ax.set_xlabel("Number of countries", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
ax.set_ylabel("Grape name", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
# Title spelling corrected ("Grappes" -> "Grapes")
ax.set_title("Grapes used in many countries", y=1.05, fontweight='bold')

plt.show()

In [None]:
# Load the wines data from CSV into a pandas DataFrame
wines = pd.read_csv(filepath_or_buffer="CSVs/question5.csv")

In [None]:
# Add the year to the wine name to distinguish wines with the same name but
# from different vintages.
#
# The untouched name is saved in "wine_name_raw" the first time this cell runs
# and the label is always rebuilt from that column, so re-running the cell
# does not append the year a second (third, ...) time.
if "wine_name_raw" not in wines.columns:
    wines["wine_name_raw"] = wines["wine_name"].astype(str)
wines["wine_name"] = wines["wine_name_raw"] + " " + wines["year"].astype(str)

In [None]:
# Get the top five wines of each grape

def _top_rated(frame, grape, n=5):
    """Return the ``n`` best-rated rows of ``frame`` for a single grape.

    Rows whose "grape_name" equals ``grape`` are sorted by "ratings_average"
    and then by "ratings_count" (both descending, so the rating count breaks
    ties on the average) and the first ``n`` rows are returned.
    """
    of_grape = frame[frame["grape_name"] == grape]
    ranked = of_grape.sort_values(
        by=["ratings_average", "ratings_count"], ascending=[False, False])
    return ranked.head(n)


top5_cabernet = _top_rated(wines, "Cabernet Sauvignon")

top5_chardonnay = _top_rated(wines, "Chardonnay")

top5_merlot = _top_rated(wines, "Merlot")

In [None]:
# Stack the three per-grape tables into a single DataFrame with a fresh index
top5_wines = pd.concat(
    objs=(top5_cabernet, top5_chardonnay, top5_merlot),
    ignore_index=True,
)

# Bare last expression so the notebook renders the combined table
top5_wines

In [None]:
# Horizontal bar chart: average rating of the top wines, coloured by grape.

# Configure the seaborn theme *before* the figure exists, so the figure is
# created under the themed rcParams on a fresh kernel as well as on re-runs.
sns.set_theme(style="darkgrid", context="talk")

fig, ax = plt.subplots(figsize=(16, 10))

# Hue colours come from the theme's default palette. (A bare
# sns.color_palette(...) call only *returns* a palette; to really switch
# palettes, pass palette=... to sns.set_theme or to barplot.)
sns.barplot(data=top5_wines, x="ratings_average", y="wine_name", orient="y",
            hue="grape_name", errorbar=None, legend="brief", ax=ax)

# One bar container is created per hue level (grape); label every one of them
# instead of hard-coding three indices, so the cell also works when the data
# holds fewer or more grapes.
for container in ax.containers:
    ax.bar_label(container)
# Fixed x-range, wider than the bars to leave room for the value labels
ax.set(xlim=(0, 7))

ax.legend(title='Grape Name')
ax.set_xlabel("Ratings Average", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
ax.set_ylabel("Wine Name", fontfamily='sans-serif', fontstyle='normal', fontweight='bold')
# Title spelling corrected ("grappes" -> "grapes")
ax.set_title("Top 5 wines of each of the three most used grapes",
             y=1.05, fontweight='bold', fontsize=20)

plt.show()