# Libraries

In [12]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import

In [13]:
df = pd.read_csv('winemag-data-130k-clean.csv')

In [14]:
df.head(
)

Unnamed: 0,country,description,designation,points,price,province,region_1,taster_name,title,variety,winery,continent
0,Italy,aromas include tropical fruit broom brimstone ...,Vulkà Bianco,87,,Sicily & Sardinia,Etna,Kerin O’Keefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,Europe
1,Portugal,ripe fruity wine that smooth while still struc...,Avidagos,87,15.0,Douro,,Roger Voss,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,Europe
2,US,tart snappy flavors lime flesh rind dominate. ...,,87,14.0,Oregon,Willamette Valley,Paul Gregutt,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,North America
3,US,pineapple rind lemon pith orange blossom start...,Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,Alexander Peartree,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,North America
4,US,regular bottling 2012 comes across rather roug...,Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Paul Gregutt,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,North America


#  Est-ce que la quantité de production de vin d'un pays influence la qualité de ses vins ?

In [15]:
df.continent.value_counts()

continent
Europe           55967
North America    50683
South America     7959
Oceania           3475
Africa            1336
Asia               509
Name: count, dtype: int64

In [16]:
df.dropna(subset=['continent'], inplace=True)

In [17]:
df[(df['continent'].isnull())]

Unnamed: 0,country,description,designation,points,price,province,region_1,taster_name,title,variety,winery,continent


In [18]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(1, 1)

continents = df['continent'].unique()

trace_continents = [] # List to keep track of the continent for each trace

for continent in continents:
    df_continent = df[df['continent'] == continent]
    countries = df_continent['country'].unique()
    for country in countries:
        df_country = df_continent[df_continent['country'] == country]
        fig.add_trace(
            go.Scatter(
                x=[df_country['price'].mean(axis=0)],
                y=[df_country['points'].mean(axis=0)],
                mode='markers',
                name=country,
                marker=dict(size=12, line=dict(width=1)),
                text=df_country['country'],
                visible=False
            )
        )
        trace_continents.append(continent)

buttons = []

for continent in continents:
    buttons.append(
        dict(
            label=continent,
            method='update',
            args=[{'visible': [continent == trace_continent for trace_continent in trace_continents]},
                  {'title': f'Distribution of Quality/Quantity by Country in {continent}'}]
        )
    )

# Button to change continent
buttons.append(
    dict(
        label='All',
        method='update',
        args=[{'visible': [True] * len(trace_continents)},
              {'title': 'Distribution of Quality/Quantity by Country'}]
    )
)

fig.update_layout(
    showlegend=True,
    updatemenus=[{
        'buttons': buttons,
        'direction': 'down',
        'pad': {'r': 10, 't': 10},
        'showactive': True,
        'x': 0.1,
        'xanchor': 'left',
        'y': 1.2,
        'yanchor': 'top'
    }],
    xaxis_title='Quantity of Production (price)',
    yaxis_title='Average Points (quality)'
)

fig.show()


In [19]:
df.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,taster_name,title,variety,winery,continent
0,Italy,aromas include tropical fruit broom brimstone ...,Vulkà Bianco,87,,Sicily & Sardinia,Etna,Kerin O’Keefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,Europe
1,Portugal,ripe fruity wine that smooth while still struc...,Avidagos,87,15.0,Douro,,Roger Voss,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,Europe
2,US,tart snappy flavors lime flesh rind dominate. ...,,87,14.0,Oregon,Willamette Valley,Paul Gregutt,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,North America
3,US,pineapple rind lemon pith orange blossom start...,Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,Alexander Peartree,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,North America
4,US,regular bottling 2012 comes across rather roug...,Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Paul Gregutt,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,North America


In [20]:
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Load GeoDataFrame with country geometries
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Merge your data with the GeoDataFrame
df_geo = world.merge(df, left_on='name', right_on='country', how='right')
df_geo.head()


df_geo['avg_quality'] = df_geo.groupby('country')['score'].transform('mean')
df_geo['avg_quantity'] = df_geo.groupby('country')['price'].transform('mean')


fig = go.Figure()

# Loop through each continent and each country to add the scattergeo plots
for continent in df_geo['continent_x'].unique():
    geo_continent = df_geo[df_geo['continent_x'] == continent]
    for country in geo_continent['country'].unique():
        geo_country = geo_continent[geo_continent['country'] == country]
        centroid = geo_country.geometry.centroid.iloc[0]  # get the centroid for the country

        fig.add_trace(
            go.Scattergeo(
                lon=[centroid.x],
                lat=[centroid.y],
                text=geo_country['country'],
                marker=dict(
                    size=geo_country['avg_quantity'],
                    color=geo_country['avg_quality'],
                    colorscale='Viridis',  # You can choose your own colorscale
                    showscale=True,
                    sizemode='area',
                    sizeref=2.*max(df_geo['avg_quantity'])/(40.**2),  # Adjust sizeref for appropriate bubble sizes
                    line=dict(width=0)
                ),
                name=country,
                visible=(continent == df_geo['continent_x'].unique()[0])  # Only the first continent is visible by default
            )
        )

# Create buttons for the interactive menu
buttons = []
for i, continent in enumerate(df_geo['continent_x'].unique()):
    buttons.append(
        dict(
            label=continent,
            method='update',
            args=[{'visible': [continent == c for c in df_geo['continent_x']]},
                  {'title': f'Distribution of Quality/Quantity by Country in {continent}'}]
        )
    )

# Add button to show all continents
buttons.append(
    dict(
        label='All',
        method='update',
        args=[{'visible': [True] * len(df_geo['continent_x'].unique())},
              {'title': 'Distribution of Quality/Quantity by Country'}]
    )
)

# Update layout for the figure
fig.update_layout(
    updatemenus=[{
        'buttons': buttons,
        'direction': 'down',
        'pad': {'r': 10, 't': 10},
        'showactive': True,
        'x': 0.1,
        'xanchor': 'left',
        'y': 1.1,
        'yanchor': 'top'
    }],
    geo=dict(
        projection_type='equirectangular',
        showland=True,
        landcolor='lightgrey'
    ),
)

# Display the figure
fig.show()



The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.



KeyError: 'Column not found: score'