In [135]:
import pandas as pd 
import numpy as np

import geopandas as gpd
import plotly.express as px

import matplotlib.pyplot as plt


In [136]:
# Country labels rewritten after loading; presumably chosen to match the
# country names used by the map cells further down (verify against the GeoJSON).
country_aliases = {'US': 'United States of America', 'England': 'United Kingdom'}

# Load the reviews (first CSV column is the row index) and normalise the
# country labels in one expression, so re-running the cell is idempotent.
df = (
    pd.read_csv('winemag-data-130k-v2.csv', index_col=0)
    .replace({'country': country_aliases})
)
df.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,United States of America,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,United States of America,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,United States of America,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [137]:
# Minimum number of reviews (exclusive) a country needs to be kept in the
# ranking. The filter below has always used 100; the earlier comment that
# mentioned 1000 did not match the code.
MIN_REVIEWS = 100

# Median, mean and review count of `points` per country, computed in a single
# pass with named aggregation instead of three groupbys joined by merges.
country_stats = (
    df.groupby('country')['points']
    .agg(median_points='median', mean_points='mean', number_of_reviews='count')
    .reset_index()
)

# Per-metric frames, kept under their original names in case other cells use them.
mean_scores = country_stats[['country', 'mean_points']].copy()
count_scores = country_stats[['country', 'number_of_reviews']].copy()

# Keep only countries with more than MIN_REVIEWS reviews, so the mean/median
# gives a better idea of typical wine quality, then rank by median score.
median_scores = (
    country_stats[country_stats['number_of_reviews'] > MIN_REVIEWS]
    .sort_values('median_points', ascending=False)
)

median_scores

Unnamed: 0,country,median_points,mean_points,number_of_reviews
3,Austria,90.0,90.101345,3345
7,Canada,90.0,89.36965,257
16,Germany,90.0,89.851732,2165
18,Hungary,89.0,89.191781,146
6,Bulgaria,89.0,87.93617,141
14,France,89.0,88.845109,22093
2,Australia,89.0,88.580507,2329
20,Israel,89.0,88.471287,505
35,South Africa,88.0,88.056388,1401
41,United States of America,88.0,88.56372,54504


In [138]:
# Number of reviews per country, most-reviewed countries first.
# `count()` counts the non-null `points` values in each country group.
df_country = (
    df.groupby('country')['points']
    .count()
    .rename('number_of_reviews')
    .reset_index()
    .sort_values('number_of_reviews', ascending=False)
)

df_country.head()

Unnamed: 0,country,number_of_reviews
41,United States of America,54504
14,France,22093
21,Italy,19540
36,Spain,6645
30,Portugal,5691


In [139]:
# World country outlines, referenced by URL.
geojson_url = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'

# Choropleth of review counts: each row of df_country is matched to a GeoJSON
# feature by comparing its `country` value with the feature's `properties.name`,
# so that key must match the property actually present in the GeoJSON.
fig = px.choropleth(
    data_frame=df_country,
    locations='country',
    color='number_of_reviews',
    geojson=geojson_url,
    featureidkey='properties.name',
    projection='natural earth',
    color_continuous_scale='YlOrRd',
    labels={'number_of_reviews': 'Number of reviews'},
)

# Cosmetics: draw country borders and remove the outer margins.
fig.update_geos(
    showcountries=True,
    countrycolor="RebeccaPurple",
).update_layout(
    margin=dict(r=0, t=0, l=0, b=0),
)

# Render the map.
fig.show()

In [140]:
# World country outlines, referenced by URL.
geojson = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'

# Choropleth of the median review score per country. Rows of median_scores are
# matched to GeoJSON features through the feature's `properties.name`.
fig = px.choropleth(
    data_frame=median_scores,
    locations='country',
    color='median_points',
    geojson=geojson,
    featureidkey="properties.name",
    projection="natural earth",
    color_continuous_scale="Greens",
    labels={'median_points': 'Median Score of Wines'},
)

# Draw country borders.
fig.update_geos(showcountries=True, countrycolor="RebeccaPurple")

# Remove the outer margins and leave the title empty, in a single layout update.
fig.update_layout(
    margin=dict(r=0, t=0, l=0, b=0),
    title_text='',
)
fig.show()