Question 3:

#### We would like to give awards to the best wineries. Come up with 3 relevant ones. 
#### Which wineries should we choose and why?

In [16]:
import sqlite3

def get_top_wineries_rank1_awards_with_awards(db_file, limit=None):
    """Print wineries ordered by how many toplists they finished first in.

    A "rank 1 award" is a row of ``vintage_toplists_rankings`` whose ``rank``
    is 1.  Each such row is traced through vintage -> wine -> winery and the
    rows are counted per winery name.  For every winery one header line is
    printed, followed by the name of each toplist it won (one per line) and a
    blank line.

    Args:
        db_file: Path of the SQLite database file to open.
        limit: Maximum number of wineries to print.  ``None`` (the default)
            prints every winery that has at least one rank-1 placement.

    Returns:
        None.  The report is written to standard output.  ``sqlite3.Error``
        raised while querying is caught and reported on standard output.
    """
    # Connect to the database
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    # SQLite treats a negative LIMIT as "no upper bound"
    row_limit = -1 if limit is None else int(limit)

    # Toplist names may themselves contain commas, so the names are joined with
    # the ASCII unit separator (char(31)) rather than GROUP_CONCAT's default ','.
    separator = "\x1f"

    try:
        # NOTE(review): grouping is by winery *name*, so two distinct wineries
        # sharing a name would be merged - confirm names are unique.
        cursor.execute("""
            SELECT wineries.name,
                   COUNT(*) AS rank1_awards_count,
                   GROUP_CONCAT(toplists.name, char(31)) AS awards_received
            FROM vintage_toplists_rankings
            JOIN vintages ON vintage_toplists_rankings.vintage_id = vintages.id
            JOIN wines ON vintages.wine_id = wines.id
            JOIN wineries ON wines.winery_id = wineries.id
            JOIN toplists ON vintage_toplists_rankings.top_list_id = toplists.id
            WHERE vintage_toplists_rankings.rank = 1
            GROUP BY wineries.name
            ORDER BY rank1_awards_count DESC
            LIMIT ?
        """, (row_limit,))

        # Display the fetched data
        for winery_name, rank1_awards_count, awards_received in cursor.fetchall():
            # GROUP_CONCAT yields NULL when every toplist name in the group is NULL
            awards_list = awards_received.split(separator) if awards_received else []
            awards_formatted = "\n".join(awards_list)  # One award per line
            print(f"Winery: {winery_name}, Rank 1 Awards Count: {rank1_awards_count}\n{awards_formatted}\n")

    except sqlite3.Error as e:
        print("SQLite error:", e)
    finally:
        # Close the connection
        conn.close()

# Path of the SQLite database, relative to the current working directory
db_file_path = "../db/vivino.db"

# Print every winery that has at least one rank-1 toplist placement, most wins first,
# each followed by the toplists it won (the query itself applies no row limit)
get_top_wineries_rank1_awards_with_awards(db_file_path)


Winery: Masseto, Rank 1 Awards Count: 4
Vivino's 2020 Wine Style Awards: Tuscan Red
Vivino's 2016 Wine Style Awards: Central Italy Red
Vivino's 2017 Wine Style Awards: Central Italy Red
Vivino's 2018 Wine Style Awards: Tuscan Red

Winery: Gaja, Rank 1 Awards Count: 4
Vivino's 2016 Wine Style Awards: Italian Barbaresco
Vivino's 2018 Wine Style Awards: Italian Barbaresco
Vivino's 2018 Wine Style Awards: Italian Nebbiolo
Vivino's 2016 Wine Style Awards: Northern Italy White

Winery: Château Rayas, Rank 1 Awards Count: 4
Vivino's 2020 Wine Style Awards: Southern Rhône Châteauneuf-du-Pape Red
Vivino's 2016 Wine Style Awards: Southern Rhône Red
Vivino's 2017 Wine Style Awards: Southern Rhône Red
Vivino's 2018 Wine Style Awards: Southern Rhône Châteauneuf-du-Pape Red

Winery: La Grange des Pères, Rank 1 Awards Count: 3
Vivino's 2016 Wine Style Awards: Languedoc-Roussillon White
Vivino's 2020 Wine Style Awards: Languedoc-Roussillon Red
Vivino's 2020 Wine Style Awards: Languedoc-Roussillon White

Question 6:
#### We would like to create a country leaderboard. Come up with a visual that shows the average wine rating for each country. 
#### Do the same for the vintages.

In [23]:
import sqlite3
import pandas as pd
import plotly.express as px

def get_avg_wine_rating_per_country(db_file):
    """Return the average wine rating of each country.

    Wines are linked to a country through their region, and
    ``wines.ratings_average`` is averaged per country code.

    Args:
        db_file: Path of the SQLite database file to open.

    Returns:
        A list with one dict per country, with keys ``'code'`` (the country
        code after ``convert_to_alpha3``), ``'country_name'`` and
        ``'avg_rating'``.  ``None`` if a ``sqlite3.Error`` occurred while
        querying; the error is printed in that case.
    """
    # Connect to the SQLite database
    conn = sqlite3.connect(db_file)

    try:
        # Average rating per country.  Only aggregated or grouped columns are
        # selected: a bare wines.name next to AVG() would come from an
        # arbitrary row of the group and carry no meaning.
        # NOTE(review): unlike the vintage query, wines with a rating of 0 are
        # not filtered out here - confirm whether such rows exist.
        query = """
            SELECT
                countries.code,
                countries.name AS country_name,
                AVG(wines.ratings_average) AS avg_rating
            FROM
                wines
            JOIN
                regions ON wines.region_id = regions.id
            JOIN
                countries ON regions.country_code = countries.code
            GROUP BY
                countries.code
        """
        cursor = conn.cursor()
        cursor.execute(query)

        # Fetch all rows
        rows = cursor.fetchall()

        # Convert rows to a list of dictionaries
        data = [{'code': convert_to_alpha3(row[0]), 'country_name': row[1], 'avg_rating': row[2]} for row in rows]

        return data

    except sqlite3.Error as e:
        print("SQLite error:", e)
        return None
    finally:
        # Close the connection
        conn.close()

# Two-letter country codes (lowercase) mapped to ISO 3166-1 alpha-3 codes.
# Built once at definition time instead of on every call.
_ALPHA2_TO_ALPHA3 = {
    'ar': 'ARG', 'au': 'AUS', 'ch': 'CHE', 'cl': 'CHL', 'de': 'DEU', 'es': 'ESP', 'fr': 'FRA', 'gr': 'GRC',
    'hr': 'HRV', 'hu': 'HUN', 'il': 'ISR', 'it': 'ITA', 'md': 'MDA', 'pt': 'PRT', 'ro': 'ROU', 'us': 'USA',
    'za': 'ZAF'
}


def convert_to_alpha3(code):
    """Convert a two-letter country code to its ISO 3166-1 alpha-3 form.

    The lookup ignores letter case and surrounding whitespace, so ``'fr'``,
    ``'FR'`` and ``' fr '`` all map to ``'FRA'``.

    Args:
        code: Two-letter country code.

    Returns:
        The alpha-3 code when the country is in the table; otherwise the
        argument exactly as it was passed in (including non-string values
        such as ``None``).
    """
    if not isinstance(code, str):
        # Nothing to normalise; hand the value back untouched
        return code
    return _ALPHA2_TO_ALPHA3.get(code.strip().lower(), code)

# Path of the SQLite database, relative to the current working directory
db_file_path = "../db/vivino.db"

# Per-country average wine ratings; None when the query failed
data = get_avg_wine_rating_per_country(db_file_path)

# Nothing is plotted when the query failed or returned no rows
if data:
    # One row per country with columns: code, country_name, avg_rating
    df = pd.DataFrame(data)

    # World map coloured by the average rating of each country
    fig = px.choropleth(
        data_frame=df,
        locations='code',
        color='avg_rating',
        hover_name="country_name",
        hover_data={'avg_rating': ':.2f'},
        labels={'avg_rating': 'Avg Rating'},
        color_continuous_scale="RdYlGn",
        projection="natural earth",
    )

    # Title and map decoration
    fig.update_layout(
        title_text='Wine Review per Country',
        geo={
            'showland': True,
            'showcountries': True,
            'showcoastlines': True,
            'projection_type': 'natural earth',
        },
    )

    # Render the figure in the notebook
    fig.show()


In [25]:
import sqlite3
import pandas as pd
import plotly.express as px

def get_avg_vintage_rating_per_country(db_file):
    """Return the average vintage rating of each country.

    Vintages are linked to a country through their wine and its region.
    Vintages whose ``ratings_average`` is not greater than 0 are left out of
    the average.

    Args:
        db_file: Path of the SQLite database file to open.

    Returns:
        A list with one dict per country, with keys ``'code'`` (the country
        code after ``convert_to_alpha3``), ``'country_name'`` and
        ``'avg_rating'``.  ``None`` if a ``sqlite3.Error`` occurred while
        querying; the error is printed in that case.
    """
    connection = sqlite3.connect(db_file)

    try:
        # Mean rating of the rated vintages, grouped by country code
        sql = """
            SELECT 
                countries.code, 
                countries.name AS country_name,
                AVG(vintages.ratings_average) AS avg_rating
            FROM 
                vintages
            JOIN 
                wines ON vintages.wine_id = wines.id
            JOIN 
                regions ON wines.region_id = regions.id
            JOIN 
                countries ON regions.country_code = countries.code
            WHERE
                vintages.ratings_average > 0  -- Ignore vintages with zero rating
            GROUP BY 
                countries.code
        """
        result_rows = connection.execute(sql).fetchall()

        # Turn each (code, name, average) tuple into a record for the plot
        records = []
        for code, country_name, avg_rating in result_rows:
            records.append({
                'code': convert_to_alpha3(code),
                'country_name': country_name,
                'avg_rating': avg_rating,
            })
        return records

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        # The connection is released on every path, including errors
        connection.close()

# Two-letter country codes (lowercase) mapped to ISO 3166-1 alpha-3 codes.
# Built once at definition time instead of on every call.
_ALPHA2_TO_ALPHA3 = {
    'ar': 'ARG', 'au': 'AUS', 'ch': 'CHE', 'cl': 'CHL', 'de': 'DEU', 'es': 'ESP', 'fr': 'FRA', 'gr': 'GRC',
    'hr': 'HRV', 'hu': 'HUN', 'il': 'ISR', 'it': 'ITA', 'md': 'MDA', 'pt': 'PRT', 'ro': 'ROU', 'us': 'USA',
    'za': 'ZAF'
}


def convert_to_alpha3(code):
    """Convert a two-letter country code to its ISO 3166-1 alpha-3 form.

    The lookup ignores letter case and surrounding whitespace, so ``'fr'``,
    ``'FR'`` and ``' fr '`` all map to ``'FRA'``.

    Args:
        code: Two-letter country code.

    Returns:
        The alpha-3 code when the country is in the table; otherwise the
        argument exactly as it was passed in (including non-string values
        such as ``None``).
    """
    if not isinstance(code, str):
        # Nothing to normalise; hand the value back untouched
        return code
    return _ALPHA2_TO_ALPHA3.get(code.strip().lower(), code)

# Path of the SQLite database, relative to the current working directory
db_file_path = "../db/vivino.db"

# Per-country average vintage ratings; None when the query failed
data = get_avg_vintage_rating_per_country(db_file_path)

# Nothing is plotted when the query failed or returned no rows
if data:
    # One row per country with columns: code, country_name, avg_rating
    df = pd.DataFrame(data)

    # World map coloured by the average vintage rating of each country
    fig = px.choropleth(
        data_frame=df,
        locations='code',
        color='avg_rating',
        hover_name="country_name",
        hover_data={'avg_rating': ':.2f'},
        labels={'avg_rating': 'Avg Rating'},
        color_continuous_scale="RdYlGn",
        projection="natural earth",
    )

    # Title and map decoration
    fig.update_layout(
        title_text='Vintage Review per Country',
        geo={
            'showland': True,
            'showcountries': True,
            'showcoastlines': True,
            'projection_type': 'natural earth',
        },
    )

    # Render the figure in the notebook
    fig.show()


In [26]:
import sqlite3
import pandas as pd
import plotly.express as px

def get_top_wine_per_country(db_file):
    """Return the highest wine rating of each country and the wine holding it.

    Wines are linked to a country through their region and grouped by country
    code.  The wine name is selected as a bare column next to ``MAX()``; this
    relies on SQLite taking bare columns from the row that holds the maximum,
    which is SQLite-specific behaviour.  When several wines tie for the
    maximum, which name is reported is not determined by the query.

    Args:
        db_file: Path of the SQLite database file to open.

    Returns:
        A list with one dict per country, with keys ``'code'`` (the country
        code after ``convert_to_alpha3``), ``'country_name'``,
        ``'max_rating'`` and ``'top_wine_name'``.  ``None`` if a
        ``sqlite3.Error`` occurred while querying; the error is printed in
        that case.
    """
    connection = sqlite3.connect(db_file)

    try:
        # Best-rated wine of every country
        sql = """
            SELECT 
                countries.code, 
                countries.name AS country_name,
                MAX(wines.ratings_average) AS max_rating,
                wines.name AS top_wine_name
            FROM 
                wines
            JOIN 
                regions ON wines.region_id = regions.id
            JOIN 
                countries ON regions.country_code = countries.code
            GROUP BY 
                countries.code
        """
        result_rows = connection.execute(sql).fetchall()

        # Turn each (code, name, rating, wine) tuple into a record for the plot
        records = []
        for code, country_name, max_rating, top_wine_name in result_rows:
            records.append({
                'code': convert_to_alpha3(code),
                'country_name': country_name,
                'max_rating': max_rating,
                'top_wine_name': top_wine_name,
            })
        return records

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        # The connection is released on every path, including errors
        connection.close()

# Two-letter country codes (lowercase) mapped to ISO 3166-1 alpha-3 codes.
# Built once at definition time instead of on every call.
_ALPHA2_TO_ALPHA3 = {
    'ar': 'ARG', 'au': 'AUS', 'ch': 'CHE', 'cl': 'CHL', 'de': 'DEU', 'es': 'ESP', 'fr': 'FRA', 'gr': 'GRC',
    'hr': 'HRV', 'hu': 'HUN', 'il': 'ISR', 'it': 'ITA', 'md': 'MDA', 'pt': 'PRT', 'ro': 'ROU', 'us': 'USA',
    'za': 'ZAF'
}


def convert_to_alpha3(code):
    """Convert a two-letter country code to its ISO 3166-1 alpha-3 form.

    The lookup ignores letter case and surrounding whitespace, so ``'fr'``,
    ``'FR'`` and ``' fr '`` all map to ``'FRA'``.

    Args:
        code: Two-letter country code.

    Returns:
        The alpha-3 code when the country is in the table; otherwise the
        argument exactly as it was passed in (including non-string values
        such as ``None``).
    """
    if not isinstance(code, str):
        # Nothing to normalise; hand the value back untouched
        return code
    return _ALPHA2_TO_ALPHA3.get(code.strip().lower(), code)

# Path of the SQLite database, relative to the current working directory
db_file_path = "../db/vivino.db"

# Best-rated wine of each country; None when the query failed
data = get_top_wine_per_country(db_file_path)

# Nothing is plotted when the query failed or returned no rows
if data:
    # One row per country with columns: code, country_name, max_rating, top_wine_name
    df = pd.DataFrame(data)

    # World map coloured by the best wine rating; the hover box names that wine
    fig = px.choropleth(
        data_frame=df,
        locations='code',
        color='max_rating',
        hover_name="country_name",
        hover_data={'top_wine_name': True, 'max_rating': ':.2f'},
        labels={'max_rating': 'Max Rating'},
        color_continuous_scale="RdYlGn",
        projection="natural earth",
    )

    # Title and map decoration
    fig.update_layout(
        title_text='Top Wine per Country',
        geo={
            'showland': True,
            'showcountries': True,
            'showcoastlines': True,
            'projection_type': 'natural earth',
        },
    )

    # Render the figure in the notebook
    fig.show()


In [27]:
import sqlite3
import pandas as pd
import plotly.express as px

def get_top_vintage_rating_per_country(db_file):
    """Return the highest vintage rating of each country and the vintage holding it.

    Vintages are linked to a country through their wine and its region, and
    grouped by country code.  Vintages whose ``ratings_average`` is not
    greater than 0 are ignored.  The vintage name is selected as a bare column
    next to ``MAX()``; this relies on SQLite taking bare columns from the row
    that holds the maximum, which is SQLite-specific behaviour.  When several
    vintages tie for the maximum, which name is reported is not determined by
    the query.

    Args:
        db_file: Path of the SQLite database file to open.

    Returns:
        A list with one dict per country, with keys ``'code'`` (the country
        code after ``convert_to_alpha3``), ``'country_name'``,
        ``'max_rating'`` and ``'top_vintage'``.  ``None`` if a
        ``sqlite3.Error`` occurred while querying; the error is printed in
        that case.
    """
    connection = sqlite3.connect(db_file)

    try:
        # Best-rated vintage of every country (maximum, not average)
        sql = """
            SELECT 
                countries.code, 
                countries.name AS country_name,
                MAX(vintages.ratings_average) AS max_rating,
                vintages.name AS top_vintage
            FROM 
                vintages
            JOIN 
                wines ON vintages.wine_id = wines.id
            JOIN 
                regions ON wines.region_id = regions.id
            JOIN 
                countries ON regions.country_code = countries.code
            WHERE
                vintages.ratings_average > 0  -- Ignore vintages with zero rating
            GROUP BY 
                countries.code
        """
        result_rows = connection.execute(sql).fetchall()

        # Turn each (code, name, rating, vintage) tuple into a record for the plot
        records = []
        for code, country_name, max_rating, top_vintage in result_rows:
            records.append({
                'code': convert_to_alpha3(code),
                'country_name': country_name,
                'max_rating': max_rating,
                'top_vintage': top_vintage,
            })
        return records

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        # The connection is released on every path, including errors
        connection.close()

# Two-letter country codes (lowercase) mapped to ISO 3166-1 alpha-3 codes.
# Built once at definition time instead of on every call.
_ALPHA2_TO_ALPHA3 = {
    'ar': 'ARG', 'au': 'AUS', 'ch': 'CHE', 'cl': 'CHL', 'de': 'DEU', 'es': 'ESP', 'fr': 'FRA', 'gr': 'GRC',
    'hr': 'HRV', 'hu': 'HUN', 'il': 'ISR', 'it': 'ITA', 'md': 'MDA', 'pt': 'PRT', 'ro': 'ROU', 'us': 'USA',
    'za': 'ZAF'
}


def convert_to_alpha3(code):
    """Convert a two-letter country code to its ISO 3166-1 alpha-3 form.

    The lookup ignores letter case and surrounding whitespace, so ``'fr'``,
    ``'FR'`` and ``' fr '`` all map to ``'FRA'``.

    Args:
        code: Two-letter country code.

    Returns:
        The alpha-3 code when the country is in the table; otherwise the
        argument exactly as it was passed in (including non-string values
        such as ``None``).
    """
    if not isinstance(code, str):
        # Nothing to normalise; hand the value back untouched
        return code
    return _ALPHA2_TO_ALPHA3.get(code.strip().lower(), code)

# Path of the SQLite database, relative to the current working directory
db_file_path = "../db/vivino.db"

# Best-rated vintage of each country; None when the query failed
data = get_top_vintage_rating_per_country(db_file_path)

# Nothing is plotted when the query failed or returned no rows
if data:
    # One row per country with columns: code, country_name, max_rating, top_vintage
    df = pd.DataFrame(data)

    # World map coloured by the best vintage rating; the hover box names that vintage
    fig = px.choropleth(
        data_frame=df,
        locations='code',
        color='max_rating',
        hover_name="country_name",
        hover_data={'top_vintage': True, 'max_rating': ':.2f'},
        labels={'max_rating': 'Max Rating'},
        color_continuous_scale="RdYlGn",
        projection="natural earth",
    )

    # Title and map decoration
    fig.update_layout(
        title_text='Top Vintage per Country',
        geo={
            'showland': True,
            'showcountries': True,
            'showcoastlines': True,
            'projection_type': 'natural earth',
        },
    )

    # Render the figure in the notebook
    fig.show()
