In [1]:
import sqlite3

# Relative path of the SQLite database file handed to the query helpers in this notebook
db_file_path = "../db/vivino.db"

In [None]:
def view_db(db_file):
    """Print the column names and every row of the ``regions`` table.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` raised while querying is printed to stdout rather
    than propagated. The connection is closed whether or not the query
    succeeded.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        cur.execute("SELECT * FROM regions")

        # Each entry of cursor.description is a tuple whose first item is the column name
        header = []
        for column_info in cur.description:
            header.append(column_info[0])
        print("Columns:", header)

        # Load the complete result set, then echo it one tuple per line
        records = cur.fetchall()
        for record in records:
            print(record)
    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()


In [None]:
def count_names_by_country(db_file):
    """Print how many wine names each ``country_code`` has in ``wines``.

    Args:
        db_file: Path of the SQLite database file to open.

    ``COUNT(name)`` is used, so rows whose ``name`` is NULL are not counted.
    A ``sqlite3.Error`` is printed to stdout instead of being raised, and
    the connection is always closed.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        # One result row per country code: (country_code, number_of_names)
        cur.execute("SELECT country_code, COUNT(name) FROM wines GROUP BY country_code")

        for code, total in cur.fetchall():
            print(f"Country Code: {code}, Number of Names: {total}")
    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()

In [None]:
def find_unique_region_ids(db_file):
    """Report region IDs that appear in only one of ``wines`` and ``regions``.

    Args:
        db_file: Path of the SQLite database file to open.

    Two anti-joins are run: ``wines.region_id`` values with no matching
    ``regions.id``, and ``regions.id`` values that no wine refers to.
    Both queries finish before anything is printed. A ``sqlite3.Error``
    is printed to stdout instead of being raised, and the connection is
    always closed.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        # wines -> regions anti-join: IDs referenced by wines but unknown to regions
        cur.execute("""
            SELECT DISTINCT wines.region_id
            FROM wines
            LEFT JOIN regions ON wines.region_id = regions.id
            WHERE regions.id IS NULL
        """)
        only_in_wines = [record[0] for record in cur.fetchall()]

        # regions -> wines anti-join: regions that no wine points at
        cur.execute("""
            SELECT DISTINCT regions.id
            FROM regions
            LEFT JOIN wines ON wines.region_id = regions.id
            WHERE wines.region_id IS NULL
        """)
        only_in_regions = [record[0] for record in cur.fetchall()]

        if not only_in_wines:
            print("No unique region IDs found in wines table.")
        else:
            print("Region IDs in wines table not present in regions table:")
            for region_id in only_in_wines:
                print(region_id)

        if not only_in_regions:
            print("\nNo unique region IDs found in regions table.")
        else:
            print("\nRegion IDs in regions table not present in wines table:")
            for region_id in only_in_regions:
                print(region_id)

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()

In [None]:
def add_wine_regions_count_column(db_file):
    """Add an INTEGER ``wine_regions_count`` column to the ``countries`` table.

    The function is safe to call repeatedly: if the column is already
    present nothing is altered and a short notice is printed, so re-running
    the notebook does not produce a "duplicate column name" error.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` (for example when ``countries`` does not exist) is
    rolled back and printed to stdout instead of being raised. The
    connection is always closed.
    """
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    try:
        # PRAGMA table_info yields one row per column; index 1 is the column name.
        # SQLite column names are case-insensitive, hence the lower-casing.
        cursor.execute("PRAGMA table_info(countries)")
        existing_columns = {str(row[1]).lower() for row in cursor.fetchall()}

        if "wine_regions_count" in existing_columns:
            print("wine_regions_count column already exists in countries table; nothing to do.")
            return

        cursor.execute("""
            ALTER TABLE countries
            ADD COLUMN wine_regions_count INTEGER
        """)

        # Commit first so that success is only reported once the change is durable
        conn.commit()

        print("Added wine_regions_count column to countries table successfully.")

    except sqlite3.Error as e:
        conn.rollback()  # Undo any partial change before reporting the error
        print("SQLite error:", e)
    finally:
        conn.close()

In [None]:
def update_country_wine_regions_count(db_file):
    """Recompute ``countries.wine_regions_count`` from the ``regions`` table.

    Every row of ``countries`` is refreshed in a single UPDATE statement:
    the new value is the number of ``regions`` rows (with a non-NULL ``id``)
    whose ``country_code`` equals the country's ``code``. Countries without
    any region therefore get 0 rather than keeping NULL or a stale count
    left over from an earlier run.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` is rolled back and printed to stdout instead of
    being raised. The connection is always closed.
    """
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    try:
        # Set-based update: the correlated subquery is evaluated once per
        # country row, replacing the former SELECT plus one UPDATE per country.
        cursor.execute("""
            UPDATE countries
            SET wine_regions_count = (
                SELECT COUNT(regions.id)
                FROM regions
                WHERE regions.country_code = countries.code
            )
        """)

        conn.commit()

        print("Updated countries table with wine regions count successfully.")

    except sqlite3.Error as e:
        conn.rollback()  # Leave the table untouched if the update failed
        print("SQLite error:", e)
    finally:
        conn.close()

In [37]:
def get_top_wineries(db_file):
    """Print two top-10 lists of winery names taken from the ``wines`` table.

    The first list is ordered by ``wines.ratings_average``, the second by
    ``wines.ratings_count``, both descending. Each query ranks individual
    wine rows and shows the name of the wine's winery, so a winery that
    owns several highly ranked wines appears more than once.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` is printed to stdout instead of being raised, and
    the connection is always closed.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        # Ten wine rows with the highest average rating, labelled by winery
        cur.execute("""
            SELECT wineries.name, wines.ratings_average
            FROM wines
            JOIN wineries ON wines.winery_id = wineries.id
            ORDER BY wines.ratings_average DESC
            LIMIT 10
        """)
        best_rated = cur.fetchall()

        # Ten wine rows with the largest number of ratings, labelled by winery
        cur.execute("""
            SELECT wineries.name, wines.ratings_count
            FROM wines
            JOIN wineries ON wines.winery_id = wineries.id
            ORDER BY wines.ratings_count DESC
            LIMIT 10
        """)
        most_rated = cur.fetchall()

        # Both queries have completed; now print the two numbered lists
        print("Top 10 wineries with the highest ratings_average:")
        for position, entry in enumerate(best_rated, 1):
            print(f"{position}. {entry[0]}: {entry[1]}")

        print("\nTop 10 wineries with the most ratings_count:")
        for position, entry in enumerate(most_rated, 1):
            print(f"{position}. {entry[0]}: {entry[1]}")

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()

# Relative path of the SQLite database file (same value as in the first cell)
db_file_path = "../db/vivino.db"

# Print both top-10 lists (by ratings_average and by ratings_count)
get_top_wineries(db_file_path)


Top 10 wineries with the highest ratings_average:
1. Quintarelli Giuseppe : 4.8
2. Scarecrow : 4.8
3. Sierra Cantabria : 4.8
4. Tenuta Hortense : 4.8
5. Louis Roederer : 4.8
6. Pierre Girardin : 4.8
7. Colgin : 4.7
8. Caymus : 4.7
9. Vega Sicilia : 4.7
10. Vega Sicilia : 4.7

Top 10 wineries with the most ratings_count:
1. Caymus : 157944
2. Dom Pérignon : 146377
3. Antinori : 142513
4. Tenuta San Guido : 107646
5. San Marzano : 94289
6. Masi : 93397
7. Stag's Leap Wine Cellars : 81351
8. Tommasi : 77515
9. Opus One Ove: 77053
10. Château Mouton Rothschild : 73615


In [16]:
def get_top_wineries_rank1_awards(db_file):
    """Print every winery that holds a rank-1 toplist entry, most entries first.

    The count is the number of ``vintage_toplists_rankings`` rows with
    ``rank = 1`` reachable through vintages -> wines -> wineries. Rows are
    grouped by winery *name*, so wineries sharing a name are counted
    together.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` is printed to stdout instead of being raised, and
    the connection is always closed.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        # Walk ranking -> vintage -> wine -> winery and tally the rank-1 rows
        cur.execute("""
            SELECT wineries.name, COUNT(*) AS rank1_awards_count
            FROM vintage_toplists_rankings
            JOIN vintages ON vintage_toplists_rankings.vintage_id = vintages.id
            JOIN wines ON vintages.wine_id = wines.id        
            JOIN wineries ON wines.winery_id = wineries.id
            WHERE vintage_toplists_rankings.rank = 1
            GROUP BY wineries.name
            ORDER BY rank1_awards_count DESC
        """)

        for winery, award_total in cur.fetchall():
            print(f"Winery: {winery}, Rank 1 Awards Count: {award_total}")

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()


# Relative path of the SQLite database file (same value as in the first cell)
db_file_path = "../db/vivino.db"

# Print each winery together with its number of rank-1 toplist entries
get_top_wineries_rank1_awards(db_file_path)


Winery: Masseto , Rank 1 Awards Count: 4
Winery: Gaja , Rank 1 Awards Count: 4
Winery: Château Rayas , Rank 1 Awards Count: 4
Winery: La Grange des Pères , Rank 1 Awards Count: 3
Winery: Charles Melton , Rank 1 Awards Count: 2
Winery: Trimbach , Rank 1 Awards Count: 1
Winery: Tommasi , Rank 1 Awards Count: 1
Winery: Scarecrow , Rank 1 Awards Count: 1
Winery: San Marzano , Rank 1 Awards Count: 1
Winery: Salon , Rank 1 Awards Count: 1
Winery: Pago de Carraovejas , Rank 1 Awards Count: 1
Winery: Ornellaia , Rank 1 Awards Count: 1
Winery: Guerrieri , Rank 1 Awards Count: 1
Winery: Dal Forno Romano , Rank 1 Awards Count: 1
Winery: Château des Tours , Rank 1 Awards Count: 1
Winery: Château Margaux , Rank 1 Awards Count: 1
Winery: Château Ducru-Beaucaillou , Rank 1 Awards Count: 1
Winery: Burmester , Rank 1 Awards Count: 1
Winery: Buena Vista , Rank 1 Awards Count: 1
Winery: Bonneau du Martray , Rank 1 Awards Count: 1
Winery: Bodegas Tradición Pedro Ximenez Tradición 20 Years Ol, Rank 1 Award

In [66]:
def get_top_wineries_rank1_awards_with_awards(db_file):
    """Print the three wineries with the most rank-1 toplist entries and their awards.

    For each winery the output is a header line with the winery name and the
    number of rank-1 rankings, followed by one toplist name per line.
    Rows are grouped by winery *name*.

    The toplist names are aggregated with an explicit separator character
    (ASCII unit separator) instead of GROUP_CONCAT's default comma, so an
    award whose name itself contains a comma stays on a single line.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` is printed to stdout instead of being raised, and
    the connection is always closed.
    """
    # Control character that is not expected inside a toplist name
    separator = "\x1f"

    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    try:
        cursor.execute("""
            SELECT wineries.name,
                   COUNT(*) AS rank1_awards_count,
                   GROUP_CONCAT(toplists.name, ?) AS awards_received
            FROM vintage_toplists_rankings
            JOIN vintages ON vintage_toplists_rankings.vintage_id = vintages.id
            JOIN wines ON vintages.wine_id = wines.id
            JOIN wineries ON wines.winery_id = wineries.id
            JOIN toplists ON vintage_toplists_rankings.top_list_id = toplists.id
            WHERE vintage_toplists_rankings.rank = 1
            GROUP BY wineries.name
            ORDER BY rank1_awards_count DESC
            LIMIT 3
        """, (separator,))

        for winery_name, rank1_awards_count, awards_received in cursor.fetchall():
            # GROUP_CONCAT yields NULL when every toplist name in the group is NULL
            if awards_received is None:
                awards_list = []
            else:
                awards_list = awards_received.split(separator)
            awards_formatted = "\n".join(awards_list)
            print(f"Winery: {winery_name}, Rank 1 Awards Count: {rank1_awards_count}, Awards Received:\n{awards_formatted}\n")

    except sqlite3.Error as e:
        print("SQLite error:", e)
    finally:
        conn.close()

# Relative path of the SQLite database file (same value as in the first cell)
db_file_path = "../db/vivino.db"

# Print the top 3 wineries by rank-1 entries, each followed by its toplist names
get_top_wineries_rank1_awards_with_awards(db_file_path)


Winery: Masseto , Rank 1 Awards Count: 4, Awards Received:
Vivino's 2020 Wine Style Awards: Tuscan Red
Vivino's 2016 Wine Style Awards: Central Italy Red
Vivino's 2017 Wine Style Awards: Central Italy Red
Vivino's 2018 Wine Style Awards: Tuscan Red

Winery: Gaja , Rank 1 Awards Count: 4, Awards Received:
Vivino's 2016 Wine Style Awards: Italian Barbaresco
Vivino's 2018 Wine Style Awards: Italian Barbaresco
Vivino's 2018 Wine Style Awards: Italian Nebbiolo
Vivino's 2016 Wine Style Awards: Northern Italy White

Winery: Château Rayas , Rank 1 Awards Count: 4, Awards Received:
Vivino's 2020 Wine Style Awards: Southern Rhône Châteauneuf-du-Pape Red
Vivino's 2016 Wine Style Awards: Southern Rhône Red
Vivino's 2017 Wine Style Awards: Southern Rhône Red
Vivino's 2018 Wine Style Awards: Southern Rhône Châteauneuf-du-Pape Red



In [39]:
import sqlite3

def get_top_rated_wine_per_country(db_file):
    """Print one wine per country with that country's highest average rating.

    The query first keeps only wines whose ``ratings_average`` equals the
    maximum within their own region, then groups the survivors by country
    name and takes the maximum rating. Countries are listed from the
    highest maximum rating downwards, with the rating shown to two decimals.

    NOTE(review): ``wines.name`` is a bare (non-aggregated) column next to
    ``MAX()``; which wine name is reported relies on SQLite's handling of
    such columns - confirm against the SQLite SELECT documentation.

    Args:
        db_file: Path of the SQLite database file to open.

    A ``sqlite3.Error`` is printed to stdout instead of being raised, and
    the connection is always closed.
    """
    connection = sqlite3.connect(db_file)
    cur = connection.cursor()

    try:
        cur.execute("""
            SELECT countries.name, MAX(wines.ratings_average) AS max_rating, wines.name AS top_wine
            FROM wines
            JOIN regions ON wines.region_id = regions.id
            JOIN countries ON regions.country_code = countries.code
            WHERE wines.ratings_average = (
                SELECT MAX(ratings_average) FROM wines AS w 
                WHERE w.region_id = wines.region_id
            )
            GROUP BY countries.name
            ORDER BY max_rating DESC
        """)

        # Each result row is (country name, best rating, name of the wine shown)
        for country, best_rating, wine in cur.fetchall():
            print(f"Country: {country}, Top Rated Wine: {wine}, Rating: {best_rating:.2f}")

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()

# Relative path of the SQLite database file (same value as in the first cell)
db_file_path = "../db/vivino.db"

# Print one line per country: country name, wine name and rating
get_top_rated_wine_per_country(db_file_path)


Country: États-Unis, Top Rated Wine: Cabernet Sauvignon, Rating: 4.80
Country: Italie, Top Rated Wine: Amarone della Valpolicella Classico Riserva, Rating: 4.80
Country: France, Top Rated Wine: Cristal Rosé Vinothèque, Rating: 4.80
Country: Espagne, Top Rated Wine: Mágico, Rating: 4.80
Country: Hongrie, Top Rated Wine: Eszencia, Rating: 4.70
Country: Australie, Top Rated Wine: Grange, Rating: 4.70
Country: Allemagne, Top Rated Wine: Scharzhofberger Riesling Eiswein, Rating: 4.70
Country: Afrique du Sud, Top Rated Wine: Book 17 XVII, Rating: 4.70
Country: Portugal, Top Rated Wine: Tordiz 40 Year Old Tawny Port, Rating: 4.60
Country: Chili, Top Rated Wine: Almaviva, Rating: 4.60
Country: Argentine, Top Rated Wine: Nosotros Single Vineyard Nómade, Rating: 4.60
Country: Moldavie, Top Rated Wine: Lupi Rezerva, Rating: 4.50
Country: Israël, Top Rated Wine: Yarden Katzrin, Rating: 4.50
Country: Suisse, Top Rated Wine: Sous L'Escalier Petite Arvine du Valais, Rating: 4.40
Country: Roumanie, To

In [65]:
import plotly.express as px

def get_top_wine_per_country(db_file):
    """Collect, per country, the highest ``ratings_average`` and a wine name.

    Args:
        db_file: Path of the SQLite database file to open.

    Returns:
        A list with one dict per country code, holding the keys ``code``
        (the country code passed through ``convert_to_alpha3``),
        ``country_name``, ``max_rating`` and ``top_wine_name``. When a
        ``sqlite3.Error`` occurs it is printed to stdout and the function
        falls through, returning ``None``.

    NOTE(review): ``wines.name`` is a bare (non-aggregated) column next to
    ``MAX()``; which wine name is returned relies on SQLite's handling of
    such columns - confirm against the SQLite SELECT documentation.
    """
    connection = sqlite3.connect(db_file)

    try:
        # One row per country code: (code, country name, best rating, wine name)
        query = """
            SELECT 
                countries.code, 
                countries.name AS country_name,
                MAX(wines.ratings_average) AS max_rating,
                wines.name AS top_wine_name
            FROM 
                wines
            JOIN 
                regions ON wines.region_id = regions.id
            JOIN 
                countries ON regions.country_code = countries.code
            GROUP BY 
                countries.code
        """
        cur = connection.cursor()
        cur.execute(query)

        fetched = cur.fetchall()

        # Turn each positional row into a keyed record for the plotting code
        records = []
        for code, country_name, max_rating, top_wine_name in fetched:
            records.append({
                'code': convert_to_alpha3(code),
                'country_name': country_name,
                'max_rating': max_rating,
                'top_wine_name': top_wine_name,
            })

        return records

    except sqlite3.Error as err:
        print("SQLite error:", err)
    finally:
        connection.close()

# Function to convert two-letter country code to ISO 3166-1 alpha-3 format
def convert_to_alpha3(code):
    """Translate a lower-case two-letter country code to its three-letter form.

    Args:
        code: Two-letter country code such as ``'fr'``.

    Returns:
        The matching upper-case three-letter code when ``code`` is one of
        the 17 entries in the lookup table below; otherwise ``code`` itself,
        unchanged. The lookup is case-sensitive.
    """
    alpha3_by_alpha2 = dict(
        ar='ARG', au='AUS', ch='CHE', cl='CHL', de='DEU', es='ESP',
        fr='FRA', gr='GRC', hr='HRV', hu='HUN', il='ISR', it='ITA',
        md='MDA', pt='PRT', ro='ROU', us='USA', za='ZAF',
    )
    if code in alpha3_by_alpha2:
        return alpha3_by_alpha2[code]
    # Unknown codes are passed through untouched
    return code

# pandas is needed for the DataFrame below; the notebook never imported it,
# so on a fresh kernel the original cell failed with NameError on `pd`.
import pandas as pd

# Provide the path to your SQLite database file
db_file_path = "../db/vivino.db"

# Call the function to retrieve the data (a list of dicts, or None after a SQLite error)
data = get_top_wine_per_country(db_file_path)

# Only plot when at least one country record was returned
if data:
    # One row per country with columns code, country_name, max_rating, top_wine_name
    df = pd.DataFrame(data)

    # Colour each country by its maximum rating; `code` holds the location identifiers
    fig = px.choropleth(df,
                        locations='code',
                        color='max_rating',
                        color_continuous_scale="RdYlGn",
                        hover_name="country_name",
                        hover_data={'top_wine_name': True, 'max_rating': ':.2f'},
                        projection="natural earth",
                        labels={'max_rating': 'Max Rating'})

    # Update layout settings
    fig.update_layout(
        title_text='Top Wine per Country',
        geo=dict(
            showland=True,
            showcountries=True,
            showcoastlines=True,
            projection_type='natural earth'
        )
    )

    # Show the figure
    fig.show()
