In [None]:
#import libraries and dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import chart_studio.plotly as py
import plotly


import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.notebook_repr_html',True)
pd.set_option('display.float_format', lambda x: '%.1f' % x)

In [None]:
py.sign_in('Daina92','Wfamvzko8zxMbVZ1VZ7B')

In [None]:
wine_data = pd.read_csv('Wine_data.csv')
wine_data = wine_data.drop(columns=['Unnamed: 0'],axis=1)
wine_data.head(20)

In [None]:
#Identify the taster of the wine 
wine_data['Taster Name'].value_counts()

In [None]:
# we want to see the cheapeast wine, the average price and the most expensive wine
minimum_price = wine_data['Price'].min()
print(minimum_price)
maximum_price = wine_data['Price'].max()
print(maximum_price)
average_price = round(wine_data['Price'].mean(),2)
print(average_price)

In [None]:
# create a visualization to see the range of wine prices
fig, ax = plt.subplots(figsize=(9, 7))
plt.boxplot(wine_data['Price'], showmeans=True)
plt.title('Distribution of Wine Prices', fontsize=18)
plt.xlabel('Wine')
plt.ylabel('Prices')
plt.show()

In [None]:
# single out the most expensive wine
expensive_wine = wine_data[(wine_data['Price'] >= 3300)]
expensive_wine

In [None]:
# Create a corr matrix to see if there is any correlation between the numeric values in the df
df = wine_data[['Price','Points','Year']]
corrMatrix = df.corr()
sn.heatmap(corrMatrix, annot=True)
plt.show()

# Compare Average Wine Prices By Grade of Wine


In [None]:
#Create a groupby object to aggregate grade of wine vs price of wine
grade_price = wine_data.groupby(['Points']).mean()['Price']
grade_price = grade_price.reset_index()

grade_price = round(grade_price,1)


In [None]:
# plot wine prices vs year produced
import plotly.express as px
data = grade_price
fig = px.bar(data, x='Points', y='Price', hover_data =['Points', 'Price'], color='Price',
             labels ={'Price':'Wine Price($)'},
             title='Average Wine Prices By Grade(World)')


py.iplot(fig, filename='Average Wine Prices By Grade')

# Compare Average Wine Prices by Year 

In [None]:
#Create a groupby object to aggregate year produced vs price of wine
year_price = wine_data.groupby(['Year']).mean()['Price']
year_price = year_price.reset_index()

In [None]:
year_price = round(year_price,1)

# plot wine prices vs year produced
import plotly.express as px
data = year_price
fig = px.bar(data, x='Year', y='Price', hover_data =['Year', 'Price'], color='Price',
             labels ={'Price':'Wine Price($)'},
             title='Average Wine Prices By Year (World)')

py.iplot(fig, filename='Average Wine Prices By Year Produced')



# Compare Average Wine Prices By Country of Origin

In [None]:
#Compare wine prices to the country of origin
country_price = wine_data.groupby(['Country']).mean()['Price']
country_price = country_price.sort_values()
df = country_price.reset_index()


In [None]:
df = round(df,1)

# Plot this wine prices vs country of origin
# we want text label to have the same size, text appear outside of chart
fig = px.bar(df, y='Price', x='Country', text='Price',
             title='Average Wine Prices By Country(World)')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')


py.iplot(fig, filename='Average Wine Prices By Country Of Origin')


In [None]:
# Which wine variety is indentified the most by the wine tasters
wine_data['Variety'].value_counts()

In [None]:
#Focus on the united states and see which states has the most expensive wines on average
usa_only = wine_data.loc[wine_data['Country']== 'US']
usa_only = usa_only.reset_index()

# Drop column called index
usa_only = usa_only.drop('index', axis=1)

In [None]:
# drop strange state column called 'America'
index_names =  usa_only[(usa_only['Province'] == 'America')].index
usa_only.drop(index_names, inplace = True) 

In [None]:
# Compare wine prices by year produced
year_price = usa_only.groupby(['Year']).mean()['Price']
year_price = year_price.reset_index()
year_price = round(year_price,1)

# Plot wine prices vs year produced (USA)
import plotly.express as px
data = year_price
fig = px.bar(data, x='Year', y='Price', hover_data =['Year', 'Price'], color='Price',
             labels ={'Price':'Wine Price($)'},
             title='Wine Prices By Year(USA)')
fig.show()

py.iplot(fig, filename='Average Wine Prices By Year(USA)')

In [None]:
#Plot wine data by price of wines produced in the United States
state_price = usa_only.groupby(['Province']).mean()['Price']
state_price = state_price.reset_index()
state_price = state_price.sort_values('Price')
state_price = round(state_price,1)
state_price.columns= ['State','Wine Prices']

#Group data by State and Price
fig = px.bar(state_price, y='Wine Prices', x ='State', text='Wine Prices', 
             title=' Average Wine Prices By State(USA)')
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8 )
fig.show()

py.iplot(fig, filename='Average Wine Prices By State(USA)')

In [None]:
# State and number of wineries
usa_only['Winery'].value_counts()

In [None]:
#drop duplicates values in Winery, we only want the name of a winery to appear once
non_dups = usa_only.drop_duplicates(subset = ['Winery'])

#Group the dataframe by Winery, lat , lng and location
usa = non_dups.groupby(['Latitude','Longitude','Province']).count()['Winery']
usa = usa.reset_index()
usa

In [None]:
# Plot sum of unique wineries by state in The USA
import plotly.graph_objects as go

usa['text'] = usa['Province'] + '<br>' + 'Number of Wineries: ' + usa['Winery'].astype(str)

fig = go.Figure(data=go.Scattergeo(
        locationmode = 'USA-states',
        lon = usa['Longitude'],
        lat = usa['Latitude'],
        text = usa['text'],
        mode = 'markers',
        marker = dict(
            size = 20,
            opacity = 0.9,
            line = dict(
                width=1,
                color='rgba(102, 102, 102)'
            ),
            
            colorscale = 'Purples',
            cmin = 0,
            color = usa['Winery'],
            cmax = usa['Winery'].max(),
            colorbar_title="Number of Wineries")))

fig.update_layout(
        title = 'Number of Wineries in each State <br>(Hover for state and number of wineries)',
        geo_scope='usa',
    )
fig.show()

py.iplot(fig, filename='Total Number of Wineries By State(USA)')



In [None]:
abbrv = ['ARG','AUS','AUT','BUL','CAN','CHL','HRV','GBR','FRA','GEO','DEU','GRC','HUN','ISR','ITA',\
         'MEX','MDA','NZL','PRT','ROU','SVN','ZAF','ESP','TUR','USA','URY']
            
#abbrv

In [None]:
# Make sure wineries are counted only once to avoid duplicate counts
no_dups = wine_data.drop_duplicates(subset = ['Winery'])

#Group the dataframe by Winery, lat , lng and location
world = no_dups.groupby(['Country']).count()['Winery']
world = world.reset_index()
world.columns = ['Country','Number of Wineries']

#Create a list called country code
abbrv = ['ARG','AUS','AUT','BUL','CAN','CHL','HRV','GBR','FRA','GEO','DEU','GRC','HUN','ISR','ITA',\
         'MEX','MDA','NZL','PRT','ROU','SVN','ZAF','ESP','TUR','USA','URY']
            
abbrv
#Add abbrreviation to existing dataframe
world['Country Code'] = abbrv
world

In [1]:
#plot number of wineries by country 
import plotly.express as px


df = world
fig = px.scatter_geo(df, locations='Country Code', color=world['Number of Wineries'],
                     hover_name="Country", size=world['Number of Wineries'],
                    projection='natural earth',
                    )

py.iplot(fig, filename='Number of Wineries By Country')

NameError: name 'world' is not defined

# Wine Finder

In [2]:
import sqlite3

connection = sqlite3.connect('wine_reviews.db')
c = connection.cursor()

In [4]:
def red_wine(connection):
    category ='Red'
    country = input('What country would you like your wine from?Try US for Country: ')
    price = int(input('What is your price cap($)?: '))
    grade = input('What grade would you like? (80 to 100): ')
    print(' ')
    data = c.execute('SELECT DISTINCT Category, Price, Variety, Province, Country, Title FROM wine WHERE Category=? AND Country=? AND Price <=? AND Points=? LIMIT 10;', (category,country,price,grade,)).fetchall()
    
    found = False
    for id, row in enumerate(data):
         if row[0] == category:
            if row[4] == country:
                found = True  # type is Red
                print(row)
    if found == False:  # if id is not found after the iterations of storage, return false
        print('Sorry we do not have this wine at this time')
        return False
    
  
    
    
def white_wine(connection):
    category='White'
    country = input('What country would you like your wine from?Try US for Country: ')
    price = int(input('What is your price cap($)?: '))
    grade = input('What grade would you like? (80 to 100): ')
    print(' ')
    data = c.execute('SELECT DISTINCT Category, Price, Variety,Province,Country, Title FROM wine WHERE Category=? AND Country=? AND Price <=? AND Points=? LIMIT 10;', (category,country,price,grade,)).fetchall()
    
    found = False
    for id, row in enumerate(data):
         if row[0] == category:
            if row[4] == country:
                found = True  # type is White
                print(row)
    if found == False:  # if id is not found after the iterations of storage, return false
        print('Sorry we do not have this wine at this time')
        return False
    
               
        
def main():
    while '1':
        print(' ')
        print('Welcome! I will be your sommelier today')
        print('')
        print('Please choose between the three options: ')
        print('1. Red Wine')
        print('2. White Wine')
        print('3. Quit')
        
        user_option = input(str('Option: '))
        if user_option == '1':
            print('You have selected red wine')
            red_wine(connection)
            
        elif user_option =='2':
            print('You have selected white wine')
            white_wine(connection)
        elif user_option =='3':
            break
        else:
            print('No valid option was chosen')
            
main()
        

    

 
Welcome! I will be your sommelier today

Please choose between the three options: 
1. Red Wine
2. White Wine
3. Quit
Option: 1
You have selected red wine
What country would you like your wine from?Try US for Country: us
What is your price cap($)?: 25
What grade would you like? (80 to 100): 84
 
Sorry we do not have this wine at this time
 
Welcome! I will be your sommelier today

Please choose between the three options: 
1. Red Wine
2. White Wine
3. Quit
Option: 1
You have selected red wine
What country would you like your wine from?Try US for Country: US
What is your price cap($)?: 25
What grade would you like? (80 to 100): 84
 
('Red', 15.0, 'Pinot Noir', 'Oregon', 'US', "The Blanc de Noir name would suggest it's a sparkling wine; it is not. The label also reads Estate White Wine. It is not. Rather, it is a rosé of Pinot Noir, lightly orange in color. Flavors recall a Pop Tart sort of pastry, simple and slightly sweet.")
('Red', 25.0, 'Pinot Noir', 'Oregon', 'US', 'Despite its sing

Option: 3


In [None]:
c.close()
connection.close()