In [27]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB         # Naive Bayes
from sklearn.linear_model import LogisticRegression
from textblob import TextBlob, Word
from nltk.stem.snowball import SnowballStemmer
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('max_colwidth', 500)


%matplotlib inline

In [28]:
train= pd.read_csv('./winemag-data-130k-v2-copy.csv')

In [29]:
train.isna().sum()

Unnamed: 0                   0
country                     63
description                  0
designation              37465
points                       0
price                     8996
province                    63
region_1                 21247
region_2                 79460
taster_name              26244
taster_twitter_handle    31213
title                        0
variety                      1
winery                       0
dtype: int64

In [30]:
train['log_price'] = np.log(train['price'])

In [31]:
USA_wine=train[(train.country == 'US')]

In [32]:
USA_wine.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,log_price
2,2,US,"Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp acidity underscoring the flavors. The wine was all stainless-steel fermented.",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,2.639057
3,3,US,"Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with notes of honey-drizzled guava and mango giving way to a slightly astringent, semidry finish.",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore),Riesling,St. Julian,2.564949
4,4,US,"Much like the regular bottling from 2012, this comes across as rather rough and tannic, with rustic, earthy, herbal characteristics. Nonetheless, if you think of it as a pleasantly unfussy country wine, it's a good companion to a hearty winter stew.",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child Block Pinot Noir (Willamette Valley),Pinot Noir,Sweet Cheeks,4.174387
10,10,US,"Soft, supple plum envelopes an oaky structure in this Cabernet, supported by 15% Merlot. Coffee and chocolate complete the picture, finishing strong at the end, resulting in a value-priced wine of attractive flavor and immediate accessibility.",Mountain Cuvée,87,19.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Kirkland Signature 2011 Mountain Cuvée Cabernet Sauvignon (Napa Valley),Cabernet Sauvignon,Kirkland Signature,2.944439
12,12,US,"Slightly reduced, this wine offers a chalky, tannic backbone to an otherwise juicy explosion of rich black cherry, the whole accented throughout by firm oak and cigar box.",,87,34.0,California,Alexander Valley,Sonoma,Virginie Boone,@vboone,Louis M. Martini 2012 Cabernet Sauvignon (Alexander Valley),Cabernet Sauvignon,Louis M. Martini,3.526361


# Isolated the dropna for log_price

In [33]:
USA_wine = USA_wine.dropna(subset=['log_price'])

In [34]:
USA_wine.isna().sum()

Unnamed: 0                   0
country                      0
description                  0
designation              17514
points                       0
price                        0
province                     0
region_1                   276
region_2                  3973
taster_name              16754
taster_twitter_handle    19730
title                        0
variety                      0
winery                       0
log_price                    0
dtype: int64

# The top 100 wineries by volume have a mean point value of 85 where as the USA mean is 88

In [35]:
USA_wine.shape

(54265, 15)

In [36]:
#USA_wine.dropna(inplace=True)

In [37]:
winery = list(USA_wine.winery.value_counts()[:1000].index)
def winery_to_simple(x,winery):
    if x in winery:
        return x
    else: 
        return 'Other'
USA_wine['winery_simple'] = USA_wine.winery.apply(lambda x: winery_to_simple(x,winery))
dummy_winery = pd.get_dummies(USA_wine.winery_simple,prefix = 'winery_')

In [38]:
dummy_winery.shape


(54265, 1001)

In [39]:
USA_wine = USA_wine[['log_price','points']]

# Waited till I could isolate log_price with points to eleminate NaN values from only log_price

In [40]:
wine_model=pd.concat([USA_wine,dummy_winery],axis=1)

In [41]:
wine_model.head()

Unnamed: 0,log_price,points,winery__14 Hands,winery__3 Horse Ranch Vineyards,winery__:Nota Bene,winery__A to Z,winery__Abacela,winery__Abeja,winery__Acacia,winery__Acorn,winery__Adelaida,winery__Adelsheim,winery__Agate Ridge,winery__Airfield Estates,winery__Alexander Valley Vineyards,winery__Alexandria Nicole,winery__Alma Rosa,winery__Almquist,winery__Alpha Omega,winery__Alta Colina,winery__Alta Maria,winery__Amalie Robert,winery__Amavi,winery__Ambassador Vineyard,winery__Amici,winery__Amity,winery__Ampelos,winery__Anaba,winery__Anam Cara,winery__Ancient Oak Cellars,winery__Ancient Peaks,winery__Andis,winery__Andretti,winery__Andrew Murray,winery__Andrew Rich,winery__Andrew Will,winery__Angel Vine,winery__Angeline,winery__Anglim,winery__Animale,winery__Anthony Nappa,winery__Anthony Road,winery__Apex,winery__Apolloni,winery__Arbor Brook,winery__Arbor Crest,winery__Archery Summit,winery__Ardor,winery__Argyle,winery__Arista,winery__Armanino Family Cellars,winery__Armida,winery__Arrow Creek,winery__Arrowood,winery__Artesa,winery__Ascension Cellars,winery__Asuncion Ridge,winery__Atwater,winery__Au Bon Climat,winery__Aubichon Cellars,winery__Auclair,winery__Avennia,winery__Ayoub,winery__B Cellars,winery__B.R. Cohn,winery__Babcock,winery__Baer,winery__Baileyana,winery__Bailiwick,winery__Baker & Brain,winery__Ballentine,winery__Balletto,winery__Barboursville Vineyards,winery__Barefoot,winery__Bargetto,winery__Barnard Griffin,winery__Barra of Mendocino,winery__Barrister,winery__Basel Cellars,winery__Baxter,winery__Beaulieu Vineyard,winery__Beauregard,winery__Beckmen,winery__Bedell,winery__Belden Barns,winery__Bell,winery__Bella,winery__Bella Luna,winery__Bellangelo,winery__Belle Ambiance,winery__Benessere,winery__Bennett Lane,winery__Benovia,winery__Benton-Lane,winery__Benziger,winery__Beresan,winery__Bergevin Lane,winery__Bergström,winery__Beringer,winery__Bernardus,winery__Bethel Heights,winery__Betz Family,winery__Bianchi,winery__Bien Nacido,winery__Big Basin,winery__Big Table Farm,winery__Billsboro,winery__Bjornstad,winery__Black Box,winery__Black Kite,winery__Black Ridge,winery__Black Stallion,winery__Blackbird Vineyards,winery__Bloomer Creek,winery__Blue Rock,winery__Bodkin,winery__Boedecker Cellars,winery__Boeger,winery__Boekenoogen,winery__Bogle,winery__Boheme,winery__Bonaccorsi,winery__Bonair,winery__Bonny Doon,winery__Bonterra,winery__Boomtown,winery__Borra,winery__Bota Box,winery__Bouchaine,winery__Boudreaux Cellars,winery__Boundary Breaks,winery__Brandborg,winery__Brander,winery__Brassfield,winery__Brecon Estate,winery__Brian Carter Cellars,winery__Brick & Mortar,winery__Bridlewood,winery__Broken Earth,winery__Brooks,winery__Browne Family Vineyards,winery__Bruliam,winery__Brutocao,winery__Buehler,winery__Buena Vista,winery__Bunnell,winery__Burnt Bridge,winery__Buttonwood,winery__Buttonwood Farm,winery__Buty,winery__Byington,winery__Byron,winery__CK Mondavi,winery__Ca' Momi,winery__Cadaretta,winery__Cadence,winery__Calcareous,winery__Calera,winery__Callaway,winery__Camaraderie,winery__Cambria,winery__Cambridge & Sunset,winery__Cameron Hughes,winery__Canoe Ridge,winery__Carabella,winery__Carlisle,winery__Carmel Road,winery__Carol Shelton,winery__Carr,winery__Casa Larga,winery__Cass,winery__Castello di Amorosa,winery__Castello di Borghese,winery__Castillo De Feliciana,winery__Castle Rock,winery__Castoro Cellars,winery__Cathedral Ridge,winery__Cave B,winery__Cayuse,winery__Cedarville Vineyard,winery__Center of Effort,winery__Chacewater,winery__Chaddsford,winery__Chalk Hill,winery__Chamisal Vineyards,winery__Chandon,winery__Chanin,winery__Channing Daughters,winery__Charles & Charles,winery__Charles Krug,winery__Charles Smith,winery__Chateau Lafayette Reneau,winery__Chateau Morrisette,winery__Chateau St. Jean,winery__Chateau Ste. Michelle,winery__Chehalem,winery__Chime,winery__Chimney Rock,winery__Chronic Cellars,winery__Château Bianca,winery__Cinder,winery__Cinnabar,winery__City Winery Chicago,winery__Claiborne & Churchill,winery__Clayhouse,winery__Clif Family,winery__Cliff Creek,winery__Cline,winery__Clos La Chance,winery__Clos Pegase,winery__Clos du Bois,winery__Clos du Val,winery__Cloudlift Cellars,winery__Coelho,winery__Coeur d'Alene,winery__Coeur de Terre,winery__College Cellars,winery__Collier Falls,winery__Colter's Creek,winery__Columbia Crest,winery__Columbia Winery,winery__Comartin,winery__Concannon,winery__Conn Creek,winery__Convergence Zone,winery__Cooper-Garrod,winery__Coquelicot,winery__Corley,winery__Corvus,winery__Cosentino,winery__Courtney Benham,winery__Covey Run,winery__Covington,winery__Cowhorn,winery__Cristom,winery__Cru,winery__Cuda Ridge Wines,winery__Cupcake,winery__Curtis,winery__Cuvaison,winery__Côte Bonneville,winery__D.R. Stephens,winery__DaMa,winery__Damiani,winery__DanCin,winery__Dante Robere,winery__Daou,winery__Darcie Kent Vineyards,winery__Darioush,winery__David Hill,...,winery__Rosenblum,winery__Ross Andrew,winery__Roth,winery__Round Pond,winery__RoxyAnn,winery__Roza Ridge,winery__Ruby Hill Winery,winery__Rusack,winery__Rutherford Hill,winery__Rutherford Ranch,winery__Ryan Patrick,winery__Rôtie Cellars,winery__Sagelands,winery__Saintsbury,winery__San Juan Vineyards,winery__San Simeon,winery__Sanford,winery__Sanglier Cellars,winery__Sanguis,winery__Sannino,winery__Santa Barbara Winery,winery__Sante Arcangeli,winery__Saracina,winery__Sarah's Vineyard,winery__Sausal,winery__Savage Grace,winery__Saviah,winery__Sawtooth,winery__Sbragia,winery__Schramsberg,winery__Schug,winery__Scott Harvey,winery__Scott Paul,winery__Sculpterra,winery__Sea Smoke,winery__Sean Minor,winery__Sean Thackrey,winery__Season,winery__Sebastiani,winery__Seghesio,winery__Sequoia Grove,winery__Sequum,winery__Seven Hills,winery__Sextant,winery__Shafer,winery__Shannon Ridge,winery__Shaw,winery__Shea,winery__Sheldrake Point,winery__Shenandoah Vineyards,winery__Shooting Star,winery__Siduri,winery__Signorello,winery__Silvan Ridge,winery__Silver Thread,winery__Silverado,winery__Simi,winery__Sineann,winery__Sinor-LaVallee,winery__Six Sigma Ranch,winery__Skinner,winery__Sleight of Hand,winery__Small Vines,winery__Snoqualmie,winery__Sobon Estate,winery__Sojourn,winery__Sokol Blosser,winery__Sol Rouge,winery__Soléna,winery__Sonoma-Cutrer,winery__Soos Creek,winery__South Coast,winery__South Stage,winery__Souverain,winery__Spann Vineyards,winery__Sparkling Pointe,winery__Sparkman,winery__Spell,winery__Spicy Vines,winery__Spindrift Cellars,winery__Spring Mountain Vineyard,winery__Spring Valley Vineyard,winery__St. Francis,winery__St. Innocent,winery__St. Supéry,winery__Stag's Leap Wine Cellars,winery__Stags' Leap Winery,winery__Standing Stone,winery__Star Lane Vineyard,winery__Starmont,winery__Ste. Chapelle,winery__Stemmler,winery__Stephen & Walker,winery__Stephen Ross,winery__Sterling,winery__Steven Kent,winery__Stevens,winery__Stickybeak,winery__Still Waters,winery__Stinson,winery__Stoller,winery__Stolpman,winery__Stonestreet,winery__Stottle,winery__Stuhlmuller Vineyards,winery__SuLei,winery__Summerland,winery__Summers,winery__Summerwood,winery__Sunstone,winery__Sutcliffe,winery__Swanson,winery__Swedish Hill,winery__Sweet Cheeks,winery__Swiftwater Cellars,winery__Syncline,winery__Tablas Creek,winery__Talbott,winery__Talisman,winery__Talley,winery__Tamarack Cellars,winery__Tamber Bey,winery__Tangent,winery__Tantara,winery__Tarara,winery__Telaya,winery__Tempus Cellars,winery__Tercero,winery__Terlato,winery__Terra Valentine,winery__Terra d'Oro,winery__Terre Rouge,winery__Tertulia,winery__Testarossa,winery__The Calling,winery__The Eyrie Vineyards,winery__The Farm Winery,winery__The Federalist,winery__The Four Graces,winery__The Williamsburg Winery,winery__Thirsty Owl Wine Company,winery__Thomas Fogarty,winery__Thomas George,winery__Three Fox,winery__Three Rivers,winery__Three Sticks,winery__Thurston Wolfe,winery__Tildio,winery__Tin Barn,winery__Toad Hollow,winery__Tolosa,winery__Torii Mor,winery__Tortoise Creek,winery__Tranche,winery__Treana,winery__Trefethen,winery__Treleaven,winery__Trentadue,winery__Trinchero,winery__Trinitas,winery__Trione,winery__Trisaetum,winery__Troon,winery__Truchard,winery__Truett Hurst,winery__Trump,winery__Trust,winery__Tulpen,winery__Turnbull,winery__Two Mountain,winery__Two Vintners,winery__Twomey,winery__Ty Caton,winery__Uvaggio,winery__V. Sattui,winery__VML,winery__Va Piano,winery__Valley View,winery__Valley of the Moon,winery__Vellum,winery__Venge,winery__Ventana,winery__Ventosa,winery__Veramar,winery__Vermeil,winery__Vidon Vineyard,winery__Vie Winery,winery__Viento,winery__Vigilance,winery__Villa San Juliette,winery__Vin du Lac,winery__Vina Robles,winery__Vine Cliff,winery__Vinum,winery__Vista Hills,winery__Vivác Winery,winery__Volker Eisele Family Estate,winery__Von Strasser,winery__W.H. Smith,winery__W.T. Vintners,winery__Wagner,winery__Waits-Mast,winery__Walla Walla Vintners,winery__Walnut City WineWorks,winery__Walt,winery__Washington Hills,winery__Waterbrook,winery__Watermill,winery__Waters,winery__Waters Crest,winery__Waterstone,winery__Wattle Creek,winery__Waxwing,winery__Wayfarer,winery__Wedell Cellars,winery__Wellington,winery__Wente,winery__WesMar,winery__Westerly,winery__White Oak,winery__Whitecliff Vineyard,winery__Whitehall Lane,winery__Wild Horse,winery__WillaKenzie Estate,winery__Willamette Valley Vineyards,winery__William Church,winery__William Hill Estate,winery__Williams Selyem,winery__Wilridge,winery__Wilson,winery__Wind Rose,winery__Winderlea,winery__Winter's Hill,winery__Wise Villa,winery__Wood Family Vineyards,winery__Woodbridge by Robert Mondavi,winery__Woodenhead,winery__Woodinville Wine Cellars,winery__Woodward Canyon,winery__World's End,winery__Wrath,winery__Wölffer,winery__X,winery__Y Rousseau,winery__Yamhill Valley,winery__Yorkville Cellars,winery__ZD,winery__Zaca Mesa,winery__Écluse,winery__àMaurice
2,2.639057,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2.564949,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4.174387,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10,2.944439,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12,3.526361,87,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
wine_model.shape

(54265, 1003)

In [43]:
#wine_model.corr()['log_price'].sort_values(ascending=False)[:500]

In [44]:
features = [x for x in wine_model.columns if x != ('points')]

X=wine_model[features]
y=wine_model['points']

X_train,X_test,y_train,y_test = train_test_split(X,y, random_state = 42)

In [45]:
from sklearn.linear_model import Ridge

for alpha in range(10):
    alpha=alpha/10
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Instantiate the model.
    #(Alpha of zero has no regularization strength, essentially a basic linear regression.)
    ridgereg = Ridge(alpha=alpha, normalize=True)

    # Fit the model.
    ridgereg.fit(X_train, y_train)

    # Predict with fitted model.
    predictions = ridgereg.predict(X_test)
    #print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    score = ridgereg.score(X_train,y_train)
    print(alpha,score)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  import sys


0.0 0.4295443939886455
0.1 0.42636193276544376
0.2 0.41941352201989923
0.3 0.4107242087509918
0.4 0.4012151844744993
0.5 0.3913708210160819
0.6 0.381467854845917
0.7 0.37167138600544924
0.8 0.3620813764088111
0.9 0.3527576736449339


# This proves the inherent bias of wine scores.
# If your are a wellknown winery and you consistently make great wine you are going to score great wine

# Price by winery score is .62 the best scoring model by far