In [41]:
from __future__ import print_function
import pandas as pd
# for plotting
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import numpy as np
pd.pandas.set_option('display.max_columns', None)
#pip install --upgrade gensim
import json
import operator
import collections
import re
import numpy as np
import pandas as pd
import nltk
from gensim.models import word2vec
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
%matplotlib inline

In [42]:
train_dataset = json.load(open('train.json','r'))
test_dataset = json.load(open('test.json','r'))

In [43]:
ingredients = list()

for recipe in train_dataset:
    for ingredient in recipe[u'ingredients']:
        ingredients.append(ingredient.strip())

cuisines = list()
for recipe in train_dataset:
    cuisines.append(recipe[u'cuisine'].strip())
print("Total recipes in the train corpus",len(cuisines))

Total recipes in the train corpus 39774


# Dictionary of output labels

In [44]:
labels = set(cuisines)
labels = list(labels)
labels = { labels[i] : i for i in range(0, len(labels) ) }
print(labels)

{'british': 0, 'cajun_creole': 1, 'mexican': 2, 'korean': 3, 'thai': 4, 'southern_us': 5, 'indian': 6, 'italian': 7, 'brazilian': 8, 'french': 9, 'japanese': 10, 'moroccan': 11, 'russian': 12, 'irish': 13, 'vietnamese': 14, 'spanish': 15, 'greek': 16, 'chinese': 17, 'jamaican': 18, 'filipino': 19}


In [45]:
print('ingredients in the dataset: ',len((ingredients)))
print('unique ingredients in the dataset: ',len(set(ingredients)))

ingredients in the dataset:  428275
unique ingredients in the dataset:  6714


# Cleaning

In [46]:
import inflect
stemmer = inflect.engine()

recipes_as_sentences_list = list()

for recipe in train_dataset:
    clean_recipe = list()
    for ingredient in recipe['ingredients']:
        ingredient = re.sub(r'[^a-zA-Z\s]+', '', ingredient)
        ingredient =  re.sub(r'\(.*oz.\)|large|medium|small|extra|low-fat|warm|plain|hot|crushed|crumbles|ground|minced|chopped|sliced|heavy|grated|all-purpose|shredded|breakfast|whole|crumbled|self rising|low sodium|flat|diced|unsweetened','', ingredient)
        if stemmer.singular_noun(ingredient) is not False:
            ingredient = stemmer.singular_noun(ingredient)
        words_in_ingredients = nltk.tag.pos_tag(ingredient.split())
        words_in_ingredients=[word[0] for word in words_in_ingredients if word[1] in ('NN')]
        ingredient = " ".join(words_in_ingredients)
        ingredient = ingredient.strip().lower()
        clean_recipe.append(ingredient)
    while("" in clean_recipe) :
        clean_recipe.remove("")
    recipes_as_sentences_list.append(clean_recipe)

In [47]:
u_ingredients = set()
for recipe in recipes_as_sentences_list:
    for ingredient in recipe:
        u_ingredients.add(ingredient)
print('unique ingredients after cleaning the dataset: ',len(u_ingredients))

unique ingredients after cleaning the dataset:  3903


In [48]:
ingredients_removed_in_cleaning = u_ingredients.difference(set(ingredients))
print(len(ingredients_removed_in_cleaning))

1719


In [49]:
num_features = 300              
min_word_count = 1                   
num_workers = 4
context = 10                                                                                

model = word2vec.Word2Vec(recipes_as_sentences_list, workers=num_workers, \
            size=num_features, min_count = min_word_count, \
            window = context)

In [50]:
print("Before cleaning: ")
print(train_dataset[12342]["ingredients"])
print("After cleaning: ")
print(recipes_as_sentences_list[12342])
print("=======================================================")
print("Before cleaning: ")
print(train_dataset[122]["ingredients"])
print("After cleaning: ")
print(recipes_as_sentences_list[122])

Before cleaning: 
['light soy sauce', 'peeled shrimp', 'brown sugar', 'green onions', 'chicken stock', 'fresh ginger root', 'boneless pork loin', 'chinese rice wine', 'wonton wrappers']
After cleaning: 
['soy sauce', 'shrimp', 'brown sugar', 'onion', 'chicken stock', 'ginger root', 'boneless pork loin', 'rice wine', 'wonton wrapper']
Before cleaning: 
['tomato sauce', 'ground black pepper', 'garlic', 'scallions', 'chipotles in adobo', 'avocado', 'dried thyme', 'instant white rice', 'cilantro leaves', 'coconut milk', 'water', 'red beans', 'chopped celery', 'skinless chicken thighs', 'onions', 'lime zest', 'lime juice', 'lime wedges', 'salt', 'carrots']
After cleaning: 
['tomato sauce', 'pepper', 'garlic', 'scallion', 'chipotle adobo', 'avocado', 'thyme', 'rice', 'cilantro leaf', 'coconut milk', 'water', 'bean', 'celery', 'skinless chicken thigh', 'onion', 'lime zest', 'lime juice', 'lime wedge', 'salt', 'carrot']


In [51]:
model.similarity('ginger root', 'ginger')

0.9395316

In [52]:
print(model.wv.vectors.shape)
vocab = model.wv.vocab
#print(model['olive'])

(3903, 300)


In [53]:
ingredients_new = list()
for recipe in recipes_as_sentences_list:
    for ingredient in recipe:
        ingredients_new.append(ingredient.strip())
print(len((ingredients_new)))
ingredients_new_set = set(ingredients_new)
print(len((ingredients_new_set)))

#so there are no unknown 

416965
3903


In [54]:
unknown_ingredients = []
for ingredient in ingredients_new_set:
    if ingredient not in vocab.keys():
        unknown_ingredients.append(ingredient)

In [55]:
print(len(unknown_ingredients))

0


In [56]:
def sent_vectorizer(sent, model):
    sent_vec =[]
    numw = 0
    sent = [x for x in sent if x not in unknown_ingredients]
    for w in sent:
        try:
            if numw == 0:
                sent_vec = model[w]
            else:
                sent_vec = np.add(sent_vec, model[w])
            numw+=1
        except:
            pass
    return np.asarray(sent_vec) / numw

In [57]:
recipes_as_context_vectors_of_ingredients=[]
for sentence in recipes_as_sentences_list:
    recipes_as_context_vectors_of_ingredients.append(sent_vectorizer(sentence, model))

In [58]:
print("Length of vector", len(recipes_as_context_vectors_of_ingredients[0]))
print(recipes_as_sentences_list[0])
print("Context Vectors taindataset count records: ", len(recipes_as_context_vectors_of_ingredients))
print("Original taindataset count records: ",len(recipes_as_sentences_list))

Length of vector 300
['romaine lettuce', 'olive', 'grape tomato', 'garlic', 'pepper', 'onion', 'feta cheese']
Context Vectors taindataset count records:  39774
Original taindataset count records:  39774


# Update json tarindataset

In [59]:
for i,recipe in enumerate(train_dataset):
    recipe[u'ingredients'] = recipes_as_context_vectors_of_ingredients[i]
    recipe[u'cuisine'] = labels[recipe[u'cuisine']]

# Convert Json to Pandas dataframe: Only for understanding and taken output variable from this dataframe

In [60]:
from pandas.io.json import json_normalize
train_dataset_new = json_normalize(train_dataset)
train_dataset_new.drop(['id'],axis=1,inplace=True)
train_dataset_new.head()

Unnamed: 0,cuisine,ingredients
0,16,"[0.28586197, -0.09290139, -0.10495607, -0.1410..."
1,5,"[0.20361829, 0.092594296, -0.11171341, -0.1515..."
2,19,"[0.08699065, 0.051300272, 0.01406965, -0.26668..."
3,6,"[0.56616384, 0.20685188, -0.044593178, -0.2793..."
4,6,"[0.10824637, 0.15037799, 0.0628284, -0.1791812..."


# Machine Learning

In [61]:
data = pd.DataFrame(recipes_as_context_vectors_of_ingredients)
print(data.shape)
data.head()

(39774, 300)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299
0,0.285862,-0.092901,-0.104956,-0.141079,-0.343134,0.018658,-0.329705,-0.253953,-0.107836,0.27517,0.12664,-0.164504,0.242465,-0.156059,0.069863,-0.37154,-0.186931,-0.457091,-0.283191,0.018625,0.216196,-0.047563,0.054361,0.331155,-0.221449,0.118537,0.309211,0.099856,-0.195914,-0.216138,0.148422,0.111567,-0.243724,-0.30154,0.153315,0.226266,-0.168114,0.038124,-0.377165,-0.104259,-0.245899,-0.034031,0.186209,-0.246264,-0.355434,-0.02147,-0.128582,0.245688,0.456542,0.075975,-0.102264,0.066549,-0.158218,-0.314325,0.225521,-0.224753,-0.022723,-0.083077,0.166481,0.009899,-0.473658,-0.497112,0.032715,-0.091476,-0.164462,0.171175,0.410027,0.197244,0.495246,-0.095109,0.53143,0.021766,-0.332679,-0.048326,0.279605,-0.022727,0.165361,-0.234565,0.276868,0.110088,0.091306,-0.506916,0.450488,-0.025608,-0.195939,0.263726,0.198808,0.190501,0.238037,0.161614,-0.335157,0.26125,0.366735,-0.320798,0.100499,0.323603,0.206073,0.142174,-0.167733,0.040372,-0.107068,-0.101688,-0.283618,-0.49018,-0.082247,-0.248786,0.073676,-0.392439,0.318291,0.00956,0.045009,0.077192,0.315131,0.101433,0.033035,-0.214375,-0.020706,0.170736,0.102457,-0.132621,0.03606,-0.334531,0.381246,-0.254584,0.082054,-0.383621,-0.089251,0.037412,-0.096276,0.061637,0.219466,-0.288311,0.308858,-0.382105,-0.525282,0.100674,0.017538,0.204352,0.42922,0.464965,0.47738,0.217622,0.14784,0.488478,0.073497,-0.128959,0.139289,0.106418,0.288603,-0.511372,0.066647,-0.127446,-0.046222,0.210256,-0.704312,0.012463,0.436102,0.041509,-0.137014,-0.28883,-0.290843,0.069458,-0.056271,0.170431,0.200217,-0.207172,-0.235331,0.080113,-0.128064,-0.309254,0.199523,0.306442,0.345948,-0.325753,-0.160482,0.48086,-0.206121,-0.029315,-0.06502,0.21912,0.541726,-0.149281,-0.086914,-0.045517,0.417162,0.194302,0.41609,-0.087867,-0.105092,-0.209816,0.194123,0.078157,-0.419996,-0.064581,0.298767,-0.012142,-0.230738,0.116464,0.548734,0.03993,-0.050884,-0.299697,0.354323,0.053719,-0.248611,-0.156978,0.144317,-0.033441,-0.133375,0.318379,0.103274,-0.032953,-0.013313,0.06105,-0.053579,0.615861,-0.205658,0.09013,0.21069,-0.151209,0.239364,-0.31542,-0.266888,0.114705,-0.001046,-0.127042,-0.326912,-0.656359,0.327963,0.084491,-0.351521,0.397267,0.222389,0.096462,0.065139,-0.065133,-0.180603,0.362301,0.122646,-0.202959,0.139354,-0.070015,-0.46328,-0.041579,-0.261727,-0.469859,-0.256715,0.307068,-0.200408,-0.152571,0.095478,-0.561029,-0.068957,0.167482,-0.13248,-0.199525,-0.009716,-0.063114,-0.444418,0.087899,-0.248784,-0.401089,-0.162596,-0.252509,0.20756,-0.360343,0.078712,-0.434182,-0.072858,0.292766,0.145144,0.791459,-0.378336,-0.102891,0.398605,-0.039855,-0.157138,-0.376547,0.213217,0.23456,0.068015,-0.245633,-0.344618,-0.066779,0.12725,0.090009,0.080086,-0.280653,0.233545,0.371713,0.194261,-0.013065,0.269459,-0.091065,-0.206738,-0.20661,-0.142062,-0.137695,0.060239,-0.257914
1,0.203618,0.092594,-0.111713,-0.151551,-0.092854,0.162009,-0.126274,-0.005025,0.032185,0.140755,-0.146589,0.071932,0.17138,0.096066,-0.077652,0.197759,-0.004425,-0.133261,-0.146471,-0.288323,0.219627,0.07213,-0.081254,-0.006711,-0.133664,0.074143,-0.20611,-0.066605,0.213317,-0.153791,0.048308,0.021753,0.080857,-0.244494,0.051355,0.053983,0.07639,-0.040799,-0.330558,-0.043871,-0.203573,0.195085,-0.085623,-0.410295,-0.295467,0.140634,0.087754,-0.022836,0.256366,-0.12496,-0.162298,-0.085922,-0.142451,-0.080477,0.171513,0.178582,0.137657,-0.206184,-0.135562,-0.209845,-0.16369,-0.238586,-0.219156,-0.024028,-0.087232,-0.190154,0.141471,-0.184449,0.14053,-0.104247,0.240641,0.332978,-0.31071,-0.203325,0.203066,0.163328,0.227577,0.072039,0.086306,0.090072,-0.064411,-0.138526,-0.231596,0.182317,-0.254161,-0.104525,0.194926,0.300882,0.017396,0.191678,-0.110847,0.05,0.087566,-0.011957,-0.006275,-0.021744,-0.073451,0.133804,-0.122516,0.156166,0.1373,-0.113651,-0.098912,-0.131161,0.10296,-0.43954,-0.133641,-0.109921,0.323531,0.133956,-0.081387,0.165185,-0.239237,0.272974,0.044976,0.103606,-0.18749,0.031573,0.223252,-0.377205,0.406329,-0.161134,0.247495,-0.127054,-0.164372,-0.448515,-0.317078,0.011822,-0.04248,-0.163086,0.085382,-0.066055,-0.021366,-0.195205,-0.31238,0.139328,0.477968,0.048894,-0.032284,0.1976,0.199606,0.010469,-0.053367,0.156863,-0.209604,-0.332467,0.037627,0.297964,0.193407,-0.186004,-0.124629,0.047236,0.241472,0.236084,-0.263842,-0.236942,0.18445,-0.001174,-0.43116,-0.107417,-0.125284,-0.136666,-0.017703,0.236069,0.168242,-0.218667,-0.434346,0.027347,-0.105657,-0.187233,0.116247,0.086795,0.099084,-0.474921,-0.086564,0.269139,-0.217726,-0.133996,-0.012456,0.076382,0.335158,-0.11942,-0.062883,-0.460466,0.246018,-0.118886,0.046795,0.085551,-0.026737,-0.250894,0.175409,0.124365,-0.301016,-0.187728,-0.041285,0.183113,0.026255,-0.161252,0.375214,-0.115217,0.22103,-0.111942,0.025597,-0.07635,0.120117,-0.00232,0.191324,-0.014079,-0.291458,0.300771,0.254621,0.229613,-0.031228,-0.11219,0.147898,0.36352,-0.284971,0.322276,0.114691,-0.03784,0.547195,-0.102793,-0.224127,0.13409,0.015073,0.180249,-0.428845,-0.171952,0.099711,-0.193889,-0.168466,0.176378,0.330643,0.183587,-0.229001,0.057875,0.001483,0.390407,-0.069313,-0.232497,0.20075,0.092258,-0.365005,-0.082577,-0.237838,-0.245629,0.035658,0.356923,-0.034283,-0.065675,-0.156936,-0.356344,0.036653,0.00522,0.089092,-0.044336,-0.085061,-0.009935,0.000492,0.240796,-0.063105,-0.149534,-0.222335,-0.155561,-0.176209,-0.136387,-0.063831,-0.191043,-0.086739,0.118542,0.207997,0.492016,-0.1612,-0.183312,0.352156,-0.132277,-0.12476,-0.033638,-0.143204,0.055683,-0.319014,0.238021,-0.25036,0.122496,0.055985,0.399573,-0.094738,-0.049095,-0.008034,0.376308,-0.232284,0.065084,0.200479,0.061887,0.155471,-0.056409,0.117316,-0.268429,-0.089767,-0.154529
2,0.086991,0.0513,0.01407,-0.266688,0.01802,0.076325,-0.072756,0.096848,0.015955,0.083976,0.030003,0.016415,0.055144,-0.042187,-0.079055,0.046444,0.072042,-0.133561,-0.229814,-0.261906,0.058335,0.147804,0.029174,-0.071264,0.093863,-0.002036,-0.190942,-0.163506,0.165119,-0.145068,-0.069798,-0.097826,0.133282,-0.298999,0.230804,0.057313,0.013218,0.090972,-0.111292,0.132975,-0.065516,0.196657,-0.017602,-0.30427,-0.114749,0.01082,0.034512,-0.007646,0.137759,-0.082141,0.076491,-0.03163,-0.070558,-0.090141,-0.16216,-0.007637,0.146679,-0.168815,-0.138505,-0.055556,-0.064851,-0.112405,-0.183395,-0.053764,-0.022,-0.007013,0.127383,-0.027618,0.155692,-0.111284,0.207491,-0.053392,-0.35386,-0.142852,0.05848,0.056025,0.201573,0.061702,0.190515,0.149295,-0.291498,-0.082518,-0.063591,0.19157,-0.098225,-0.07192,0.152201,0.383195,0.218988,-0.029292,-0.078638,-0.224258,0.081143,-0.069394,-0.084662,-0.027341,-0.110882,0.144524,0.012243,0.118314,0.131954,-0.313989,-0.094877,-0.158208,0.024832,-0.252567,-0.010676,-0.162332,0.254706,0.263758,0.05532,-0.019738,0.058388,0.141759,0.113176,0.168175,-0.30858,0.05834,0.03248,-0.074283,0.209353,8e-06,-0.057205,-0.231915,-0.040958,-0.29934,-0.351615,0.001594,0.027232,-0.10826,-0.085282,-0.041014,-0.325083,-0.201304,-0.138187,0.075713,0.506145,0.05243,-0.145433,0.313508,0.068945,-0.085245,-0.119818,-0.040852,-0.373275,-0.106733,0.2706,0.304363,0.060787,-0.396129,-0.148025,-0.059601,0.377488,0.251941,-0.217052,-0.349136,0.091294,-0.006683,-0.145933,-0.270569,0.036486,-0.121327,-0.063068,0.242767,0.145712,-0.154268,-0.298508,-0.011772,-0.077295,0.032714,0.029509,0.363572,0.117202,-0.430462,-0.057564,0.207152,-0.050183,-0.101038,0.204491,-0.074599,0.34362,-0.003285,-0.09137,-0.284214,0.179654,-0.124838,0.082732,0.042469,0.003499,-0.191368,-0.01926,0.054916,-0.243503,-0.03415,0.050453,0.238026,0.034949,-0.105499,0.141685,-0.158613,0.153561,0.044685,0.172649,-0.243594,0.069262,-0.058605,0.210959,-0.051869,-0.011404,0.157893,0.15409,-0.019697,0.133004,-0.087116,0.026827,0.317569,-0.144775,0.212823,0.148963,0.156032,0.41334,-0.050362,-0.055896,0.009347,-0.10885,0.112609,-0.257373,-0.14128,-0.019169,-0.123292,-0.22554,0.187797,0.30691,0.07453,-0.11931,-0.011511,-0.012722,0.346503,0.068656,0.015146,0.156293,0.17561,-0.180532,-0.123592,-0.103184,-0.216427,-0.00312,0.348377,-0.025528,-0.086383,-0.016797,-0.324482,-0.008742,0.043135,-0.094435,-0.116056,-0.070978,0.037155,-0.057271,0.084891,-0.027711,-0.31938,-0.131621,-0.021173,-0.103985,0.126059,-0.131131,-0.325603,-0.179388,0.072691,0.129529,0.519602,-0.16376,-0.084352,0.333454,-0.179883,-0.164571,-0.172519,-0.285628,-0.107953,-0.296714,0.039392,-0.183681,0.272956,0.009284,0.389824,-0.238702,-0.032408,-0.084026,0.157282,-0.006993,-0.125822,-0.024705,0.154116,-0.002896,-0.341837,0.098061,-0.143044,0.110434,0.07376
3,0.566164,0.206852,-0.044593,-0.279375,-0.009993,0.677138,-0.397538,-0.0308,0.025899,0.212931,-0.188754,-0.024006,0.301748,-0.081315,-0.09174,-0.045275,-0.104239,-0.192319,-0.220626,-0.115645,0.06668,0.098953,0.054537,-0.028961,-0.137575,0.097442,-0.222266,-0.130018,0.084331,0.145453,0.006144,0.167078,0.074369,-0.259616,0.066693,-0.211595,0.05754,-0.038198,-0.290052,-0.212264,-0.148861,0.148719,-0.116703,-0.099126,-0.096133,0.03178,-0.036027,-0.226385,-0.08908,-0.224905,-0.072101,-0.047113,0.232748,0.059157,0.000774,0.257302,0.048151,-0.0581,-0.294304,0.015604,-0.023176,-0.06183,0.017336,-0.116317,0.035836,-0.042511,0.218118,-0.095266,-0.050814,0.008569,0.042483,-0.009109,-0.245612,0.158653,0.011455,-0.109488,0.032467,0.016147,-0.139279,0.035068,0.144876,0.02479,-0.129388,0.028952,-0.07409,-0.208479,0.059405,0.044225,0.151583,0.1752,0.17515,-0.062551,0.17655,-0.188157,-0.230918,-0.15314,-0.076526,-0.023603,-0.088532,-0.016176,0.095291,-0.045558,-0.006392,0.017826,0.008417,-0.132197,-0.083278,0.003563,0.323005,0.067964,-0.146875,0.042503,-0.121067,0.225914,-0.024081,0.033169,-0.088488,0.137004,-0.002882,-0.159305,0.070007,0.125328,0.061074,-0.142534,-0.132201,-0.212362,-0.187717,0.145115,0.013065,-0.175926,0.083367,-0.280243,-0.102782,0.03686,-0.054244,0.075556,0.167962,-0.126006,-0.016544,0.101905,0.213466,-0.108549,-0.083249,0.072884,-0.415505,-0.154999,-0.00492,0.210361,0.093828,-0.183419,0.033974,0.037036,0.252661,0.249881,-0.014717,-0.128992,0.273096,-0.151579,-0.228815,-0.054454,-0.025364,-0.204476,-0.107827,0.118701,0.131734,-0.070491,-0.333688,-0.011806,-0.14486,-0.07087,0.194579,0.270571,0.136618,-0.367978,-0.042273,0.143486,-0.219075,-0.05774,0.123852,-0.003964,0.163066,-0.078155,0.123481,-0.119476,0.191134,-0.124601,-0.068136,0.053538,-0.059735,-0.34829,0.018004,0.123338,-0.369224,-0.033356,-0.009858,0.129576,-0.041675,-0.165888,0.162072,0.102491,0.152568,-0.012879,0.054794,-0.113366,-0.002827,-0.134159,0.154497,-0.01414,-0.134878,0.210294,0.23135,0.184908,-0.0851,-0.062744,0.105669,0.150353,-0.278646,0.076226,0.049133,0.031725,0.443011,-0.084754,-0.228655,0.04053,-0.04781,-0.056666,-0.395298,-0.11526,0.057823,-0.019467,-0.170847,0.255849,0.292331,0.270784,-0.151394,0.087736,-0.119623,0.309142,-0.080884,-0.130957,0.154624,0.180055,-0.340308,-0.058732,-0.172239,-0.3577,-0.104967,0.109876,-0.069765,0.146617,0.017982,-0.306961,-0.012606,0.192311,0.04651,0.017833,-0.073205,0.107341,-0.086663,0.197276,-0.0756,-0.217188,-0.031998,-0.13491,0.035158,0.037328,0.000264,-0.416864,-0.25126,-0.014332,0.322554,0.56196,-0.188096,0.053132,0.475206,-0.102501,-0.364149,-0.196387,-0.300677,-0.025753,-0.153061,0.023657,-0.125456,0.40506,0.043828,0.242333,-0.108979,0.045264,-0.100062,0.305589,-0.08487,0.068211,0.055009,0.204943,0.145478,-0.226321,0.10335,-0.060725,0.169908,0.003326
4,0.108246,0.150378,0.062828,-0.179181,0.032288,0.376392,-0.322503,-0.032833,0.047124,0.199039,0.029338,0.229218,0.332717,-0.065418,-0.049537,-0.099827,0.104879,-0.330045,0.005786,0.033226,-0.020752,0.057987,0.070242,-0.007403,-0.125786,0.0327,-0.06038,-0.205462,0.01644,-0.225221,-0.164052,-0.009564,0.027346,-0.415836,-0.00399,0.073637,-0.02751,0.079395,-0.526031,-0.034474,-0.172066,0.146798,-0.01423,-0.253731,-0.153406,0.055469,-0.149623,0.048758,0.206678,-0.010235,-0.2315,-0.028042,-0.32203,0.000386,-0.067827,0.060302,0.153398,-0.096335,-0.171465,0.069672,-0.201163,-0.197525,-0.038117,-0.217115,-0.05919,0.070471,0.10492,-0.156331,0.286016,-0.138386,0.3538,0.099603,-0.181426,-0.051548,0.213076,-0.06912,0.132981,0.138463,-0.038376,0.021054,-0.219516,-0.140566,-0.084444,0.064184,0.028934,-0.122451,0.064882,0.463686,0.251079,0.075953,0.004005,-0.037281,0.079237,-0.170332,0.057623,-0.036377,0.005248,0.22756,-0.23,0.072941,0.058421,-0.180073,-0.057642,0.036624,0.059222,-0.467863,-0.06749,0.021429,0.259222,0.022373,-0.004172,0.027494,-0.124201,0.485295,-0.01794,-0.073682,-0.124258,0.032412,0.290349,-0.105146,0.104649,0.057688,-0.024621,-0.360868,-0.017753,-0.570182,-0.344348,0.072668,-0.172838,-0.14316,0.128489,-0.400892,-0.153307,-0.155123,-0.262278,0.099289,0.326705,-0.072127,-0.076519,0.381539,0.201454,-0.038758,-0.025633,0.048137,-0.419257,-0.131618,0.099922,0.286213,0.13114,-0.126758,0.03378,-0.011161,0.307413,0.329864,-0.205135,-0.125746,0.35572,-0.096346,-0.364969,0.031467,0.048333,-0.185147,-0.094207,0.224825,0.421651,-0.033179,-0.262686,-0.035756,-0.224029,-0.124589,0.236022,0.292437,0.097369,-0.452613,0.128938,0.174546,0.036142,-0.116396,0.138276,0.025827,0.333554,-0.094881,0.221355,-0.182506,0.134037,0.043718,0.147122,0.023664,-0.174726,-0.259259,0.223636,0.188338,-0.492481,-0.119973,-0.119458,0.190566,-0.138221,-0.282074,0.384044,0.130957,0.208877,0.002304,0.089224,-0.122009,0.004509,-0.172155,0.254072,0.060303,-0.075346,0.326261,0.023847,0.133137,-0.003847,0.211506,-0.069399,0.093876,-0.23427,0.232908,0.098917,-0.016237,0.446685,-0.236552,-0.209134,0.075736,0.126318,0.238087,-0.398839,-0.060284,-0.075257,-0.049164,-0.272771,0.37065,0.326444,0.300355,-0.070459,0.016719,0.034424,0.312062,0.03207,0.011663,0.126233,0.178914,-0.441615,0.040389,-0.372857,-0.247895,-0.034899,0.348515,0.038801,0.02609,0.023452,-0.348433,-0.165552,-0.023735,-0.060267,0.022853,-0.035852,-0.152291,0.042891,-0.009034,-0.131016,-0.371691,-0.299802,0.102941,-0.087184,-0.020854,0.211583,-0.287483,-0.030625,0.059173,0.173975,0.522629,-0.145913,0.060302,0.461561,-0.162295,-0.099364,0.032205,-0.158143,0.072562,-0.377861,0.036535,-0.167583,0.155818,-0.040083,0.38394,-0.136814,-0.108991,-0.109491,-0.072026,0.20128,-0.014778,0.094828,0.056391,0.048843,-0.265739,-0.032236,-0.381977,0.164,0.011801


In [62]:
label_data = train_dataset_new.cuisine
print(label_data.shape)
label_data.head()
final_data = pd.concat([data, label_data], axis=1)
print(final_data.shape)
final_data.head()

(39774,)
(39774, 301)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,cuisine
0,0.285862,-0.092901,-0.104956,-0.141079,-0.343134,0.018658,-0.329705,-0.253953,-0.107836,0.27517,0.12664,-0.164504,0.242465,-0.156059,0.069863,-0.37154,-0.186931,-0.457091,-0.283191,0.018625,0.216196,-0.047563,0.054361,0.331155,-0.221449,0.118537,0.309211,0.099856,-0.195914,-0.216138,0.148422,0.111567,-0.243724,-0.30154,0.153315,0.226266,-0.168114,0.038124,-0.377165,-0.104259,-0.245899,-0.034031,0.186209,-0.246264,-0.355434,-0.02147,-0.128582,0.245688,0.456542,0.075975,-0.102264,0.066549,-0.158218,-0.314325,0.225521,-0.224753,-0.022723,-0.083077,0.166481,0.009899,-0.473658,-0.497112,0.032715,-0.091476,-0.164462,0.171175,0.410027,0.197244,0.495246,-0.095109,0.53143,0.021766,-0.332679,-0.048326,0.279605,-0.022727,0.165361,-0.234565,0.276868,0.110088,0.091306,-0.506916,0.450488,-0.025608,-0.195939,0.263726,0.198808,0.190501,0.238037,0.161614,-0.335157,0.26125,0.366735,-0.320798,0.100499,0.323603,0.206073,0.142174,-0.167733,0.040372,-0.107068,-0.101688,-0.283618,-0.49018,-0.082247,-0.248786,0.073676,-0.392439,0.318291,0.00956,0.045009,0.077192,0.315131,0.101433,0.033035,-0.214375,-0.020706,0.170736,0.102457,-0.132621,0.03606,-0.334531,0.381246,-0.254584,0.082054,-0.383621,-0.089251,0.037412,-0.096276,0.061637,0.219466,-0.288311,0.308858,-0.382105,-0.525282,0.100674,0.017538,0.204352,0.42922,0.464965,0.47738,0.217622,0.14784,0.488478,0.073497,-0.128959,0.139289,0.106418,0.288603,-0.511372,0.066647,-0.127446,-0.046222,0.210256,-0.704312,0.012463,0.436102,0.041509,-0.137014,-0.28883,-0.290843,0.069458,-0.056271,0.170431,0.200217,-0.207172,-0.235331,0.080113,-0.128064,-0.309254,0.199523,0.306442,0.345948,-0.325753,-0.160482,0.48086,-0.206121,-0.029315,-0.06502,0.21912,0.541726,-0.149281,-0.086914,-0.045517,0.417162,0.194302,0.41609,-0.087867,-0.105092,-0.209816,0.194123,0.078157,-0.419996,-0.064581,0.298767,-0.012142,-0.230738,0.116464,0.548734,0.03993,-0.050884,-0.299697,0.354323,0.053719,-0.248611,-0.156978,0.144317,-0.033441,-0.133375,0.318379,0.103274,-0.032953,-0.013313,0.06105,-0.053579,0.615861,-0.205658,0.09013,0.21069,-0.151209,0.239364,-0.31542,-0.266888,0.114705,-0.001046,-0.127042,-0.326912,-0.656359,0.327963,0.084491,-0.351521,0.397267,0.222389,0.096462,0.065139,-0.065133,-0.180603,0.362301,0.122646,-0.202959,0.139354,-0.070015,-0.46328,-0.041579,-0.261727,-0.469859,-0.256715,0.307068,-0.200408,-0.152571,0.095478,-0.561029,-0.068957,0.167482,-0.13248,-0.199525,-0.009716,-0.063114,-0.444418,0.087899,-0.248784,-0.401089,-0.162596,-0.252509,0.20756,-0.360343,0.078712,-0.434182,-0.072858,0.292766,0.145144,0.791459,-0.378336,-0.102891,0.398605,-0.039855,-0.157138,-0.376547,0.213217,0.23456,0.068015,-0.245633,-0.344618,-0.066779,0.12725,0.090009,0.080086,-0.280653,0.233545,0.371713,0.194261,-0.013065,0.269459,-0.091065,-0.206738,-0.20661,-0.142062,-0.137695,0.060239,-0.257914,16
1,0.203618,0.092594,-0.111713,-0.151551,-0.092854,0.162009,-0.126274,-0.005025,0.032185,0.140755,-0.146589,0.071932,0.17138,0.096066,-0.077652,0.197759,-0.004425,-0.133261,-0.146471,-0.288323,0.219627,0.07213,-0.081254,-0.006711,-0.133664,0.074143,-0.20611,-0.066605,0.213317,-0.153791,0.048308,0.021753,0.080857,-0.244494,0.051355,0.053983,0.07639,-0.040799,-0.330558,-0.043871,-0.203573,0.195085,-0.085623,-0.410295,-0.295467,0.140634,0.087754,-0.022836,0.256366,-0.12496,-0.162298,-0.085922,-0.142451,-0.080477,0.171513,0.178582,0.137657,-0.206184,-0.135562,-0.209845,-0.16369,-0.238586,-0.219156,-0.024028,-0.087232,-0.190154,0.141471,-0.184449,0.14053,-0.104247,0.240641,0.332978,-0.31071,-0.203325,0.203066,0.163328,0.227577,0.072039,0.086306,0.090072,-0.064411,-0.138526,-0.231596,0.182317,-0.254161,-0.104525,0.194926,0.300882,0.017396,0.191678,-0.110847,0.05,0.087566,-0.011957,-0.006275,-0.021744,-0.073451,0.133804,-0.122516,0.156166,0.1373,-0.113651,-0.098912,-0.131161,0.10296,-0.43954,-0.133641,-0.109921,0.323531,0.133956,-0.081387,0.165185,-0.239237,0.272974,0.044976,0.103606,-0.18749,0.031573,0.223252,-0.377205,0.406329,-0.161134,0.247495,-0.127054,-0.164372,-0.448515,-0.317078,0.011822,-0.04248,-0.163086,0.085382,-0.066055,-0.021366,-0.195205,-0.31238,0.139328,0.477968,0.048894,-0.032284,0.1976,0.199606,0.010469,-0.053367,0.156863,-0.209604,-0.332467,0.037627,0.297964,0.193407,-0.186004,-0.124629,0.047236,0.241472,0.236084,-0.263842,-0.236942,0.18445,-0.001174,-0.43116,-0.107417,-0.125284,-0.136666,-0.017703,0.236069,0.168242,-0.218667,-0.434346,0.027347,-0.105657,-0.187233,0.116247,0.086795,0.099084,-0.474921,-0.086564,0.269139,-0.217726,-0.133996,-0.012456,0.076382,0.335158,-0.11942,-0.062883,-0.460466,0.246018,-0.118886,0.046795,0.085551,-0.026737,-0.250894,0.175409,0.124365,-0.301016,-0.187728,-0.041285,0.183113,0.026255,-0.161252,0.375214,-0.115217,0.22103,-0.111942,0.025597,-0.07635,0.120117,-0.00232,0.191324,-0.014079,-0.291458,0.300771,0.254621,0.229613,-0.031228,-0.11219,0.147898,0.36352,-0.284971,0.322276,0.114691,-0.03784,0.547195,-0.102793,-0.224127,0.13409,0.015073,0.180249,-0.428845,-0.171952,0.099711,-0.193889,-0.168466,0.176378,0.330643,0.183587,-0.229001,0.057875,0.001483,0.390407,-0.069313,-0.232497,0.20075,0.092258,-0.365005,-0.082577,-0.237838,-0.245629,0.035658,0.356923,-0.034283,-0.065675,-0.156936,-0.356344,0.036653,0.00522,0.089092,-0.044336,-0.085061,-0.009935,0.000492,0.240796,-0.063105,-0.149534,-0.222335,-0.155561,-0.176209,-0.136387,-0.063831,-0.191043,-0.086739,0.118542,0.207997,0.492016,-0.1612,-0.183312,0.352156,-0.132277,-0.12476,-0.033638,-0.143204,0.055683,-0.319014,0.238021,-0.25036,0.122496,0.055985,0.399573,-0.094738,-0.049095,-0.008034,0.376308,-0.232284,0.065084,0.200479,0.061887,0.155471,-0.056409,0.117316,-0.268429,-0.089767,-0.154529,5
2,0.086991,0.0513,0.01407,-0.266688,0.01802,0.076325,-0.072756,0.096848,0.015955,0.083976,0.030003,0.016415,0.055144,-0.042187,-0.079055,0.046444,0.072042,-0.133561,-0.229814,-0.261906,0.058335,0.147804,0.029174,-0.071264,0.093863,-0.002036,-0.190942,-0.163506,0.165119,-0.145068,-0.069798,-0.097826,0.133282,-0.298999,0.230804,0.057313,0.013218,0.090972,-0.111292,0.132975,-0.065516,0.196657,-0.017602,-0.30427,-0.114749,0.01082,0.034512,-0.007646,0.137759,-0.082141,0.076491,-0.03163,-0.070558,-0.090141,-0.16216,-0.007637,0.146679,-0.168815,-0.138505,-0.055556,-0.064851,-0.112405,-0.183395,-0.053764,-0.022,-0.007013,0.127383,-0.027618,0.155692,-0.111284,0.207491,-0.053392,-0.35386,-0.142852,0.05848,0.056025,0.201573,0.061702,0.190515,0.149295,-0.291498,-0.082518,-0.063591,0.19157,-0.098225,-0.07192,0.152201,0.383195,0.218988,-0.029292,-0.078638,-0.224258,0.081143,-0.069394,-0.084662,-0.027341,-0.110882,0.144524,0.012243,0.118314,0.131954,-0.313989,-0.094877,-0.158208,0.024832,-0.252567,-0.010676,-0.162332,0.254706,0.263758,0.05532,-0.019738,0.058388,0.141759,0.113176,0.168175,-0.30858,0.05834,0.03248,-0.074283,0.209353,8e-06,-0.057205,-0.231915,-0.040958,-0.29934,-0.351615,0.001594,0.027232,-0.10826,-0.085282,-0.041014,-0.325083,-0.201304,-0.138187,0.075713,0.506145,0.05243,-0.145433,0.313508,0.068945,-0.085245,-0.119818,-0.040852,-0.373275,-0.106733,0.2706,0.304363,0.060787,-0.396129,-0.148025,-0.059601,0.377488,0.251941,-0.217052,-0.349136,0.091294,-0.006683,-0.145933,-0.270569,0.036486,-0.121327,-0.063068,0.242767,0.145712,-0.154268,-0.298508,-0.011772,-0.077295,0.032714,0.029509,0.363572,0.117202,-0.430462,-0.057564,0.207152,-0.050183,-0.101038,0.204491,-0.074599,0.34362,-0.003285,-0.09137,-0.284214,0.179654,-0.124838,0.082732,0.042469,0.003499,-0.191368,-0.01926,0.054916,-0.243503,-0.03415,0.050453,0.238026,0.034949,-0.105499,0.141685,-0.158613,0.153561,0.044685,0.172649,-0.243594,0.069262,-0.058605,0.210959,-0.051869,-0.011404,0.157893,0.15409,-0.019697,0.133004,-0.087116,0.026827,0.317569,-0.144775,0.212823,0.148963,0.156032,0.41334,-0.050362,-0.055896,0.009347,-0.10885,0.112609,-0.257373,-0.14128,-0.019169,-0.123292,-0.22554,0.187797,0.30691,0.07453,-0.11931,-0.011511,-0.012722,0.346503,0.068656,0.015146,0.156293,0.17561,-0.180532,-0.123592,-0.103184,-0.216427,-0.00312,0.348377,-0.025528,-0.086383,-0.016797,-0.324482,-0.008742,0.043135,-0.094435,-0.116056,-0.070978,0.037155,-0.057271,0.084891,-0.027711,-0.31938,-0.131621,-0.021173,-0.103985,0.126059,-0.131131,-0.325603,-0.179388,0.072691,0.129529,0.519602,-0.16376,-0.084352,0.333454,-0.179883,-0.164571,-0.172519,-0.285628,-0.107953,-0.296714,0.039392,-0.183681,0.272956,0.009284,0.389824,-0.238702,-0.032408,-0.084026,0.157282,-0.006993,-0.125822,-0.024705,0.154116,-0.002896,-0.341837,0.098061,-0.143044,0.110434,0.07376,19
3,0.566164,0.206852,-0.044593,-0.279375,-0.009993,0.677138,-0.397538,-0.0308,0.025899,0.212931,-0.188754,-0.024006,0.301748,-0.081315,-0.09174,-0.045275,-0.104239,-0.192319,-0.220626,-0.115645,0.06668,0.098953,0.054537,-0.028961,-0.137575,0.097442,-0.222266,-0.130018,0.084331,0.145453,0.006144,0.167078,0.074369,-0.259616,0.066693,-0.211595,0.05754,-0.038198,-0.290052,-0.212264,-0.148861,0.148719,-0.116703,-0.099126,-0.096133,0.03178,-0.036027,-0.226385,-0.08908,-0.224905,-0.072101,-0.047113,0.232748,0.059157,0.000774,0.257302,0.048151,-0.0581,-0.294304,0.015604,-0.023176,-0.06183,0.017336,-0.116317,0.035836,-0.042511,0.218118,-0.095266,-0.050814,0.008569,0.042483,-0.009109,-0.245612,0.158653,0.011455,-0.109488,0.032467,0.016147,-0.139279,0.035068,0.144876,0.02479,-0.129388,0.028952,-0.07409,-0.208479,0.059405,0.044225,0.151583,0.1752,0.17515,-0.062551,0.17655,-0.188157,-0.230918,-0.15314,-0.076526,-0.023603,-0.088532,-0.016176,0.095291,-0.045558,-0.006392,0.017826,0.008417,-0.132197,-0.083278,0.003563,0.323005,0.067964,-0.146875,0.042503,-0.121067,0.225914,-0.024081,0.033169,-0.088488,0.137004,-0.002882,-0.159305,0.070007,0.125328,0.061074,-0.142534,-0.132201,-0.212362,-0.187717,0.145115,0.013065,-0.175926,0.083367,-0.280243,-0.102782,0.03686,-0.054244,0.075556,0.167962,-0.126006,-0.016544,0.101905,0.213466,-0.108549,-0.083249,0.072884,-0.415505,-0.154999,-0.00492,0.210361,0.093828,-0.183419,0.033974,0.037036,0.252661,0.249881,-0.014717,-0.128992,0.273096,-0.151579,-0.228815,-0.054454,-0.025364,-0.204476,-0.107827,0.118701,0.131734,-0.070491,-0.333688,-0.011806,-0.14486,-0.07087,0.194579,0.270571,0.136618,-0.367978,-0.042273,0.143486,-0.219075,-0.05774,0.123852,-0.003964,0.163066,-0.078155,0.123481,-0.119476,0.191134,-0.124601,-0.068136,0.053538,-0.059735,-0.34829,0.018004,0.123338,-0.369224,-0.033356,-0.009858,0.129576,-0.041675,-0.165888,0.162072,0.102491,0.152568,-0.012879,0.054794,-0.113366,-0.002827,-0.134159,0.154497,-0.01414,-0.134878,0.210294,0.23135,0.184908,-0.0851,-0.062744,0.105669,0.150353,-0.278646,0.076226,0.049133,0.031725,0.443011,-0.084754,-0.228655,0.04053,-0.04781,-0.056666,-0.395298,-0.11526,0.057823,-0.019467,-0.170847,0.255849,0.292331,0.270784,-0.151394,0.087736,-0.119623,0.309142,-0.080884,-0.130957,0.154624,0.180055,-0.340308,-0.058732,-0.172239,-0.3577,-0.104967,0.109876,-0.069765,0.146617,0.017982,-0.306961,-0.012606,0.192311,0.04651,0.017833,-0.073205,0.107341,-0.086663,0.197276,-0.0756,-0.217188,-0.031998,-0.13491,0.035158,0.037328,0.000264,-0.416864,-0.25126,-0.014332,0.322554,0.56196,-0.188096,0.053132,0.475206,-0.102501,-0.364149,-0.196387,-0.300677,-0.025753,-0.153061,0.023657,-0.125456,0.40506,0.043828,0.242333,-0.108979,0.045264,-0.100062,0.305589,-0.08487,0.068211,0.055009,0.204943,0.145478,-0.226321,0.10335,-0.060725,0.169908,0.003326,6
4,0.108246,0.150378,0.062828,-0.179181,0.032288,0.376392,-0.322503,-0.032833,0.047124,0.199039,0.029338,0.229218,0.332717,-0.065418,-0.049537,-0.099827,0.104879,-0.330045,0.005786,0.033226,-0.020752,0.057987,0.070242,-0.007403,-0.125786,0.0327,-0.06038,-0.205462,0.01644,-0.225221,-0.164052,-0.009564,0.027346,-0.415836,-0.00399,0.073637,-0.02751,0.079395,-0.526031,-0.034474,-0.172066,0.146798,-0.01423,-0.253731,-0.153406,0.055469,-0.149623,0.048758,0.206678,-0.010235,-0.2315,-0.028042,-0.32203,0.000386,-0.067827,0.060302,0.153398,-0.096335,-0.171465,0.069672,-0.201163,-0.197525,-0.038117,-0.217115,-0.05919,0.070471,0.10492,-0.156331,0.286016,-0.138386,0.3538,0.099603,-0.181426,-0.051548,0.213076,-0.06912,0.132981,0.138463,-0.038376,0.021054,-0.219516,-0.140566,-0.084444,0.064184,0.028934,-0.122451,0.064882,0.463686,0.251079,0.075953,0.004005,-0.037281,0.079237,-0.170332,0.057623,-0.036377,0.005248,0.22756,-0.23,0.072941,0.058421,-0.180073,-0.057642,0.036624,0.059222,-0.467863,-0.06749,0.021429,0.259222,0.022373,-0.004172,0.027494,-0.124201,0.485295,-0.01794,-0.073682,-0.124258,0.032412,0.290349,-0.105146,0.104649,0.057688,-0.024621,-0.360868,-0.017753,-0.570182,-0.344348,0.072668,-0.172838,-0.14316,0.128489,-0.400892,-0.153307,-0.155123,-0.262278,0.099289,0.326705,-0.072127,-0.076519,0.381539,0.201454,-0.038758,-0.025633,0.048137,-0.419257,-0.131618,0.099922,0.286213,0.13114,-0.126758,0.03378,-0.011161,0.307413,0.329864,-0.205135,-0.125746,0.35572,-0.096346,-0.364969,0.031467,0.048333,-0.185147,-0.094207,0.224825,0.421651,-0.033179,-0.262686,-0.035756,-0.224029,-0.124589,0.236022,0.292437,0.097369,-0.452613,0.128938,0.174546,0.036142,-0.116396,0.138276,0.025827,0.333554,-0.094881,0.221355,-0.182506,0.134037,0.043718,0.147122,0.023664,-0.174726,-0.259259,0.223636,0.188338,-0.492481,-0.119973,-0.119458,0.190566,-0.138221,-0.282074,0.384044,0.130957,0.208877,0.002304,0.089224,-0.122009,0.004509,-0.172155,0.254072,0.060303,-0.075346,0.326261,0.023847,0.133137,-0.003847,0.211506,-0.069399,0.093876,-0.23427,0.232908,0.098917,-0.016237,0.446685,-0.236552,-0.209134,0.075736,0.126318,0.238087,-0.398839,-0.060284,-0.075257,-0.049164,-0.272771,0.37065,0.326444,0.300355,-0.070459,0.016719,0.034424,0.312062,0.03207,0.011663,0.126233,0.178914,-0.441615,0.040389,-0.372857,-0.247895,-0.034899,0.348515,0.038801,0.02609,0.023452,-0.348433,-0.165552,-0.023735,-0.060267,0.022853,-0.035852,-0.152291,0.042891,-0.009034,-0.131016,-0.371691,-0.299802,0.102941,-0.087184,-0.020854,0.211583,-0.287483,-0.030625,0.059173,0.173975,0.522629,-0.145913,0.060302,0.461561,-0.162295,-0.099364,0.032205,-0.158143,0.072562,-0.377861,0.036535,-0.167583,0.155818,-0.040083,0.38394,-0.136814,-0.108991,-0.109491,-0.072026,0.20128,-0.014778,0.094828,0.056391,0.048843,-0.265739,-0.032236,-0.381977,0.164,0.011801,6


In [63]:
print("Indices of NaN: ", pd.isnull(data).any(1).nonzero()[0])
final_data= final_data.dropna()
print("Indices of NaN: ", pd.isnull(final_data).any(1).nonzero()[0])
output_label = final_data['cuisine']
final_data.drop(['cuisine'],axis=1,inplace=True)

Indices of NaN:  [13178 23512]
Indices of NaN:  []


In [64]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(final_data, output_label, test_size=0.15, random_state=42)

In [65]:
#np.where(np.isnan(data))
#np.isnan(data)
#print("Indices of NaN: ", pd.isnull(data).any(1).nonzero()[0])
#print(data.loc[[13178]])
#print(data.loc[[23512]])
#data.drop(data.index[13178],inplace=True)
#data.drop(data.index[23512],inplace=True)
#label_data.drop(data.index[13178],inplace=True)
#label_data.drop(data.index[23512],inplace=True)
#label_data.drop(data.index[23511],inplace=True)
#print("Indices of NaN: ", pd.isnull(data).any(1).nonzero()[0])

In [67]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train, y_train)

print("train: ",clf.score(X_train,y_train))
print("test: ",clf.score(X_test,y_test))

train:  0.6521031769508371
test:  0.6345960442507542


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.model_selection import cross_val_score, GridSearchCV
params_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
svm_model = GridSearchCV(SVC(), params_grid, cv=5)
svm_model.fit(X_train, y_train)

In [None]:
print('Best score for training data:', svm_model.best_score_,"\n") 