In [1]:
import json
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier

### Reading train and test data as dictionary

In [2]:
train = json.load(open('../data/train.json'))
test = json.load(open('../data/test.json'))

### Converting ingredients to text data

In [3]:
train_as_text = [' '.join(sample['ingredients']).lower() for sample in train]
train_cuisine = [sample['cuisine'] for sample in train]

test_as_text = [' '.join(sample['ingredients']).lower() for sample in test]

### Tfidf vectorizer for text data

In [4]:
tfidf_enc = TfidfVectorizer(binary=True)
lbl_enc = LabelEncoder()

X = tfidf_enc.fit_transform(train_as_text)
X = X.astype('float16')

X_test = tfidf_enc.transform(test_as_text)
X_test = X_test.astype('float16')

y = lbl_enc.fit_transform(train_cuisine)

### SVC with OVR model

In [5]:
clf = SVC(C=100, kernel='rbf', degree=3,
          gamma=1, coef0=1, shrinking=True, 
          probability=False, tol=0.001, cache_size=200,
          class_weight=None, verbose=True, max_iter=-1,
          decision_function_shape=None, random_state=None)
model = OneVsRestClassifier(clf, n_jobs=4)
model.fit(X,y)

[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]

OneVsRestClassifier(estimator=SVC(C=100, cache_size=200, class_weight=None, coef0=1,
  decision_function_shape=None, degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=True),
          n_jobs=4)

### Prediction

In [6]:
y_test = model.predict(X_test)
test_cuisine = lbl_enc.inverse_transform(y_test)

### Writing to submission folder

In [11]:
test_id = [sample['id'] for sample in test]

submission_df = pd.DataFrame({'id': test_id, 'cuisine': test_cuisine}, columns=['id', 'cuisine'])
submission_df.to_csv('../submission/svm_submission.csv', index=False)