<a href="https://colab.research.google.com/github/Nikunj-1308/Cuisine_Prediction/blob/master/Cuisine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cuisine Prediction

## **Importing the required Libraries**

In [0]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import pandas as pd
import numpy as np
import json


## Importing Dataset from Google Drive

In [0]:
# Colab-only step: mount Google Drive at /content/drive so the dataset files
# under "My Drive" can be read with ordinary file paths.
# NOTE(review): presumably this prompts for authorization on a fresh runtime — confirm.
from google.colab import drive
drive.mount('/content/drive')


In [0]:
# Load the recipe records from the mounted Drive folder.
# Context managers guarantee the file handles are closed once parsing is done
# (the bare json.load(open(...)) form leaves them open until garbage collection),
# and the explicit encoding avoids depending on the platform default.
with open('/content/drive/My Drive/Cuisine Prediction/train.json', encoding='utf-8') as train_file:
    train = json.load(train_file)
with open('/content/drive/My Drive/Cuisine Prediction/test.json', encoding='utf-8') as test_file:
    test = json.load(test_file)

## Preparing Data for Training & Testing

In [0]:
def generate_text(data):
    """Turn recipe records into one lower-cased text document per recipe.

    Parameters
    ----------
    data : iterable of dict
        Records that each carry an ``'ingredients'`` entry holding a
        sequence of strings.

    Returns
    -------
    list of str
        For every record, its ingredients joined by single spaces and
        converted to lower case, in input order.
    """
    documents = []
    for recipe in data:
        joined = " ".join(recipe['ingredients'])
        documents.append(joined.lower())
    return documents

# One text document per recipe for both splits.
train_text = generate_text(train)
test_text = generate_text(test)

# Cuisine label of every training recipe, aligned with train_text.
target = [recipe['cuisine'] for recipe in train]

In [0]:
# Sanity check: show only a small sample of the labels rather than the whole
# list, which would flood the cell output with one entry per training recipe.
print(target[:10])
print(train_text[1])
print(test_text[2])

## **Feature Engineering**

In [0]:
# Single shared vectorizer: fitted on the training text, then reused as-is
# for the test text so both matrices share one vocabulary.
tfidf = TfidfVectorizer(binary=True)


def tfidf_features(txt, flag):
    """Vectorize documents with the module-level ``tfidf`` vectorizer.

    Parameters
    ----------
    txt : iterable of str
        Documents to vectorize.
    flag : str
        ``"train"`` fits the vectorizer on ``txt`` before transforming;
        any other value only applies the already-fitted vectorizer.

    Returns
    -------
    The vectorizer output cast to ``float16``.
    """
    # NOTE(review): float16 trims memory but also precision — confirm the
    # downstream estimator benefits from the downcast.
    vectorize = tfidf.fit_transform if flag == "train" else tfidf.transform
    return vectorize(txt).astype('float16')


X = tfidf_features(train_text, flag="train")
X_test = tfidf_features(test_text, flag="test")

## **Target Label Encoding**

In [0]:
print("Label Encode the Target Variable ... ")

# Fit the encoder on the cuisine names, then map every name to its integer
# class id; `lb` is kept so predictions can be decoded back to names later.
lb = LabelEncoder().fit(target)
y = lb.transform(target)


## **Training the Model**

In [0]:
# Base binary estimator: an RBF-kernel SVM. OneVsRestClassifier below clones it
# once per cuisine and fits each clone on a "this cuisine vs. the rest" problem.
classifier = SVC(
    C=100,              # penalty parameter
    kernel='rbf',       # radial basis function kernel
    degree=3,           # default value; only meaningful for the 'poly' kernel
    gamma=1,            # kernel coefficient
    coef0=1,            # changed from the 0.0 default; only significant for 'poly'/'sigmoid'
    shrinking=True,     # use the shrinking heuristic
    tol=0.001,          # stopping criterion tolerance
    probability=False,  # probability estimates are not needed
    cache_size=200,     # kernel cache size in MB
    class_weight=None,  # all classes are weighted equally
    verbose=False,      # keep solver logging off
    max_iter=-1,        # no iteration limit
    # Must be 'ovo' or 'ovr': recent scikit-learn releases validate this
    # parameter and reject None. The value is ignored for the binary problems
    # that OneVsRestClassifier creates, so 'ovr' leaves the fitted model unchanged.
    decision_function_shape='ovr',
    random_state=None,
)

# Explicit one-vs-rest wrapper; the per-class SVMs are trained in 4 parallel jobs.
model = OneVsRestClassifier(classifier, n_jobs=4)
model.fit(X, y)

## **Prediction on Test Data**

In [0]:
# The model was fitted on label-encoded targets, so predict() yields encoded
# class ids for the test matrix (despite its name, y_test holds predictions,
# not ground truth).
y_test = model.predict(X_test)
# Map the encoded ids back to the original cuisine names.
y_pred = lb.inverse_transform(y_test)

## **Storing the Output**

In [0]:
# Pair each test recipe id with its predicted cuisine and write the result
# as a two-column CSV (id, cuisine) without the DataFrame index.
test_id = [recipe['id'] for recipe in test]
sub = pd.DataFrame(
    {'id': test_id, 'cuisine': y_pred},
    columns=['id', 'cuisine'],
)
sub.to_csv('svm_output.csv', index=False)