[notebook](https://www.kaggle.com/code/deveshupreti/models-comparision-food-ingredients-and-allergens) from Kaggle

## **Basic Libraries**

In [None]:
import pandas as pd
import numpy as np

import warnings
# Silence every warning for the rest of the session: 'ignore' is applied with
# no category or module restriction, so library warnings will not be shown.
warnings.filterwarnings('ignore')

## Reading **"csv"**

In [None]:
# The literal string "None" is a real category in this dataset: later cells
# compare the Allergens column against 'None'. pandas >= 2.0 includes "None"
# in its default NA markers, which would turn those cells into NaN and let the
# dropna() step discard them. Disable the defaults and list the NA markers
# explicitly, leaving "None" out, so the result is the same on every pandas version.
NA_MARKERS = [
    "", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan",
    "1.#IND", "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null",
]

df = pd.read_csv(
    "/kaggle/input/food-ingredients-and-allergens/food_ingredients_and_allergens.csv",
    keep_default_na=False,
    na_values=NA_MARKERS,
)
df.head()

## **Exploratory Data Analysis**

### **Shape**

In [None]:
# (number of rows, number of columns) of the frame loaded above
df.shape

### Checking **"Duplicates"**

In [None]:
# Number of rows that are exact repeats of an earlier row.
df.duplicated().sum()

### Removing **"Duplicates"**

In [None]:
# Keep the first occurrence of each duplicated row and rebind the name,
# then show the new (rows, columns) to confirm how many rows were removed.
df = df.drop_duplicates()
df.shape

### Checking **"NA"**

In [None]:
# Missing-value count per column.
df.isnull().sum()


### Removing **"NA"**

In [None]:
# Discard every row that has at least one missing value, then report the
# remaining (rows, columns).
df = df.dropna()
df.shape

### Checking **"columns"** and **"nunique"** values in these columns

In [None]:
# Column labels of the cleaned frame
df.columns

In [None]:
# One line per column: its name followed by the number of distinct values.
for col_name, distinct_count in df.nunique().items():
    print(col_name, "   ", distinct_count)

### Checking Similarity Between **"Allergens"** and **"Prediction"**

In [None]:
# Rows labelled "Contains" even though the Allergens column says 'None'.
df[df['Allergens'].eq('None') & df['Prediction'].eq("Contains")]


The 5 rows above are inconsistent: **"Allergens"** is 'None' while **"Prediction"** says 'Contains'.

### Fixing this by changing **'Contains'** to **'Does not contain'** in the **"Prediction"** column for those rows

In [None]:
# Wherever Allergens is 'None' but Prediction is "Contains", overwrite the
# label with "Does not contain"; every other row keeps its current label.
df['Prediction'] = df['Prediction'].mask(
    df['Allergens'].eq('None') & df['Prediction'].eq("Contains"),
    "Does not contain",
)

In [None]:
# Same filter as before the relabelling; it should now return no rows.
df[df['Allergens'].eq('None') & df['Prediction'].eq("Contains")]

## Creating **"X"** AND **"y"**

In [None]:
# Features: every column except the target and the Allergens column.
X = df.drop(columns=['Prediction', 'Allergens'])
# Target: kept as a one-column DataFrame because later cells add and drop
# columns on it.
y = df['Prediction'].to_frame()

## **Mapping "Numeric" values to all "Text" data in columns**

In [None]:
# Integer-encode every text column: each distinct value gets a code in order
# of first appearance (0, 1, 2, ...), stored in a new "<column> Label" column.
#
# pd.factorize produces codes that are always aligned with the values. The
# previous hand-rolled zip of unique() with range(nunique()) pairs sequences of
# different length when a column holds NaN (unique() includes NaN, nunique()
# does not), so zip stops early and the last distinct value gets no code.
# With factorize, missing values receive the sentinel code -1 instead.
#
# Iterate over a snapshot of the column names because the loop adds columns.
for column in list(X.columns):
    X[column + ' Label'] = pd.factorize(X[column])[0]

In [None]:
# Remove the original text columns so only their integer "Label" versions remain.
X = X.drop(
    columns=['Food Product', 'Main Ingredient', 'Sweetener', 'Fat/Oil', 'Seasoning']
)
X.head()

In [None]:
# Number of rows per target class (shows the class balance)
y.value_counts()

In [None]:
# Replace the text target with a binary one: 1 = "Contains", 0 = "Does not contain".
# The result is a one-column DataFrame named 'Prediction Label'.
label_codes = {"Contains": 1, "Does not contain": 0}
y = y['Prediction'].map(label_codes).to_frame('Prediction Label')
y.head()

## **"train_test_split"**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is seeded for repeatability
# and stratified on y so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
    stratify=y,
)

## **"RandomForestClassifier"**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest (same seed as the train/test split) so the reported score is
# identical on every Restart & Run All; an unseeded forest gives a different
# score on each run, which makes the model comparison unstable.
model = RandomForestClassifier(random_state=23)

# y_train is a one-column DataFrame; scikit-learn expects a 1-D target, so
# flatten it instead of relying on an implicit (and warned-about) conversion.
model.fit(X_train, np.ravel(y_train))

# Mean accuracy on the held-out test set.
rf_score = model.score(X_test, y_test)

rf_score

## **"DecisionTreeClassifier"**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree (same seed as the train/test split): the order in which
# features are tried is randomised, so an unseeded tree can score differently
# from run to run.
model = DecisionTreeClassifier(random_state=23)

# Pass the target as a 1-D array rather than a one-column DataFrame.
model.fit(X_train, np.ravel(y_train))

# Mean accuracy on the held-out test set.
dt_score = model.score(X_test, y_test)

dt_score

## **"KNeighborsClassifier"**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Default k-nearest-neighbours classifier.
# NOTE(review): the features are arbitrary integer codes, so distances between
# them carry no real meaning; consider one-hot encoding before trusting this score.
model = KNeighborsClassifier()

# y_train is a one-column DataFrame; scikit-learn expects a 1-D target and
# otherwise emits a DataConversionWarning (hidden here by the global warning
# filter), so flatten it explicitly.
model.fit(X_train, np.ravel(y_train))

# Mean accuracy on the held-out test set.
kn_score = model.score(X_test, y_test)

kn_score

## **"SVC"**

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with default settings.
model = SVC()

# y_train is a one-column DataFrame; scikit-learn expects a 1-D target and
# otherwise emits a DataConversionWarning (hidden here by the global warning
# filter), so flatten it explicitly.
model.fit(X_train, np.ravel(y_train))

# Mean accuracy on the held-out test set.
svc_score = model.score(X_test, y_test)

svc_score

## **"score_df"**

In [None]:
# Collect the four test accuracies into one table, one row per model.
score = dict(
    Model_Name=['Random Forest', 'Decision Tree', 'K Neighbors', 'SVC '],
    Score=[rf_score, dt_score, kn_score, svc_score],
)

score_df = pd.DataFrame(score)
score_df



## **"MODEL AND THEIR SCORES"**

In [None]:
import matplotlib.pyplot as plt

# Bar chart of test accuracy per model, drawn through the explicit Axes
# interface; each bar is annotated at its centre with the score rounded to
# three decimals.
fig, ax = plt.subplots(figsize=(8, 5))
barplot = ax.bar(
    score_df['Model_Name'],
    score_df['Score'],
    color=['red', 'yellow', 'pink', 'cyan'],
    edgecolor="black",
    width=0.25,
)
ax.bar_label(barplot, labels=score_df['Score'].round(3), label_type='center')
ax.set_xlabel("Model Name")
ax.set_ylabel("Score")
ax.set_title("MODEL AND THEIR SCORES")
plt.show()

## **Thank You !**