# Zoo Animal Classification 

## Importing Libraries

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Silence every warning for the rest of the session; note that this also hides
# deprecation notices emitted by the plotting and modelling libraries.
warnings.simplefilter("ignore")

## Loading up the data

In [28]:
# Read class.csv (presumably the class-number -> class-name lookup; verify against
# the file) and show the whole table as the cell output.
class_csv_path = "../input/zoo-animal-classification/class.csv"
class1 = pd.read_csv(class_csv_path)
class1

In [29]:
# Read the main zoo table and preview its first five rows.
zoo_csv_path = "../input/zoo-animal-classification/zoo.csv"
df = pd.read_csv(zoo_csv_path)
df.head()

In [30]:
# (number of rows, number of columns) of the loaded table
df.shape

In [31]:
# Distinct values of the target column, in order of first appearance.
pd.unique(df["class_type"])

In [32]:
# Column names of the table as a plain Python list.
df.columns.tolist()

In [33]:
# Remove the animal name column before analysis and modelling.
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell idempotent: running it a second time is a no-op rather than
# a KeyError for the already-missing column.
df = df.drop(columns="animal_name", errors="ignore")
df.head()

In [34]:
# Number of missing entries in each column (0 means the column is complete).
missing_per_column = df.isnull().sum()
missing_per_column

In [35]:
# Data type of every remaining column
df.dtypes

In [36]:
# Pairwise correlation of all columns, drawn as an annotated heatmap
# (values shown with one significant digit, no colour bar).
fig, ax = plt.subplots(figsize=(15, 10))
corr_matrix = df.corr()
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.1g',
    cmap="viridis",
    cbar=False,
    linewidths=0.5,
    linecolor='black',
    ax=ax,
);

In [37]:
# Count the rows in each class_type and show the counts as a treemap.
# NOTE(review): the variable name is kept unchanged, although what it holds is
# the number of animals per class type.
NumberofPlayers = (
    df.groupby('class_type')
      .size()
      .reset_index(name='count')
)

fig = px.treemap(NumberofPlayers, path=['class_type'], values='count')
fig.update_traces(textinfo="label+value")
fig.update_layout(
    title_text='Animal Class Types Distribution Tree',
    title_x=0.5,
    title_font=dict(size=25),
)

fig.show()

In [38]:
# Bar chart of the number of animals per class type.
# value_counts() is evaluated once and reused for both axes.
class_type_counts = df['class_type'].value_counts()
x = class_type_counts.index.tolist()
y = class_type_counts.tolist()

fig = px.bar(
    x=x,
    y=y,
    color=x,
    title="Animal Class Type Distribution",
    labels={'x': 'Animal Class', 'y': 'count'},
)
fig.show()

In [39]:
# Count the animals in each class and order the counts by class id, so that
# every "Type N" label is paired with the count of class N. value_counts() on
# its own sorts by frequency, which would attach the labels to the wrong slices.
class_counts = df['class_type'].value_counts().sort_index()
labels = [f"Type {class_id}" for class_id in class_counts.index]
values = class_counts.tolist()

# values/names are already aggregated lists, so no data frame is passed.
px.pie(values=values, names=labels, title="Animal Class Type Distribution Pie Chart")

The 7 Class Types are:

`1. Mammal` 

`2. Bird` 

`3. Reptile`

`4. Fish`

`5. Amphibian` 

`6. Bug`

`7. Invertebrate`

In [40]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(8, 6))
# Name the column through x= and pass the frame as data=: since seaborn 0.12 the
# only positional argument of countplot is `data`, so a bare positional Series
# is no longer interpreted as the x variable. ax= ties the plot to the axes
# created above instead of relying on the implicit "current axes".
sns.countplot(x="legs", data=df, ax=ax);

* More animals have 4 legs than any other number of legs
* The fewest animals have 5 legs

In [41]:
plt.style.use("ggplot")

# Feature columns to plot, listed in the row-major order of the 5x3 grid.
feature_columns = [
    "domestic", "hair", "feathers",
    "eggs", "milk", "airborne",
    "aquatic", "predator", "toothed",
    "backbone", "breathes", "venomous",
    "fins", "tail", "catsize",
]

fig, ax = plt.subplots(5, 3, figsize=(15, 25))

# One count plot per feature. The column is named through x= (with data=df)
# because seaborn >= 0.12 accepts only `data` positionally; ax.flat walks the
# grid row by row, matching the order of feature_columns.
for feature, axis in zip(feature_columns, ax.flat):
    sns.countplot(x=feature, data=df, palette="viridis", ax=axis)

## Splitting the data into training and test datasets
Here, we are trying to predict the class type of each animal from its other attributes. Hence, `class_type` is the target (`y`), and the remaining columns form the input features (`X`).

In [42]:
# Feature matrix: every column except the target.
X = df.drop(columns="class_type")
X.head()

In [43]:
# Target vector: the class_type column on its own.
target_column = "class_type"
y = df[target_column]
y.head()

In [44]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state pins the shuffle.
# stratify=y keeps the class proportions the same in the train and test parts,
# so an unlucky shuffle cannot leave a rare class out of one of them.
# NOTE(review): stratification needs at least two rows per class - confirm this
# holds for the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [45]:
# Row counts of the training and test feature sets.
X_train.shape[0], X_test.shape[0]

## Linear Regression

In [46]:
from sklearn.linear_model import LinearRegression

# Least-squares regression of the numeric class label on the features.
# NOTE(review): the target is a class id, so this treats the labels as numbers
# on a scale rather than as categories.
lr = LinearRegression().fit(X_train, y_train)
lr

In [47]:
# LinearRegression.score() returns the R^2 coefficient of determination, not an
# accuracy. To report a figure that is comparable with the classifiers, round
# each prediction to the nearest class id (clipped to the label range seen in
# training) and measure the fraction of exact matches on the test set.
# NOTE(review): class ids are categories, so a regression is a questionable
# model for this task; consider a classifier such as LogisticRegression.
lr_class_pred = np.clip(
    np.rint(lr.predict(X_test)), y_train.min(), y_train.max()
).astype(int)
LinearRegressionScore = float(np.mean(lr_class_pred == y_test.to_numpy()))
print("Accuracy obtained by Linear Regression model:",LinearRegressionScore*100)

## Random Forest Classifier

In [48]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so that its randomised training, and therefore the reported
# accuracy, is identical on every run (same seed as the train/test split).
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

In [49]:
# Score the forest on the held-out rows and print it as a percentage.
RandomForestClassifierScore = rf.score(X_test, y_test)
rf_score_percent = RandomForestClassifierScore*100
print("Accuracy obtained by Random Forest Classifier model:", rf_score_percent)

## K Neighbors Classifier

In [50]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier that votes among the 4 closest training rows.
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(X_train, y_train)

In [51]:
# Score the k-NN model on the held-out rows and print it as a percentage.
KNeighborsClassifierScore = knn.score(X_test, y_test)
knn_score_percent = KNeighborsClassifierScore*100
print("Accuracy obtained by K Neighbors Classifier model:", knn_score_percent)

In [52]:
# Gather the test-set scores under dedicated names. Reusing `x`/`y` here would
# overwrite the target Series `y`, so re-running the train/test split cell
# after this one would operate on the wrong data.
model_scores = {
    "K Neighbors Classifier": KNeighborsClassifierScore,
    "Random Forest Classifier": RandomForestClassifierScore,
    "Linear Regression": LinearRegressionScore,
}
model_names = list(model_scores)
model_values = list(model_scores.values())

# One bar per model, coloured by model name.
fig = px.bar(
    x=model_names,
    y=model_values,
    color=model_names,
    title="Model Comparison - Model Accuracy",
    labels={
        'x': 'Model',
        'y': 'Model Accuracy'},
)
fig.show()