In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the mushroom dataset from the shared DATA directory (path is relative
# to this notebook's location) and preview the first five rows.
mushroom = pd.read_csv(
    '../../../DATA/mushrooms.csv'
)
mushroom.head(n=5)

In [None]:
# Number of missing values in each column.
mushroom.isna().sum()

In [None]:
# Row count per value of the target column 'class'.
sns.countplot(x='class', data=mushroom)

In [None]:
# Transpose describe() so every original column becomes a row, move the column
# names out of the index into an 'index' column, and order the rows by the
# 'unique' statistic (ascending).
unique = (
    mushroom
    .describe()
    .T
    .reset_index()
    .sort_values(by='unique')
)

In [None]:
# One bar per original column showing its 'unique' statistic; column names are
# rotated vertically so they stay readable.
plt.figure(dpi=250, figsize=(14, 10))
sns.barplot(x='index', y='unique', data=unique)
plt.xticks(rotation=90)

In [None]:
# Target: the 'class' column.
y = mushroom['class']

# Features: every other column, one-hot encoded. drop_first=True drops the
# first level of each encoded column.
X = pd.get_dummies(mushroom.drop(columns='class'), drop_first=True)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 15% of the rows for testing; the fixed seed keeps the split stable
# across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=101,
)

In [None]:
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# A single boosting round. Seeded so the fitted estimator (and therefore the
# feature it selects) is reproducible across kernel restarts; the seed value
# matches the one used for the train/test split.
model = AdaBoostClassifier(n_estimators=1, random_state=101)

In [None]:
# Fit the classifier on the training split.
model.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
# Predicted class labels for the held-out rows.
pred = model.predict(X=X_test)
pred

In [None]:
# Per-class precision / recall / F1 on the test split.
print(classification_report(y_true=y_test, y_pred=pred))

In [None]:
# Confusion matrix of true vs. predicted labels on the test split.
confusion_matrix(y_true=y_test, y_pred=pred)

In [None]:
# Fraction of test rows predicted correctly.
accuracy_score(y_true=y_test, y_pred=pred)

In [None]:
# Position (within X's columns) of the feature with the largest importance.
np.argmax(model.feature_importances_)

In [None]:
# Name of the most important feature. The position is looked up from the
# fitted model instead of being hard-coded, so this stays correct if the data,
# the encoding, or the fitted model changes.
X.columns[model.feature_importances_.argmax()]

In [None]:
# Row count per 'odor' value, split by 'class'.
sns.countplot(x='odor', hue='class', data=mushroom)

In [None]:
# Test-set error rate (1 - accuracy) for AdaBoost models with n_estimators
# ranging from 1 to 95 inclusive. Entry i of `error_rates` belongs to
# n_estimators = i + 1.
error_rates = []

for n in range(1,96):
    # Seeded so the error curve is reproducible across kernel restarts.
    # NOTE: `model` is deliberately reassigned on every iteration; the
    # feature-importance cells further down read the last model fitted here.
    model = AdaBoostClassifier(n_estimators=n, random_state=101)
    model.fit(X_train,y_train)
    preds = model.predict(X_test)
    error = 1 - accuracy_score(y_test,preds)
    error_rates.append(error)

In [None]:
# Show the collected test error rates: one value (1 - accuracy) per
# n_estimators setting tried in the loop, in the order they were fitted.
error_rates

In [None]:
# Error rate as a function of the number of estimators. The x-values are
# derived from the list length (entry i corresponds to n_estimators = i + 1),
# so the plot cannot fall out of sync with the number of models evaluated.
plt.plot(range(1, len(error_rates) + 1), error_rates)
plt.xlabel('n_estimators')
plt.ylabel('Test error rate')

In [None]:
# Importance of every feature for the current `model`. When the cells are run
# top to bottom, `model` is the last estimator fitted in the error-rate loop
# (n_estimators=95), not the single-estimator model from earlier.
model.feature_importances_

In [None]:
# One-column table of importances, indexed by feature (column) name of X.
features = pd.DataFrame(
    {'Importance': model.feature_importances_},
    index=X.columns,
)
features

In [None]:
# Keep only the features the model actually used (importance > 0).
important_features = features[features['Importance']>0]

# Sort once and take BOTH the bar heights and the bar labels from the sorted
# frame. Passing the unsorted index as `x` next to sorted `data` pairs labels
# and heights by position, which attaches the wrong name to each bar.
important_sorted = important_features.sort_values('Importance')

plt.figure(figsize=(12,8),dpi=250)
sns.barplot(data=important_sorted,x=important_sorted.index,y='Importance')
plt.xticks(rotation=90)