In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import math

# Read the mushroom table from disk into a pandas DataFrame.
df = pd.read_csv("C:/Users/garga/Desktop/Mush.csv")
df.keys()  # column index; not displayed, since it is not the cell's last expression

# Names of the predictor columns used as model features.
feat_cols = [
    'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-color', 'ring-number', 'ring-type',
    'spore-print-color', 'population', 'habitat',
]

# Integer-encode every column of the frame (the 'class' target included).
# One encoder object is re-fitted on each column in turn.
label_encoder = LabelEncoder()
df = df.apply(lambda column: label_encoder.fit_transform(column))

# Target vector and feature matrix.
Y = df['class']
X = df[feat_cols]

In [None]:
# Edible vs. poisonous: share of each class in the dataset
plt.rcParams['figure.figsize'] = 15, 5
ax = plt.subplot(122)

# value_counts() orders its rows by frequency, not by class code, so slice
# labels and colours are looked up from each row's class code instead of
# assuming the rows arrive as (edible, poisonous).
class_names = {0: 'Edible', 1: 'Poisonous'}    # 0 is the code counted as edible in the other cells
class_colors = {0: '#EAFFD0', 1: '#F38181'}

mush_classpie = df['class'].value_counts()
mush_size = mush_classpie.values.tolist()      # slice sizes
mush_types = mush_classpie.index.tolist()      # class code of each slice
mush_labels = tuple(class_names[code] for code in mush_types)
colors = [class_colors[code] for code in mush_types]

plt.title('Mushroom Class Type Percentange', fontsize=10)
patches, texts, autotexts = plt.pie(mush_size, labels=mush_labels, colors=colors,
                                    autopct='%1.1f%%', shadow=True, startangle=150)
# Same font size for the slice names and the percentage texts
for text, autotext in zip(texts, autotexts):
    text.set_fontsize(14)
    autotext.set_fontsize(14)

plt.axis('equal')  # keep the pie circular
plt.show()

In [None]:
# Numbers on top of the bars: how many mushrooms each bar stands for
def label(bars, fontsize=9, ax=None):
    """Write each bar's height, as an integer, centred just above the bar.

    Parameters
    ----------
    bars : iterable
        Bar artists exposing ``get_x()``, ``get_width()`` and ``get_height()``
        (e.g. the container returned by ``Axes.bar``).
    fontsize : int, optional
        Font size of the annotation text.
    ax : object with a ``text`` method, optional
        Axes to draw the annotations on. When omitted, the current pyplot
        axes are used, so the function no longer depends on a notebook-level
        variable that happens to be called ``ax``.
    """
    target = plt.gca() if ax is None else ax
    for bar in bars:
        height = bar.get_height()
        # Anchor at the horizontal centre of the bar, at its top edge
        target.text(bar.get_x() + bar.get_width() / 2., height, '%d' % int(height),
                    ha='center', va='bottom', fontsize=fontsize)

In [None]:
# Cap colour vs. number of mushrooms, split by class
plt.rcParams['figure.figsize'] = 20, 5

# Total number of mushrooms per cap-colour code, most frequent first
cap_colors = df['cap-color'].value_counts()
m_height = cap_colors.values.tolist()
# Cap-colour codes, in the same order as the totals
cap_color_labels = cap_colors.index.tolist()
# x locations of the groups: one per cap colour actually present in the data
# (previously hard-coded to 10, which breaks as soon as the count differs)
ind = np.arange(len(cap_color_labels))

ax = plt.subplot(122)

# Edible (class == 0) mushrooms per cap colour, aligned with cap_color_labels;
# a colour with no edible rows counts as 0. Everything else is poisonous.
edible_by_color = df.loc[df['class'] == 0, 'cap-color'].value_counts()
edible_cc = [int(edible_by_color.get(code, 0)) for code in cap_color_labels]      # Edible color cap list
poisonous_cc = [total - edible for total, edible in zip(m_height, edible_cc)]     # Poisonous color cap list

width = 0.4
edible_bars = ax.bar(ind, edible_cc, width, color='#EAFFD0')
poison_bars = ax.bar(ind + width, poisonous_cc, width, color='#F38181')

# Captions
ax.set_xlabel('---Cap Color---', fontsize=10)
ax.set_ylabel('---Quantity---', fontsize=10)
ax.set_title('Mushrooms Cap Color & Class', fontsize=12)
ax.set_xticks(ind + width / 2)  # centre each tick between the two bars of its group
# Tick text is the encoded cap-colour code of each group; without it the axis
# only shows the tick positions, which do not identify the colour.
ax.set_xticklabels(cap_color_labels, fontsize=10)
ax.legend((edible_bars, poison_bars), ('Edible', 'Poisonous'), fontsize=10)
label(edible_bars, 10)
label(poison_bars, 10)
plt.show()

In [None]:
# The two remaining cap features: surface and shape
num_col = ['cap-surface', 'cap-shape']

# One row, one panel per feature. The grid requested here is now the grid that
# is drawn on: previously a 2x1 grid was created and then 1x2 axes were added
# on top of it with plt.subplot, leaving stale overlapping axes in the figure
# and making sharey a no-op for the visible panels.
f, axes = plt.subplots(1, 2, figsize=(20, 5), sharey=True)
for panel, col in zip(axes.flat, num_col):
    s = sns.countplot(x=col, data=df, hue='class', alpha=0.7, ax=panel)
    s.legend(loc="upper right", prop={'size': 10})
    # Print each bar's count slightly above its top edge
    for p in s.patches:
        s.annotate(format(p.get_height(), '.0f'),
                   (p.get_x() + p.get_width() / 2., p.get_height()),
                   ha='center', va='center',
                   xytext=(0, 5),
                   textcoords='offset points')

plt.show()

In [None]:
# Gill features
num_col = ['gill-attachment', 'gill-spacing', 'gill-size', 'gill-color']

# 2x2 grid, one panel per feature. The grid requested here is now the grid that
# is drawn on: previously a 4x1 grid was created and then 2x2 axes were added
# on top of it with plt.subplot, leaving stale overlapping axes in the figure.
f, axes = plt.subplots(2, 2, figsize=(20, 15), sharey=True)
for panel, col in zip(axes.flat, num_col):
    s = sns.countplot(x=col, data=df, hue='class', palette="pastel", ax=panel)
    # Rotate the category labels after plotting, so the setting applies to the
    # ticks the count plot actually created
    s.tick_params(axis='x', labelrotation=45)
    # Print each bar's count slightly above its top edge
    for p in s.patches:
        s.annotate(format(p.get_height(), '.0f'),
                   (p.get_x() + p.get_width() / 2., p.get_height()),
                   ha='center', va='center',
                   xytext=(0, 5), fontsize=8.5,
                   textcoords='offset points')

plt.show()

In [None]:
# Stalk features

num_col = ['stalk-shape', 'stalk-root', 'stalk-surface-above-ring', 'stalk-surface-below-ring']

# 2x2 grid, one nested pie per feature. The grid requested here is now the grid
# that is drawn on: previously a 4x1 grid was created and then 2x2 axes were
# added on top of it with plt.subplot, leaving stale overlapping axes.
f, axes = plt.subplots(2, 2, figsize=(17, 12))

for panel, col in zip(axes.flat, num_col):
    cols = df[col].value_counts()
    pop_size = cols.values.tolist()    # rows per category, most frequent first
    pop_types = cols.index.tolist()    # category codes, same order

    # Edible (class == 0) rows per category, aligned with pop_types;
    # the remainder of each category is poisonous.
    edible_by_type = df.loc[df['class'] == 0, col].value_counts()
    edible_pop = [int(edible_by_type.get(pop, 0)) for pop in pop_types]               # Edible population type list
    poisonous_pop = [size - edibles for size, edibles in zip(pop_size, edible_pop)]   # Poisonous population type list

    # Interleave as edible, poisonous, edible, poisonous, ... per category.
    # Built once per feature (it used to be reset inside the counting loop).
    combine_ed_poi = []
    for edibles, poisonous in zip(edible_pop, poisonous_pop):
        combine_ed_poi.extend((edibles, poisonous))

    # Nested pie chart
    panel.set_title(col)
    # Outer ring: edible / poisonous counts of each category, labelled with the counts
    patches1, texts1 = panel.pie(combine_ed_poi, radius=4.5, labels=combine_ed_poi,
                                 colors=['#C4F6F5', '#F6EEC4'], shadow=True, labeldistance=1.1)
    for i, count_text in enumerate(texts1):
        # Even positions are edible counts, odd positions poisonous counts
        count_text.set_color('blue' if i % 2 == 0 else 'red')
        count_text.set_fontsize(9)
    # Inner disc: share of each category in the whole dataset
    patches2, texts2, autotexts2 = panel.pie(pop_size, radius=3.5,
                                             autopct='%1.2f%%', shadow=True, labeldistance=4.2)
    for aut in autotexts2:
        aut.set_fontsize(10)
        aut.set_horizontalalignment('center')
    # Two legends on one axes: the second call replaces the first legend,
    # so the first one is added back as a plain artist.
    first_legend = panel.legend(patches1, ['Edible', 'Poisonous'], loc="upper left", fontsize=10)
    second_ledgend = panel.legend(patches2, pop_types, loc="best", fontsize=8)
    panel.add_artist(first_legend)
    panel.axis('equal')
plt.show()

In [None]:
# Veil and ring features

num_col = ['veil-color', 'ring-number', 'ring-type']

# One row, one panel per feature. The grid requested here is now the grid that
# is drawn on: previously a 3x1 grid was created and then 1x3 axes were added
# on top of it with plt.subplot, leaving stale overlapping axes in the figure
# and making sharey a no-op for the visible panels.
f, axes = plt.subplots(1, 3, figsize=(20, 5), sharey=True)
for panel, col in zip(axes.flat, num_col):
    s = sns.countplot(x=col, data=df, hue='class', alpha=0.7, palette='Set1', ax=panel)
    s.legend(loc="upper right", prop={'size': 10})
    s.tick_params(axis='x', labelrotation=45)
    # Print each bar's count slightly above its top edge
    for p in s.patches:
        s.annotate(format(p.get_height(), '.0f'),
                   (p.get_x() + p.get_width() / 2., p.get_height()),
                   ha='center', va='center',
                   xytext=(0, 5),
                   textcoords='offset points')

plt.show()

In [None]:
# 'bruises', 'population', 'habitat'
num_col = ['bruises', 'population', 'habitat']

# 2x2 grid holding one nested pie per feature. The grid requested here is now
# the grid that is drawn on: previously a 3x1 grid was created and then 2x2
# axes were added on top of it with plt.subplot, leaving stale overlapping axes.
f, axes = plt.subplots(2, 2, figsize=(20, 12))
# Three features on a four-panel grid: drop the panel that stays empty
for spare in axes.flat[len(num_col):]:
    spare.remove()

for panel, col in zip(axes.flat, num_col):
    cols = df[col].value_counts()
    pop_size = cols.values.tolist()    # rows per category, most frequent first
    pop_types = cols.index.tolist()    # category codes, same order

    # Edible (class == 0) rows per category, aligned with pop_types;
    # the remainder of each category is poisonous.
    edible_by_type = df.loc[df['class'] == 0, col].value_counts()
    edible_pop = [int(edible_by_type.get(pop, 0)) for pop in pop_types]               # Edible population type list
    poisonous_pop = [size - edibles for size, edibles in zip(pop_size, edible_pop)]   # Poisonous population type list

    # Interleave as edible, poisonous, edible, poisonous, ... per category
    combine_ed_poi = []
    for edibles, poisonous in zip(edible_pop, poisonous_pop):
        combine_ed_poi.extend((edibles, poisonous))

    # Nested pie chart again
    panel.set_title(col)
    # Outer ring: edible / poisonous counts of each category, labelled with the counts
    patches1, texts1 = panel.pie(combine_ed_poi, radius=4.5, labels=combine_ed_poi,
                                 colors=['#C4F6F5', '#F6EEC4'], shadow=True, labeldistance=1.1)
    for i, count_text in enumerate(texts1):
        # Even positions are edible counts, odd positions poisonous counts
        count_text.set_color('blue' if i % 2 == 0 else 'red')
        count_text.set_fontsize(9)
    # Inner disc: share of each category in the whole dataset
    patches2, texts2, autotexts2 = panel.pie(pop_size, radius=3.5,
                                             autopct='%1.2f%%', shadow=True, labeldistance=4.2)
    for aut in autotexts2:
        aut.set_fontsize(10)
        aut.set_horizontalalignment('center')
    # Two legends on one axes: the second call replaces the first legend,
    # so the first one is added back as a plain artist.
    first_legend = panel.legend(patches1, ['Edible', 'Poisonous'], loc="upper left", fontsize=10)
    second_ledgend = panel.legend(patches2, pop_types, loc="best", fontsize=8)
    panel.add_artist(first_legend)
    panel.axis('equal')
plt.show()


In [None]:
# 'odor' and 'spore-print-color'
num_col = ['odor', 'spore-print-color']

# One row, one panel per feature. The grid requested here is now the grid that
# is drawn on: previously a 2x1 grid was created and then 1x2 axes were added
# on top of it with plt.subplot, leaving stale overlapping axes in the figure
# and making sharey a no-op for the visible panels.
f, axes = plt.subplots(1, 2, figsize=(20, 5), sharey=True)
for panel, col in zip(axes.flat, num_col):
    s = sns.countplot(x=col, data=df, hue='class', alpha=0.7, palette='hot', ax=panel)
    s.legend(loc="upper right", prop={'size': 10})
    s.tick_params(axis='x', labelrotation=45)
    # Print each bar's count slightly above its top edge
    for p in s.patches:
        s.annotate(format(p.get_height(), '.0f'),
                   (p.get_x() + p.get_width() / 2., p.get_height()),
                   ha='center', va='center',
                   xytext=(0, 5),
                   textcoords='offset points')

plt.show()