In [1]:
!pip install datascience
!pip install wget
!pip install plotly
!pip install pdb

Collecting pdb


  Could not find a version that satisfies the requirement pdb (from versions: )
No matching distribution found for pdb


In [1]:
from datascience import *
from plotly.offline import plot
import numpy as np
import wget
import plotly.plotly as py
import plotly.graph_objs as go
import math

In [62]:
#Instantiating useful tables
mushroomData = Table.read_table('D:\Datasets\mushroom-characteristics.csv')
poisonShrooms = mushroomData.where([x for x in mushroomData['class'] == "'p'"])
edibleShrooms = mushroomData.where([x for x in mushroomData['class'] == "'e'"])

#Defining useful variables and values
labels = list(mushroomData.labels)
indices = np.arange(22)
characteristicsDictionary = {k:v for k,v in zip(characteristics, indices)}

#Creating dictionaries in order to track characteristics and frequencies of characteristics
poisonDict = {}
for label in labels:
    poisonDict[label] = {"'a'": 0, "'b'": 0, "'c'": 0, "'d'": 0, "'e'": 0, "'f'": 0, "'g'": 0, "'h'": 0, "'i'": 0, "'j'": 0,
                         "'k'": 0, "'l'": 0, "'m'": 0, "'n'": 0, "'o'": 0, "'p'": 0, "'q'": 0, "'r'": 0, "'s'": 0, "'t'": 0,
                         "'u'": 0, "'v'": 0, "'w'": 0, "'x'": 0, "'y'": 0, "'z'": 0, '?': 0}
edibleDict = {}
for label in labels:
    edibleDict[label] = {"'a'": 0, "'b'": 0, "'c'": 0, "'d'": 0, "'e'": 0, "'f'": 0, "'g'": 0, "'h'": 0, "'i'": 0, "'j'": 0,
                         "'k'": 0, "'l'": 0, "'m'": 0, "'n'": 0, "'o'": 0, "'p'": 0, "'q'": 0, "'r'": 0, "'s'": 0, "'t'": 0,
                         "'u'": 0, "'v'": 0, "'w'": 0, "'x'": 0, "'y'": 0, "'z'": 0, '?': 0}

#Putting data in the tables by looping through and doing complicated dictionary addition
for column in labels:
    for rowIndex in range(len(mushroomData[column]) - 2):
        if mushroomData['class'][rowIndex] == "\'p\'":
            poisonDict[column][mushroomData[column][rowIndex]] += 1
        elif mushroomData['class'][rowIndex] == "\'e\'":
            edibleDict[column][mushroomData[column][rowIndex]] += 1
        else:
            pass

#Deleting entries with no frequency
attributes = list(poisonDict.keys())
values = list(poisonDict['cap-shape'].keys())
        
for attribute in attributes:
    for value in values:
        if poisonDict[attribute][value] == 0:
            del poisonDict[attribute][value]
        if edibleDict[attribute][value] == 0:
            del edibleDict[attribute][value]

In [52]:
#Weighting of attributes
charWeights = Table().with_columns(
'Characteristic', labels[:-1],
'Edible Weight', [1.0 for x in range(len(labels[:-1]))],
'Poisonous Weight', [1.0 for x in range(len(labels[:-1]))]
)

#Characteristics that fail to have a trait with at least 33% representation are worth less, as they make less of a strong case
#for being a defining characteristic of a mushroom
for index in range(22):
    if max(featureBreakdown('p', charWeights['Characteristic'][index])['Percent mushrooms with tag']) < 33:
        charWeights['Poisonous Weight'][index] = 0.5
    if max(featureBreakdown('e', charWeights['Characteristic'][index])['Percent mushrooms with tag']) < 33:
        charWeights['Edible Weight'][index] = 0.5

In [53]:
def featureBreakdown(edibility, characteristic, task='percent', frequency='all'):
    """
    Takes the edibility of a mushroom and the desired trait and returns a table with each subcharacteristic and its frequency
    within its edibility
    
    task: takes percent or freq, displaying either the percent as a proportion of all mushrooms of that edibility type, or a 
    frequency count
    
    frequency: all, high, or low. decides the threshhold a frequency has to pass for it to be included in the table
    
    Ex:
    >>> featureBreakdown('p', 'cap-shape', task='percent', frequency='low')
        cap-shape | Percent mushrooms with tag
        'b'       | 1.2
        'c'       | 0.1
        'k'       | 15.3
    """
    
    #Determines which dictionary and table to use based on edibility type
    if edibility == 'e':
        d = edibleDict
        mushroomsTable = edibleShrooms
    elif edibility == 'p':
        d = poisonDict
        mushroomsTable = poisonShrooms
    else:
        raise TypeError('Please enter a edibility value of \'p\' or \'e\'')
        
    tags = [x for x in d[characteristic].keys()]
    count = [y for y in d[characteristic].values()]
    
    #Creates the requisite column based on the argument passed in
    if task == 'freq':
        
        t = Table().with_columns(
        characteristic, tags,
        'Count', count
        )
        
        return t
    
    elif task == 'percent':
        
        totalMushrooms = mushroomsTable.num_rows
        count = [round(x/totalMushrooms, 3)*100 for x in count]
        
        t = Table().with_columns(
        characteristic, tags,
        'Percent mushrooms with tag', count
        )
        
        if frequency == 'all':
            return t
        elif frequency == 'high':
            return t.where([x for x in t['Percent mushrooms with tag'] >= 75])
        elif frequency == 'low':
            return t.where([x for x in t['Percent mushrooms with tag'] <= 25])
        
    else:
        return

In [66]:
def determineEdibility(charDict, high=75, low=25, normalScore=22):
    poisonScore = normalScore
    majority = False
    
    if high == 'majority':
        majority = True
    
    for char in list(charDict.keys())[:-1]:
        poisonousChars = featureBreakdown('p', char)
        edibleChars = featureBreakdown('e', char)
        commonP = True
        commonE = True
        
        if majority:
            high = max(poisonousChars['Percent mushrooms with tag'])
        
        if poisonousChars.where([x for x in np.logical_and(poisonousChars['Percent mushrooms with tag'] >= high,
                                charDict[char] == poisonousChars[char])]).num_rows != 0:
            commonP = True
            print(char + ' is commonly found in poisonous mushrooms...')
        elif poisonousChars.where([x for x in np.logical_and(poisonousChars['Percent mushrooms with tag'] <= low,
                                charDict[char] == poisonousChars[char])]).num_rows != 0:
            commonP = False
            print(char + ' is uncommonly found in poisonous mushrooms...')
            
        if majority:
            high = max(edibleChars['Percent mushrooms with tag'])
            
        if edibleChars.where([x for x in np.logical_and(edibleChars['Percent mushrooms with tag'] >= high,
                            charDict[char] == edibleChars[char])]).num_rows != 0:
            commonE = True
            print(char + ' is commonly found in edible mushrooms...')
        elif edibleChars.where([x for x in np.logical_and(edibleChars['Percent mushrooms with tag'] <= low,
                                charDict[char] == edibleChars[char])]).num_rows != 0:
            commonE = False
            print(char + ' is uncommonly found in edible mushrooms...')
        
        if commonP and commonE:
            poisonScore += 0
        elif commonP and not commonE:
            poisonScore += 1 * charWeights['Poisonous Weight'][characteristicsDictionary[char]]
        elif commonE and not commonP:
            poisonScore -= 1 * charWeights['Edible Weight'][characteristicsDictionary[char]]
        elif not commonE and not commonP:
            poisonScore -= 0
    
    verdict = ''
    if 0 <= poisonScore < normalScore/2:
        verdict = 'definitely not poisonous'
    elif normalScore/2 <= poisonScore < normalScore:
        verdict = 'likely not poisonous'
    elif normalScore < poisonScore < 3*normalScore/2:
        verdict = 'likely poisonous'
    elif 3*normalScore/2 <= poisonScore <= 2*normalScore:
        verdict = 'definitely poisonous'
    elif poisonScore == normalScore:
        verdict = 'not able to be categorized based on the data given'
        
    printer = "With a score of " + str(poisonScore) + ', this mushroom is ' + verdict
    
    return printer

In [57]:
def displayParagon(edibility):
    """
    Takes an edibility type and returns the mushroom with the characteristics that are most commonly present in that type 
    of mushroom according to the data
    """
    if edibility == 'e':
        charDict = edibleDict
        mushroomsTable = edibleShrooms
    elif edibility == 'p':
        charDict = poisonDict
        mushroomsTable = poisonShrooms
    else:
        raise TypeError('Please enter a edibility value of \'p\' or \'e\'')
    
    keys = list(charDict.keys())[:-1]
    values = []
    
    for char in keys:
        dist = featureBreakdown(edibility, char)
        highestValue = dist.where('Percent mushrooms with tag', max(dist['Percent mushrooms with tag']))[char][0]
        values.append(highestValue)
        
    return {k:v for k,v in zip(keys, values)}

In [71]:
#Some test mushrooms
mushroomTemplate = {k : v for k,v in zip(poisonShrooms.labels, poisonShrooms.rows[0])}
defPoisonous = {'cap-surface': "'y'", 'cap-shape': "'x'", 'stalk-root': "'b'", 'spore-print-color': "'w'",
 'ring-number': "'o'", 'ring-type': "'l'", 'gill-size': "'n'", 'gill-color': "'w'", }

In [72]:
determineEdibility(mushroomTemplate, high='majority')

cap-shape is commonly found in poisonous mushrooms...
cap-shape is commonly found in edible mushrooms...
cap-color is commonly found in poisonous mushrooms...
cap-color is commonly found in edible mushrooms...
bruises%3F is uncommonly found in poisonous mushrooms...
bruises%3F is commonly found in edible mushrooms...
odor is uncommonly found in poisonous mushrooms...
gill-attachment is commonly found in poisonous mushrooms...
gill-attachment is commonly found in edible mushrooms...
gill-spacing is commonly found in poisonous mushrooms...
gill-spacing is commonly found in edible mushrooms...
gill-size is commonly found in poisonous mushrooms...
gill-size is uncommonly found in edible mushrooms...
gill-color is uncommonly found in poisonous mushrooms...
gill-color is uncommonly found in edible mushrooms...
stalk-root is uncommonly found in poisonous mushrooms...
stalk-root is uncommonly found in edible mushrooms...
stalk-surface-above-ring is commonly found in edible mushrooms...
stalk-s

'With a score of 19.0, this mushroom is likely not poisonous'

In [70]:
mushroomData.show(4)

cap-shape,cap-surface,cap-color,bruises%3F,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat,class
'x','s','n','t','p','f','c','n','k','e','e','s','s','w','w','p','w','o','p','k','s','u','p'
'x','s','y','t','a','f','c','b','k','e','c','s','s','w','w','p','w','o','p','n','n','g','e'
'b','s','w','t','l','f','c','b','n','e','c','s','s','w','w','p','w','o','p','n','n','m','e'
'x','y','w','t','p','f','c','n','n','e','e','s','s','w','w','p','w','o','p','k','s','u','p'
