## Exploring and preparing the data

In [1]:
# Load mushrooms.csv from the working directory; character columns are read
# as factors so every attribute is treated as categorical.
mushroom <- read.csv(file = "mushrooms.csv", stringsAsFactors = TRUE)

# Compact overview of the column types and factor levels
str(mushroom)

'data.frame':	8124 obs. of  23 variables:
 $ type                    : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
 $ cap.shape               : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
 $ cap.surface             : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
 $ cap.color               : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
 $ bruises                 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
 $ odor                    : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
 $ gill.attachment         : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
 $ gill.spacing            : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
 $ gill.size               : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
 $ gill.color              : Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
 $ stalk.shape             : Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
 $ stalk.r

In [2]:
# Remove the veil.type column from the data frame.
# NOTE(review): presumably dropped because it carries no information for
# classification (e.g. a single level) -- confirm against the full str() output.
mushroom[["veil.type"]] <- NULL

In [3]:
# Class distribution of the target `type`: absolute counts per level
# ("e" / "p" -- presumably edible / poisonous; confirm with the data dictionary)
table(mushroom[["type"]])

# Same distribution expressed as percentages of all rows
100 * prop.table(table(mushroom[["type"]]))


   e    p 
4208 3916 


       e        p 
51.79714 48.20286 

## Training the model

In [6]:
# One-time setup (uncomment if RWeka is not installed yet):
# install.packages("RWeka")
library(RWeka)

# Fit a 1R rule learner predicting `type` from all remaining columns
mushroom_1R <- OneR(formula = type ~ ., data = mushroom)

In [8]:
# Display the fitted 1R model: the single feature it selected and the class
# it predicts for each level of that feature, plus the training hit count.
mushroom_1R

odor:
	a	-> e
	c	-> p
	f	-> p
	l	-> e
	m	-> p
	n	-> e
	p	-> p
	s	-> p
	y	-> p
(8004/8124 instances correct)


## Evaluating the model performance

In [9]:
# Evaluation statistics and confusion matrix for the 1R model. No new data is
# supplied, so these figures describe the fit on the same rows the model was
# trained on rather than held-out performance.
summary(object = mushroom_1R)


=== Summary ===

Correctly Classified Instances        8004               98.5229 %
Incorrectly Classified Instances       120                1.4771 %
Kappa statistic                          0.9704
Mean absolute error                      0.0148
Root mean squared error                  0.1215
Relative absolute error                  2.958  %
Root relative squared error             24.323  %
Total Number of Instances             8124     

=== Confusion Matrix ===

    a    b   <-- classified as
 4208    0 |    a = e
  120 3796 |    b = p

## Improving the model performance

In [10]:
# Fit a JRip (RIPPER) rule learner on the same formula; unlike 1R, its rules
# may combine conditions on several features.
mushroom_JRip <- JRip(formula = type ~ ., data = mushroom)

# Display the learned rule list
mushroom_JRip

JRIP rules:

(odor = f) => type=p (2160.0/0.0)
(gill.size = n) and (gill.color = b) => type=p (1152.0/0.0)
(gill.size = n) and (odor = p) => type=p (256.0/0.0)
(odor = c) => type=p (192.0/0.0)
(spore.print.color = r) => type=p (72.0/0.0)
(stalk.surface.below.ring = y) and (stalk.surface.above.ring = k) => type=p (68.0/0.0)
(habitat = l) and (cap.color = w) => type=p (8.0/0.0)
(stalk.color.above.ring = y) => type=p (8.0/0.0)
 => type=e (4208.0/0.0)

Number of Rules : 9
