In [1]:
import pandas as pd

## Attribute Information:

### Classes
19 Classes:
- diaporthe-stem-canker, 
- charcoal-rot, 
- rhizoctonia-root-rot,
- phytophthora-rot, 
- brown-stem-rot, 
- powdery-mildew,
- downy-mildew, 
- brown-spot, 
- bacterial-blight,
- bacterial-pustule, 
- purple-seed-stain, 
- anthracnose,
- phyllosticta-leaf-spot, 
- alternarialeaf-spot,
- frog-eye-leaf-spot, 
- diaporthe-pod-&-stem-blight,
- cyst-nematode, 
- 2-4-d-injury, 
- herbicide-injury.

### Features
1. date: april,may,june,july,august,september,october,?.
2. plant-stand: normal,lt-normal,?.
3. precip: lt-norm,norm,gt-norm,?.
4. temp: lt-norm,norm,gt-norm,?.
5. hail: yes,no,?.
6. crop-hist: diff-lst-year,same-lst-yr,same-lst-two-yrs,same-lst-sev-yrs,?.
7. area-damaged: scattered,low-areas,upper-areas,whole-field,?.
8. severity: minor,pot-severe,severe,?.
9. seed-tmt: none,fungicide,other,?.
10. germination: 90-100%,80-89%,lt-80%,?.
11. plant-growth: norm,abnorm,?.
12. leaves: norm,abnorm.
13. leafspots-halo: absent,yellow-halos,no-yellow-halos,?.
14. leafspots-marg: w-s-marg,no-w-s-marg,dna,?.
15. leafspot-size: lt-1/8,gt-1/8,dna,?.
16. leaf-shread: absent,present,?.
17. leaf-malf: absent,present,?.
18. leaf-mild: absent,upper-surf,lower-surf,?.
19. stem: norm,abnorm,?.
20. lodging: yes,no,?.
21. stem-cankers: absent,below-soil,above-soil,above-sec-nde,?.
22. canker-lesion: dna,brown,dk-brown-blk,tan,?.
23. fruiting-bodies: absent,present,?.
24. external decay: absent,firm-and-dry,watery,?.
25. mycelium: absent,present,?.
26. int-discolor: none,brown,black,?.
27. sclerotia: absent,present,?.
28. fruit-pods: norm,diseased,few-present,dna,?.
29. fruit spots: absent,colored,brown-w/blk-specks,distort,dna,?.
30. seed: norm,abnorm,?.
31. mold-growth: absent,present,?.
32. seed-discolor: absent,present,?.
33. seed-size: norm,lt-norm,?.
34. shriveling: absent,present,?.
35. roots: norm,rotted,galls-cysts,?.

In [2]:
# Column names for the header-less data file: the class label comes first,
# followed by the 35 features in the order listed in the attribute description.
columns = [
    'class',
    'date', 'plant-stand', 'precip', 'temp', 'hail',
    'crop-hist', 'area-damaged', 'severity', 'seed-tmt', 'germination',
    'plant-growth', 'leaves', 'leafspots-halo', 'leafspots-marg', 'leafspot-size',
    'leaf-shread', 'leaf-malf', 'leaf-mild', 'stem', 'lodging',
    'stem-cankers', 'canker-lesion', 'fruiting-bodies', 'external decay', 'mycelium',
    'int-discolor', 'sclerotia', 'fruit-pods', 'fruit spots', 'seed',
    'mold-growth', 'seed-discolor', 'seed-size', 'shriveling', 'roots',
]

# Passing `names` supplies the header ourselves, so every line of the file is read as data.
df = pd.read_csv('soybean-large.data', names=columns)

In [3]:
# Preview the first 20 rows of the freshly loaded frame.
df.head(20)

Unnamed: 0,class,date,plant-stand,precip,temp,hail,crop-hist,area-damaged,severity,seed-tmt,...,int-discolor,sclerotia,fruit-pods,fruit spots,seed,mold-growth,seed-discolor,seed-size,shriveling,roots
0,diaporthe-stem-canker,6,0,2,1,0,1,1,1,0,...,0,0,0,4,0,0,0,0,0,0
1,diaporthe-stem-canker,4,0,2,1,0,2,0,2,1,...,0,0,0,4,0,0,0,0,0,0
2,diaporthe-stem-canker,3,0,2,1,0,1,0,2,1,...,0,0,0,4,0,0,0,0,0,0
3,diaporthe-stem-canker,3,0,2,1,0,1,0,2,0,...,0,0,0,4,0,0,0,0,0,0
4,diaporthe-stem-canker,6,0,2,1,0,2,0,1,0,...,0,0,0,4,0,0,0,0,0,0
5,diaporthe-stem-canker,5,0,2,1,0,3,0,1,0,...,0,0,0,4,0,0,0,0,0,0
6,diaporthe-stem-canker,5,0,2,1,0,2,0,1,1,...,0,0,0,4,0,0,0,0,0,0
7,diaporthe-stem-canker,4,0,2,1,1,1,0,1,0,...,0,0,0,4,0,0,0,0,0,0
8,diaporthe-stem-canker,6,0,2,1,0,3,0,1,1,...,0,0,0,4,0,0,0,0,0,0
9,diaporthe-stem-canker,4,0,2,1,0,2,0,2,0,...,0,0,0,4,0,0,0,0,0,0


Notice how the values are all integers or "?". Each integer is the zero-based position of a value in the lists given in the attribute descriptions above.
So, for example, date value of 0 corresponds to April, roots value of 1 corresponds to "rotted".

If you find it useful, you can create a mapping from the integer to the string.

In [4]:
def create_dict(vals):
    """Build a code -> label lookup from a comma-separated list of labels.

    Each label is keyed by its zero-based position in ``vals`` (surrounding
    whitespace of the whole string is stripped first). A ``'?'`` entry is not
    stored as a label, although it still occupies its position; instead the
    key ``'?'`` is always present and maps to ``None``.

    Parameters
    ----------
    vals : str
        Comma-separated labels, e.g. ``'norm,abnorm,?'``.

    Returns
    -------
    dict
        ``{0: 'norm', 1: 'abnorm', '?': None}`` for the example above.
    """
    code_to_label = {}
    for code, label in enumerate(vals.strip().split(',')):
        if label == '?':
            # The missing-value marker is handled by the dedicated key below.
            continue
        code_to_label[code] = label

    code_to_label['?'] = None
    return code_to_label

# Per-feature lookup tables built with create_dict: for each feature, the
# zero-based position of a label in its value list is the key for that label,
# and '?' maps to None.
maps = {
    feature: create_dict(value_list)
    for feature, value_list in (
        ('date', 'april,may,june,july,august,september,october,?'),
        ('plant-stand', 'normal,lt-normal,?'),
        ('precip', 'lt-norm,norm,gt-norm,?'),
        ('temp', 'lt-norm,norm,gt-norm,?'),
        ('hail', 'yes,no,?'),
        ('crop-hist', 'diff-lst-year,same-lst-yr,same-lst-two-yrs,same-lst-sev-yrs,?'),
        ('area-damaged', 'scattered,low-areas,upper-areas,whole-field,?'),
        ('severity', 'minor,pot-severe,severe,?'),
        ('seed-tmt', 'none,fungicide,other,?'),
        ('germination', '90-100%,80-89%,lt-80%,?'),
        ('plant-growth', 'norm,abnorm,?'),
        ('leaves', 'norm,abnorm'),
        ('leafspots-halo', 'absent,yellow-halos,no-yellow-halos,?'),
        ('leafspots-marg', 'w-s-marg,no-w-s-marg,dna,?'),
        ('leafspot-size', 'lt-1/8,gt-1/8,dna,?'),
        ('leaf-shread', 'absent,present,?'),
        ('leaf-malf', 'absent,present,?'),
        ('leaf-mild', 'absent,upper-surf,lower-surf,?'),
        ('stem', 'norm,abnorm,?'),
        ('lodging', 'yes,no,?'),
        ('stem-cankers', 'absent,below-soil,above-soil,above-sec-nde,?'),
        ('canker-lesion', 'dna,brown,dk-brown-blk,tan,?'),
        ('fruiting-bodies', 'absent,present,?'),
        ('external decay', 'absent,firm-and-dry,watery,?'),
        ('mycelium', 'absent,present,?'),
        ('int-discolor', 'none,brown,black,?'),
        ('sclerotia', 'absent,present,?'),
        ('fruit-pods', 'norm,diseased,few-present,dna,?'),
        ('fruit spots', 'absent,colored,brown-w/blk-specks,distort,dna,?'),
        ('seed', 'norm,abnorm,?'),
        ('mold-growth', 'absent,present,?'),
        ('seed-discolor', 'absent,present,?'),
        ('seed-size', 'norm,lt-norm,?'),
        ('shriveling', 'absent,present,?'),
        ('roots', 'norm,rotted,galls-cysts,?'),
    )
}
        

In [5]:
def get_map_val(key, val):
    """Translate one raw cell value of feature ``key`` into its descriptive label.

    The lookup table for the feature is taken from the module-level ``maps``.

    Parameters
    ----------
    key : str
        Feature (column) name; must be a key of ``maps``.
    val : str, int, float or None
        Raw cell value: an integer code (as a number or numeric string), the
        missing marker ``'?'``, an actual missing value (``None`` / NaN), or a
        label that has already been decoded.

    Returns
    -------
    str or None
        The label for the code. ``None`` for a missing value or for a code
        that is not in the feature's table.

    Raises
    ------
    KeyError
        If ``key`` has no table in ``maps``.
    ValueError
        If ``val`` is a string that is neither ``'?'``, a known label, nor an
        integer literal.
    """
    lookup = maps[key]

    # Missing data: the dataset's own '?' marker, plus None / NaN. The latter
    # two appear once a column has been decoded (the '?' entries become None),
    # so handling them keeps a second run of the decoding cell from crashing.
    # `val != val` is true only for NaN.
    if val is None or val == '?' or (isinstance(val, float) and val != val):
        return lookup.get('?')

    # Already decoded (e.g. the cell was executed twice): leave the label as is
    # instead of failing in int().
    if isinstance(val, str) and val in lookup.values():
        return val

    # Codes outside the table fall through to None, as .get() has no default.
    return lookup.get(int(val))

# Decode every feature column that has a lookup table, overwriting it in place.
for column in maps:
    # Bind the column name as a default argument so the callback carries its
    # own feature name rather than reading the loop variable when called.
    df[column] = df[column].apply(lambda raw, feature=column: get_map_val(feature, raw))

6 6
4 4
3 3
3 3
6 6
5 5
5 5
4 4
6 6
4 4
6 6
4 4
3 3
6 6
6 6
5 5
6 6
4 4
3 3
5 5
1 1
1 1
3 3
0 0
0 0
1 1
1 1
2 2
1 1
2 2
0 0
1 1
2 2
1 1
2 2
3 3
0 0
3 3
2 2
2 2
2 2
1 1
0 0
0 0
3 3
2 2
0 0
2 2
2 2
0 0
1 1
1 1
3 3
2 2
3 3
1 1
3 3
2 2
3 3
1 1
2 2
3 3
2 2
3 3
2 2
1 1
3 3
3 3
3 3
3 3
4 4
4 4
3 3
5 5
5 5
4 4
5 5
6 6
5 5
5 5
4 4
4 4
4 4
3 3
5 5
4 4
4 4
4 4
4 4
3 3
5 5
6 6
1 1
6 6
4 4
6 6
2 2
6 6
5 5
1 1
6 6
2 2
1 1
4 4
1 1
2 2
2 2
4 4
4 4
5 5
1 1
2 2
2 2
2 2
1 1
1 1
0 0
2 2
1 1
2 2
5 5
1 1
1 1
4 4
1 1
4 4
2 2
0 0
1 1
1 1
1 1
2 2
3 3
2 2
1 1
1 1
5 5
4 4
3 3
1 1
4 4
2 2
2 2
3 3
3 3
2 2
3 3
3 3
2 2
5 5
5 5
4 4
2 2
3 3
3 3
3 3
3 3
4 4
2 2
4 4
2 2
3 3
2 2
4 4
3 3
3 3
3 3
3 3
4 4
5 5
6 6
6 6
4 4
4 4
4 4
6 6
3 3
3 3
5 5
4 4
5 5
5 5
6 6
2 2
3 3
4 4
6 6
1 1
6 6
5 5
5 5
0 0
6 6
5 5
5 5
6 6
5 5
6 6
5 5
5 5
3 3
3 3
3 3
3 3
3 3
2 2
1 1
2 2
2 2
2 2
4 4
4 4
3 3
6 6
6 6
5 5
6 6
5 5
6 6
6 6
5 5
4 4
6 6
5 5
5 5
4 4
4 4
5 5
6 6
6 6
5 5
5 5
4 4
6 6
4 4
6 6
4 4
4 4
5 5
3 3
5 5
5 5
4 4
5 5
6 6
5 5
6 6
5 5
5 5
6 6


TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [None]:
#df