# Combining Data on Grades

In [1]:
import numpy as np
import pandas as pd

# Keep printed output compact: pandas shows at most 10 columns and 10 rows
# of an object before truncating the display.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 10

### Given: Three Numpy Arrays of Grades

In [2]:
# Seed NumPy's global generator so the three samples below are reproducible.
np.random.seed(1)

# Draw letter grades with the given probabilities. The draw order
# (ar1, ar2, ar3) matters: each call consumes the same seeded random stream.
# ar2 gives 'F' zero probability; ar3 uses lowercase labels.
ar1 = np.random.choice(list('ABCDF'), size=100, p=[0.20, 0.40, 0.30, 0.08, 0.02])
ar2 = np.random.choice(list('ABCDF'), size=50, p=[0.30, 0.40, 0.20, 0.10, 0.00])
ar3 = np.random.choice(list('abcdf'), size=200, p=[0.15, 0.45, 0.25, 0.13, 0.02])

### Create pandas Series from these Arrays
Use the default index for each. Save the Series as `s1`, `s2`, and `s3`.

In [3]:
# Wrap each grade array in a Series. No index is passed, so each Series gets
# the default integer index starting at 0.
s1, s2, s3 = (pd.Series(grade_array) for grade_array in (ar1, ar2, ar3))

### Get the Value Counts of each of the Series
Save the resulting Series as `grades1`, `grades2`, and `grades3`.

In [4]:
# Tally how many times each letter grade occurs in each Series.
grades1, grades2, grades3 = map(pd.Series.value_counts, (s1, s2, s3))
grades3

b    90
c    52
d    33
a    23
f     2
dtype: int64

### Compare the indexes of the three `grades` variables 
You should see that the index for `grades3` uses lowercase letters, while the other two use uppercase letters.

In [5]:
# Show the three indexes together so their labels can be compared.
tuple(counts.index for counts in (grades1, grades2, grades3))

(Index(['B', 'C', 'A', 'D', 'F'], dtype='object'),
 Index(['A', 'B', 'C', 'D'], dtype='object'),
 Index(['b', 'c', 'd', 'a', 'f'], dtype='object'))

### Reindex `grades3` to use uppercase letters
This is a little tricky because the index for `grades3` is not in alphabetical order. You will need to sort it on its index first.

In [8]:
# Sort by label first so the counts are in alphabetical order, then upper-case
# the existing labels. Deriving the new index from the old one (rather than
# assigning a fixed ['A','B','C','D','F'] list) keeps every count attached to
# its own letter, and still works when some grade never occurs in the sample,
# where a five-element list would fail with a length-mismatch error.
grades3 = grades3.sort_index()
grades3.index = grades3.index.str.upper()

# Display both the new index and the relabelled counts.
grades3.index, grades3

(Index(['A', 'B', 'C', 'D', 'F'], dtype='object'), A    23
 B    90
 C    52
 D    33
 F     2
 dtype: int64)

### Add the three `grades` Series together
Don't forget to set the fill value to 0.

In [9]:
# Sum the three count Series label by label. fill_value=0 treats a grade that
# is missing from one Series (grades2 has no 'F') as a count of zero instead
# of producing NaN for that label.
grades_all = (
    grades1
    .add(grades2, fill_value=0)
    .add(grades3, fill_value=0)
)
grades_all

A     67.0
B    140.0
C     92.0
D     48.0
F      3.0
dtype: float64

### From `grades_all`, create a `grades_breakout` Series that holds the share of each grade.
`grades_breakout.sum()` should equal 1 (up to floating-point rounding).

In [12]:
# Divide each grade's count by the overall total to get its share.
grades_breakout = grades_all.div(grades_all.sum())
grades_breakout, grades_breakout.sum()


(A    0.191429
 B    0.400000
 C    0.262857
 D    0.137143
 F    0.008571
 dtype: float64, 1.0)

## A different approach: First, change the case of the values of `s3`.

In [16]:
# Upper-case every grade in s3 so its values use the same letter case as
# s1 and s2. This rebinds s3, so earlier cells that read s3 will see the
# upper-case values if they are re-run after this one.
s3 = s3.str.upper()
s3

0      A
1      D
2      B
3      B
4      B
      ..
195    C
196    C
197    C
198    C
199    A
Length: 200, dtype: object

### Then combine the Series of grades.

In [17]:
# Stack the three Series end to end (100 + 50 + 200 = 350 values).
# Each piece keeps its own index labels, so labels repeat in s_all
# (the displayed result starts again at 0 and ends at 199).
s_all = pd.concat([s1,s2,s3])
s_all

0      B
1      C
2      A
3      B
4      A
      ..
195    C
196    C
197    C
198    C
199    A
Length: 350, dtype: object

### Then get the value counts of the combined series and get `grades_breakout`.

In [18]:
# Count each grade across the combined Series, then turn counts into shares.
# This rebinds grades_all and grades_breakout from the first approach.
grades_all = s_all.value_counts()
grades_breakout = grades_all.div(grades_all.sum())

# Sorting by label only affects what is displayed; grades_breakout itself
# keeps the order returned by value_counts().
grades_breakout.sort_index(), grades_breakout.sum()

(A    0.191429
 B    0.400000
 C    0.262857
 D    0.137143
 F    0.008571
 dtype: float64, 0.9999999999999999)