In [None]:
# Welcome to the iPython notebook tutorial!

In [None]:
# You can write some Python code here and run it with Shift-Enter.
# The result of your last line of code gets printed out,
# so no more need for `print` statements!

x = 5
y = 3
x + y

In [None]:
# iPython remembers the variables you've used before.
x * y

In [None]:
# You can make functions too!

def square(x):
    return x * x

square(12)

In [None]:
# You can go back and edit a previous line, too.
# Can you turn the previous function `square` into `cube`?

In [None]:
# Let's play around with a real dataset.

In [None]:
# It's a best practice to import all your required packages up here.

import csv

In [None]:
# This is a handy recipe for turning a CSV into a 
# list of dictionaries - which are like structs in C
# or objects in JavaScript, if you're familiar with those.

concentration_data = []

with open('concentration-enrollment.csv') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row['concentration'])
        # You can't just do `concentration_data = reader`
        # because the `reader` object disappears once the file
        # is read. Instead, you want to copy each row into
        # a different list.
        concentration_data.append(row)

In [None]:
# Let's see what we got
concentration_data

In [None]:
# We can now start crunching the numbers!

# Let's just see what the first piece of data looks like
# Each year corresponds with the number of concentrators in that concentration
# in that year.
aaas = concentration_data[0]

In [None]:
# Ew, it looks like the enrollment numbers are strings, not integers!
# That makes it harder to run calculations.
# Let's clean up this data.

def clean_concentration(raw):
    return dict([
        ('concentration', raw['concentration']),
        ('2010-11', int(raw['2010-11'])),
        ('2011-12', int(raw['2011-12'])),
        ('2012-13', int(raw['2012-13'])),
        ('2013-14', int(raw['2013-14'])),
        ('2014-15', int(raw['2014-15']))
    ])

clean_concentration(aaas)

In [None]:
# That's better! Let's clean all the concentrations, then
clean_concentration_data = [clean_concentration(c) for c in concentration_data]

clean_concentration_data[0]

In [None]:
# Let's see how many concentrators Computer Science added from 2010-2015.
# This is a trick to filter a list
computer_science = [c for c in clean_concentration_data if c['concentration'] == 'Computer Science'][0]
computer_science

In [None]:
#####
# Now it's your turn to start working with the data!
#####

In [None]:
# Calculate the growth in CS concentrators from 2010-11 to 2014-15. 
new_cs_concentrators = computer_science['2014-15'] - computer_science['2010-11']
growth_percent = float(new_cs_concentrators) / computer_science['2010-11'] * 100

"CS added {} concentrators, for a growth rate of {} percent!".format(new_cs_concentrators, growth_percent)

In [None]:
# Hey, that was cool. Why don't we do that for every concentration?

def get_growth(concentration):
    old = concentration['2010-11']
    new = concentration['2014-15']
    if old != 0:
        diff = float(new - old)
        return diff / old * 100

def summary_for_concentration(concentration):
    rate = get_growth(concentration)
    name = concentration["concentration"]
    if rate:
        return "{} added {} concentrators, for a growth rate of {} percent!".format(name, new_cs_concentrators, growth_percent)
    else:
        return "{} started with 0 concentrators; the growth rate is undefined.".format(name)

for c in clean_concentration_data:
    print summary_for_concentration(c)

In [None]:
# Things to do next:
# - Print the 5 largest concentrations.

def sort_size(data):
    final = []
    for concentration in data:
        current_size = concentration["2014-15"]
        name = concentration["concentration"]
        final.append((current_size, name))
    final.sort(reverse=True)
    return final

largest_size_concentrations = sort_size(clean_concentration_data)[:5]

largest_size_concentrations

In [None]:
# - Print the 5 fastest-growing concentrations.

def sort_growth(data):
    final = []
    for concentration in data:
        name = concentration["concentration"]
        rate = get_growth(concentration)
        if rate:
            final.append((rate, name))
    final.sort(reverse=True)
    return final

largest_growing_concentrations = sort_growth(clean_concentration_data)[:5]

largest_growing_concentrations

In [None]:
# - Add the growth rate to each concentration's dict.

for concentration in clean_concentration_data:
    concentration["rate"] = get_growth(concentration)
    
clean_concentration_data

In [55]:
# - Write a new CSV that includes the growth rate for each concentration.

with open('concentration-growth.csv', 'w') as csvfile:
    fieldnames = ['concentration', '2011-12', '2012-13','2013-14','2014-15', 'rate']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(clean_concentration_data)

ValueError: dict contains fields not in fieldnames: '2010-11'

In [None]:
# If you're feeling ambitious:
# - Use matplotlib to make a graph.

import matplotlib.pyplot as plt

def graph_concentration(concentration):
    lst = []
    for key in concentration:
        if key != "concentration":
            lst.append((key, concentration[key]))
    lst.sort()
    labels = [x[0] for x in lst]
    numbers = [x[1] for x in lst]
    plt.plot(range(5), numbers, 'r')
    plt.title(concentration["concentration"])
    plt.axis([0, 4, 0, max(numbers) + 10])
    plt.xticks(range(5), labels)
    plt.show()

for c in clean_concentration_data:
    graph_concentration(c)
