In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# For reference, see chapter 9 of Python for Data Analysis, 2nd Edition

# Line graphs

In [None]:
# U.S.-level COVID-19 CSV from the New York Times covid-19-data repository on GitHub
url = 'https://github.com/nytimes/covid-19-data/raw/master/us.csv'
covid = pd.read_csv(filepath_or_buffer=url)

# Show the last five rows as a quick check of what was loaded
covid.tail(5)

In [None]:
# With no arguments DataFrame.plot() draws a line graph; the kind is spelled out here
covid.plot(kind='line')
# covid.plot() # same result, relying on the default kind of plot
# covid.plot.line() # alternate method of specifying the kind of plot

Fix the dates on the X axis

In [None]:
# Convert the 'date' strings to datetime objects and make them the row index,
# so that plots put real dates on the X axis.
#
# The guard keeps this cell re-runnable: once 'date' has become the index it is
# no longer a column, and repeating the conversion would raise a KeyError.
# On a first run the index has no name, so the conversion always happens.
if covid.index.name != 'date':
    covid = (
        covid
        .assign(date=pd.to_datetime(covid['date'], format='%Y-%m-%d'))  # string -> datetime
        .set_index('date')  # returns a new frame instead of mutating in place
    )
covid.head()

In [None]:
# Line graph of every column against the frame's index
covid.plot.line()

# pyplot from matplotlib

In [None]:
# Keep the first 50 rows of the table, selected by position
first_cases = covid.iloc[:50]
first_cases.head()

## Controlling display with figures and subplots

In Jupyter notebooks, plots are reset after every cell, so all of the code that sets up and draws a figure must be included in a single cell.

In [None]:
# One 10 x 10 inch figure that will hold both panels
fig = plt.figure(figsize=(10, 10))

# 2 rows x 1 column of subplots: position 1 is the upper panel, position 2 the lower
axes1 = fig.add_subplot(211)
axes2 = fig.add_subplot(212)

# Upper panel: cases as black circles joined by a dashed line
axes1.plot(first_cases.index, first_cases['cases'], marker='o', linestyle='--', color='black')
axes1.set_title('cases')

# Lower panel: deaths as red crosses joined by a dashed line
axes2.plot(first_cases.index, first_cases['deaths'], marker='x', linestyle='--', color='red')
axes2.set_title('deaths')

## Plot in a single subplot

Display as a bar graph (unstacked: both series are drawn from zero at the same X positions, so the bars overlap)

In [None]:
# Create a figure object
fig = plt.figure(figsize=(10, 10))

# Create a single subplot
ax = fig.add_subplot(1, 1, 1)

# Both series use the same x positions and no `bottom` offset, so the bars are
# overlaid rather than stacked; deaths are drawn second and so appear on top.
ax.bar(first_cases.index, first_cases.cases, color='k', label='cases')
ax.bar(first_cases.index, first_cases.deaths, color='r', label='deaths')

# Two colour-coded series in one subplot need a legend to be readable
ax.legend()
ax.set_title('start of the COVID 19 pandemic in the U.S.')

## Creating a plot programmatically
Stacked bar graph 

In [None]:
# Read the state-by-sector CO2 workbook (co2_state_2016_sector.xlsx)
url = 'https://github.com/HeardLibrary/digital-scholarship/raw/master/data/codegraf/co2_state_2016_sector.xlsx'
state_co2_sector = pd.read_excel(url)

# Keep only the sector columns for the states with the largest 'Total' values
number_of_states = 4
top_state_sectors = (
    state_co2_sector
    .set_index('State')
    .drop(index='Total')                        # remove the row labelled 'Total'
    .sort_values(by='Total', ascending=False)   # largest 'Total' first
    .drop(columns=['Total'])                    # sort key is not one of the sectors
    .head(number_of_states)                     # first few rows after sorting
)
top_state_sectors.head()

In [None]:
# Based on example at https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781849513265/1/ch01lvl1sec17/plotting-stacked-bar-charts
# See also https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/bar_stacked.html

# Create a figure object
fig = plt.figure(figsize=(15, 10))

# Create a single subplot
ax = fig.add_subplot(1, 1, 1)

# Create a numpy array with one element (one x position) for each row
ind = np.arange(len(top_state_sectors))

# Extract the row and column labels as numpy arrays
row_labels = top_state_sectors.index.values
column_labels = top_state_sectors.columns.values

# Height at which the next sector's bars start: the running total of the sectors
# already drawn for each row. Keeping a running total avoids re-summing all of the
# previous columns on every pass through the loop.
bottoms = np.zeros(len(top_state_sectors))

for sector_number, sector_name in enumerate(column_labels):
    # Select by position so that duplicate column names cannot return several columns
    heights = top_state_sectors.iloc[:, sector_number]
    # Labelling each set of bars ties the legend entry to the bars explicitly
    ax.bar(ind, heights, bottom=bottoms, label=sector_name)
    # A missing value adds nothing to the stack, as when summing with NaNs skipped
    bottoms = bottoms + heights.fillna(0).to_numpy()

# Use the Axes object directly rather than pyplot's "current subplot" functions,
# so the cell keeps working if more subplots are added to the figure
ax.set_xticks(ind)
ax.set_xticklabels(row_labels)
ax.legend()
