# Customizing plots

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn import datasets
import warnings
warnings.filterwarnings('ignore')

In [2]:
sns.set(rc={'figure.figsize':(11,8)})

In [3]:
weather = pd.read_csv('pittsburgh2013.csv', parse_dates=['Date'])
weather.columns()

TypeError: 'Index' object is not callable

In [None]:
max_temp = np.array(weather['Max TemperatureF'])
min_temp = np.array(weather['Min TemperatureF'])
t = weather['Date']

In [None]:
plt.plot(t, max_temp, 'r')
plt.plot(t, min_temp, 'b')
plt.xlabel('Date')
plt.ylabel('Max & Min Temp')
plt.show()

In [None]:
_ = plt.subplot(2,1,1)
_ = plt.plot(t, max_temp, 'r')
_ = plt.xlabel('Date')
_ = plt.ylabel('Max Temp')
_ = plt.subplot(2,1,2)
_ = plt.plot(t, min_temp, 'b')
_ = plt.xlabel('Date')
_ = plt.ylabel('Min Temp')
plt.show()

### The subplot() command 
- Syntax: `subplot(nrow, ncols, nsubplot)` 
- Subplot ordering:
 - Row-wise from top left
 - Indexed from 1 

In [None]:
degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
degrees.head(3)

In [None]:
year = np.array(degrees.Year)
physical_sciences = np.array(degrees['Physical Sciences'])
computer_science = np.array(degrees['Computer Science'])
health = np.array(degrees['Health Professions'])
education = np.array(degrees['Education'])

In [None]:
plt.plot(year, physical_sciences, color='blue')
plt.plot(year, computer_science, color='red')
plt.show()

In [None]:
plt.axes([0.05,0.05,0.425,0.9])
plt.plot(year, physical_sciences, color='blue')
plt.axes([0.525,0.05,0.425,0.9])
plt.plot(year, computer_science, color='red')
plt.show()

In [None]:
plt.subplot(1,2,1)
plt.plot(year, physical_sciences, color='blue')
plt.title('Physical Sciences')
plt.subplot(1,2,2)
plt.plot(year, computer_science, color='red')
plt.title('Computer Science')
plt.tight_layout()
plt.show()

In [None]:
# Create a figure with 2x2 subplot layout and make the top left subplot active
plt.subplot(2,2,1)

# Plot in blue the % of degrees awarded to women in the Physical Sciences
plt.plot(year, physical_sciences, color='blue')
plt.title('Physical Sciences')

# Make the top right subplot active in the current 2x2 subplot grid 
plt.subplot(2,2,2)

# Plot in red the % of degrees awarded to women in Computer Science
plt.plot(year, computer_science, color='red')
plt.title('Computer Science')

# Make the bottom left subplot active in the current 2x2 subplot grid
plt.subplot(2,2,3)

# Plot in green the % of degrees awarded to women in Health Professions
plt.plot(year, health, color='green')
plt.title('Health Professions')

# Make the bottom right subplot active in the current 2x2 subplot grid
plt.subplot(2,2,4)

# Plot in yellow the % of degrees awarded to women in Education
plt.plot(year, education, color='yellow')
plt.title('Education')

# Improve the spacing between subplots and display them
plt.tight_layout()
plt.show()

### Customizing axes

#### Controlling axis extents
- `axis([xmin, xmax, ymin, ymax])` sets axis extents
- Control over individual axis extents
 - `xlim([xmin,xmax])`
 - `ylim([ymin, ymax])`
- Can use tuples, lists for extents
 - eg: `xlim((-2,3))` or `xlim([-2,3])`

In [None]:
GDP = pd.read_csv('gdp_usa.csv', parse_dates=['DATE'], index_col='DATE')
GDP = GDP.resample('A').last()
GDP.index = GDP.index.year
GDP.tail()

In [None]:
# Plot the % of degrees awarded to women in Computer Science and the Physical Sciences
plt.plot(year,computer_science, color='red') 
plt.plot(year, physical_sciences, color='blue')

# Add the axis labels
plt.xlabel('Year')
plt.ylabel('Degrees awarded to women (%)')

# Set the x-axis range
plt.xlim(1990, 2010)

# Set the y-axis range
plt.ylim(0, 50)

# Add a title and display the plot
plt.title('Degrees awarded to women (1990-2010)\nComputer Science (red)\nPhysical Sciences (blue)')

In [None]:
# Plot in blue the % of degrees awarded to women in Computer Science
plt.plot(year,computer_science, color='blue')

# Plot in red the % of degrees awarded to women in the Physical Sciences
plt.plot(year, physical_sciences,color='red')

# Set the x-axis and y-axis limits
plt.axis([1990, 2010, 0, 50])

# Save the figure as 'axis_limits.png'
plt.savefig('axis_limits.png')

# Show the figure
plt.show()

### Legends, 

In [None]:
from sklearn.datasets import load_iris
iris = load_iris()
iris.keys()

In [None]:
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
print(iris.target_names)
df.tail()

In [None]:
setosa_len = df.loc[df['target'] == 0, 'sepal length (cm)']
setosa_wid = df.loc[df['target'] == 0, 'sepal width (cm)']

In [None]:
versicolor_len = df.loc[df['target'] == 1, 'sepal length (cm)']
versicolor_wid = df.loc[df['target'] == 1, 'sepal width (cm)']

In [None]:
virginica_len = df.loc[df['target'] == 2, 'sepal length (cm)']
virginica_wid = df.loc[df['target'] == 2, 'sepal width (cm)']

In [None]:
plt.scatter(setosa_len, setosa_wid, 
           marker='o', color='red', label='setosa');
plt.scatter(versicolor_len, versicolor_wid,
           marker='o', color='green', label='versicolor');
plt.scatter(virginica_len, virginica_wid,
           marker='o', color='blue', label='virginica');
plt.legend(loc='upper right');
plt.title('Iris data');
plt.xlabel('sepal length (cm)');
plt.ylabel('sepal width (cm)');

### Plot annotations
- Text lables and arrows using annotate() method
- Flexible specification of coordinates
- Keyword arrowprops: dict of arrow properties
 - width
 - color


In [None]:
plt.scatter(setosa_len, setosa_wid, 
           marker='o', color='red', label='setosa');
plt.scatter(versicolor_len, versicolor_wid,
           marker='o', color='green', label='versicolor');
plt.scatter(virginica_len, virginica_wid,
           marker='o', color='blue', label='virginica');
#plt.legend(loc='upper right');
plt.annotate('setosa', xy=(5.0, 3.5));
plt.annotate('virginica', xy=(7.25, 3.5));
plt.annotate('versicolor', xy=(5.0, 2.0));
plt.title('Iris data');
plt.xlabel('sepal length (cm)');
plt.ylabel('sepal width (cm)');

In [None]:
plt.scatter(setosa_len, setosa_wid, 
           marker='o', color='red', label='setosa');
plt.scatter(versicolor_len, versicolor_wid,
           marker='o', color='green', label='versicolor');
plt.scatter(virginica_len, virginica_wid,
           marker='o', color='blue', label='virginica');
#plt.legend(loc='upper right');
plt.annotate('setosa', xy=(5.0, 3.5), xytext=(4.25,4.0), arrowprops={'color':'red'});
plt.annotate('virginica', xy=(7.25, 3.5), xytext=(6.5, 4.0), arrowprops={'color':'blue'});
plt.annotate('versicolor', xy=(5.0, 2.0), xytext=(5.5, 1.97), arrowprops={'color':'green'});
plt.title('Iris data');
plt.xlabel('sepal length (cm)');
plt.ylabel('sepal width (cm)');

#### Working with plot styles
- Style sheets in Matplotlib
- Defaults for lines, points, backgrounds, etc
- Switch styles globally with plt.style.use()

In [None]:
plt.style.use('ggplot')
plt.scatter(setosa_len, setosa_wid, 
           marker='o', color='red', label='setosa');
plt.scatter(versicolor_len, versicolor_wid,
           marker='o', color='green', label='versicolor');
plt.scatter(virginica_len, virginica_wid,
           marker='o', color='blue', label='virginica');
#plt.legend(loc='upper right');
plt.annotate('setosa', xy=(5.0, 3.5), xytext=(4.25,4.0), arrowprops={'color':'red'});
plt.annotate('virginica', xy=(7.25, 3.5), xytext=(6.5, 4.0), arrowprops={'color':'blue'});
plt.annotate('versicolor', xy=(5.0, 2.0), xytext=(5.5, 1.97), arrowprops={'color':'green'});
plt.title('Iris data');
plt.xlabel('sepal length (cm)');
plt.ylabel('sepal width (cm)');

In [None]:
plt.style.use('fivethirtyeight')
plt.scatter(setosa_len, setosa_wid, 
           marker='o', color='red', label='setosa');
plt.scatter(versicolor_len, versicolor_wid,
           marker='o', color='green', label='versicolor');
plt.scatter(virginica_len, virginica_wid,
           marker='o', color='blue', label='virginica');
#plt.legend(loc='upper right');
plt.annotate('setosa', xy=(5.0, 3.5), xytext=(4.25,4.0), arrowprops={'color':'red'});
plt.annotate('virginica', xy=(7.25, 3.5), xytext=(6.5, 4.0), arrowprops={'color':'blue'});
plt.annotate('versicolor', xy=(5.0, 2.0), xytext=(5.5, 1.97), arrowprops={'color':'green'});
plt.title('Iris data');
plt.xlabel('sepal length (cm)');
plt.ylabel('sepal width (cm)');

#### Using legend()

In [None]:
# Specify the label 'Computer Science'
plt.plot(year, computer_science, color='red', label='Computer Science') 

# Specify the label 'Physical Sciences' 
plt.plot(year, physical_sciences, color='blue', label='Physical Sciences')

# Add a legend at the lower center
plt.legend(loc='lower center')

# Add axis labels and title
plt.xlabel('Year')
plt.ylabel('Enrollment (%)')
plt.title('Undergraduate enrollment of women')
plt.show()

#### Using annotate()

In [None]:
# Plot with legend as before
plt.plot(year, computer_science, color='red', label='Computer Science') 
plt.plot(year, physical_sciences, color='blue', label='Physical Sciences')
plt.legend(loc='lower right')

# Compute the maximum enrollment of women in Computer Science: cs_max
cs_max = computer_science.max()

# Calculate the year in which there was maximum enrollment of women in Computer Science: yr_max
yr_max = year[computer_science.argmax()]

# Add a black arrow annotation
plt.annotate('Maximum', xy=(yr_max, cs_max), xytext=(yr_max+5, cs_max+5), arrowprops={'facecolor':'k'});

# Add axis labels and title
plt.xlabel('Year')
plt.ylabel('Enrollment (%)')
plt.title('Undergraduate enrollment of women')
plt.show()

#### Modifying styles

In [None]:
# Set the style to 'ggplot'
plt.style.use('ggplot')

# Create a figure with 2x2 subplot layout
plt.subplot(2, 2, 1) 

# Plot the enrollment % of women in the Physical Sciences
plt.plot(year, physical_sciences, color='blue')
plt.title('Physical Sciences')

# Plot the enrollment % of women in Computer Science
plt.subplot(2, 2, 2)
plt.plot(year, computer_science, color='red')
plt.title('Computer Science')

# Add annotation
cs_max = computer_science.max()
yr_max = year[computer_science.argmax()]
plt.annotate('Maximum', xy=(yr_max, cs_max), xytext=(yr_max-1, cs_max-10), arrowprops=dict(facecolor='black'))

# Plot the enrollmment % of women in Health professions
plt.subplot(2, 2, 3)
plt.plot(year, health, color='green')
plt.title('Health Professions')

# Plot the enrollment % of women in Education
plt.subplot(2, 2, 4)
plt.plot(year, education, color='yellow')
plt.title('Education')

# Improve spacing between subplots and display them
plt.tight_layout()
plt.show()