# Load Pandas and NumPy

In [None]:
# load pandas 
import pandas as pd

# load numpy
import numpy as np

# load seaborn as sns 
# (only used for setting color palette)
import seaborn as sns


# Generate Data to Experiment with Plotting Tools



In [None]:
# draw a 10x4 array of uniform random floats in [0, 1)
random_num_array = np.random.rand(10, 4)

# display the array: 10 rows, 4 columns
random_num_array

In [None]:
# wrap `random_num_array` in a DataFrame called `data`,
# labelling its four columns A, B, C and D
data = pd.DataFrame(random_num_array, columns=list('ABCD'))

# display the `data` DataFrame
data

# Bar Plot

- we'll use the `data` DataFrame to create a variety of plots 
  - to visualize the quality of the random numbers generated in the numpy array

### Regular Bar Plot 


In [None]:
# vertical bar plot of every column in `data`
data.plot.bar(figsize=(16, 9))

### Stacked Bar Plot 

In [None]:
# vertical bar plot with the columns stacked on top of each other
data.plot.bar(figsize=(16, 9), stacked=True)

### Horizontal Bar Plot


In [None]:
# horizontal bar plot of every column in `data`
data.plot.barh(figsize=(16, 9))

### Horizontal Stacked Bar Plot





In [None]:
# horizontal bar plot with the columns stacked side by side
data.plot.barh(stacked=True, figsize=(16, 9))

# Change Color Palette

- [Seaborn - Color Palettes](https://seaborn.pydata.org/tutorial/color_palettes.html)

In [None]:
# switch the default color cycle to seaborn's 'magma' palette
sns.set_palette('magma')

# redraw the stacked horizontal bar plot with the new palette
data.plot.barh(stacked=True, figsize=(16, 9))

# Area Plot

### Stacked Area Plot

In [None]:
# switch the default color cycle to seaborn's 'muted' palette
sns.set_palette('muted')

# area plot of every column (pandas stacks the areas by default)
data.plot.area(figsize=(16, 9))

### Unstacked Area Plot

In [None]:
# area plot with stacking turned off, so the areas overlap
data.plot.area(stacked=False, figsize=(16, 9))

# Diff Plot 

- one of the major benefits of using pandas directly is that many of pandas’ helpful DataFrame manipulations can be directly used. 

- for example, consider the result of `data.diff()`, which simply takes the difference between one row and the row before it (hence the presence of NaN in the first row)

- this is helpful in many time series applications.

In [None]:
# difference between each row and the row before it
# (periods=1 is the default; the first row is NaN)
data.diff(periods=1)

- consider the following code, which plots the differenced data as a box plot and demonstrates the use of pandas plotting parameters, in this case `color`

In [None]:
# horizontal box plot of the row-to-row differences,
# with a custom color for each part of the box
data.diff().plot.box(
    vert=False,
    color={
        'medians': 'lightblue',
        'boxes': 'blue',
        'caps': 'darkblue',
    },
);

# Rolling Mean 



- another application of pandas’ handy data manipulation functions is with `.rolling().mean()`

- it computes the rolling (moving) mean: the average over a sliding window of consecutive values, a common statistical method for reducing the noisiness of data



In [None]:
# draw a 100x1 column vector of uniform random numbers
rand_values_vector = np.random.rand(100, 1)

# build a single-column DataFrame from `rand_values_vector` ...
data_two = pd.DataFrame(rand_values_vector, columns=['value'])
# ... then move the row index into a regular `index` column
data_two = data_two.reset_index()

# display the DataFrame
data_two

In [None]:
# plot the raw values and keep the returned Axes, so the second line is
# drawn on the same chart explicitly rather than via the "current axes"
ax = data_two['value'].plot(figsize=(16, 9), label='value')

# overlay the 10-point rolling mean; its first 9 entries are NaN because
# a full window is not yet available, so the line starts at row 9
data_two['value'].rolling(10).mean().plot(ax=ax, label='rolling mean (window=10)')

# show a legend so the raw and smoothed lines can be told apart
ax.legend();

# Other Plots

In [None]:
# kernel density estimate (smoothed distribution) of each column
data.plot.kde(figsize=(16, 9));

In [None]:
# scatter plot of column A (x) against column B (y);
# column C drives both the color and the size of each point
data.plot.scatter(
    x='A',
    y='B',
    c='C',              # color of data points
    s=data['C'] * 200,  # size of data points
    figsize=(16, 9),
);

In [None]:
# hexagonal-bin plot of column C (x) against column D (y)
data.plot.hexbin(
    x='C',
    y='D',
    gridsize=18,  # number of hexagons in the x-direction
    figsize=(16, 9),
);

# Sub Plots 

- one example of pandas doing heavy lifting in plotting for you is with subplots
  - by enabling `subplots=True`, pandas automatically creates one subplot per column
  


### Pie Charts

- for instance, consider the following generated DataFrame, which has two columns (X and Y), 
  - as well as five rows (indices A, B, C, D, E)

- this will generate two pie charts, each with five sections

In [None]:
# draw a two-column numpy array of uniform random values
rand_value_arr = np.random.rand(5, 2)  # 5 rows, 2 columns

# display the array
rand_value_arr

In [None]:
# build a DataFrame from `rand_value_arr` with rows A-E and columns X, Y
# (note: this rebinds the name `data` used by the earlier cells)
data = pd.DataFrame(rand_value_arr, index=list("ABCDE"), columns=list("XY"))

# draw one pie chart per column
data.plot.pie(figsize=(8, 4), subplots=True);

### Line Graphs

- As another example of the convenience of subplots in direct-pandas plotting, consider plotting line data

In [None]:
# draw a four-column numpy array of uniform random values
rand_value_arr = np.random.rand(100, 4)  # 100 rows, 4 columns

# display the array
rand_value_arr

In [None]:
# build a DataFrame from `rand_value_arr` with columns A, B, C, D
# (note: this rebinds the name `data` once more)
data = pd.DataFrame(rand_value_arr, columns=list('ABCD'))

# draw each column as a line plot in its own subplot
data.plot(figsize=(20, 10), subplots=True);

In [None]:
# same per-column subplots, arranged in a 2x2 grid via `layout`
data.plot(figsize=(20, 10), subplots=True, layout=(2, 2));