## Matplotlib Library

- Matplotlib Importing
- Two different ways to plot data
    - Plotting data from Numpy Arrays
    - Plotting data from pandas Dataframe
- Customizing plots
- Saving & sharing plots


In [None]:
%matplotlib inline
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# called with no data: draws an empty set of axes
plt.plot()

In [None]:
plt.plot(); # the trailing semicolon hides the return value that is otherwise echoed above the plot

In [None]:
plt.plot()
plt.show()  # rendering explicitly gives the same plot, again without the echoed bracket

In [None]:
# four sample points; each y value is 11 times its x value
x = list(range(1, 5))
y = [11 * value for value in x]

# draw the line through the points and render it
plt.plot(x, y)
plt.show()

In [None]:
#fig method - method 1: build the figure first, then attach an Axes to it
fig = plt.figure()      #creates an empty figure
ax =fig.add_subplot()   #adds an Axes to that figure
plt.show()

In [None]:
#2nd method: create the figure, then place an Axes on it by hand
fig = plt.figure()
# add_axes takes [left, bottom, width, height] as fractions of the figure, so
# [0,0,1,1] fills the canvas; [1,1,1,1] starts at the top-right corner and lies
# entirely outside it (it only appears when the output is cropped to the artists)
ax = fig.add_axes([0,0,1,1])
ax.plot(x,y)    #adding some data
plt.show()

In [None]:
#method 3 (recommended): create the figure and its axes in a single call
fig, ax = plt.subplots()
z = [11,22,34,56]
ax.plot(x,z);
# show the types of the two objects returned by subplots()
type(fig), type(ax)

![](matplotlib-anatomy-of-a-plot.png)

![](matplotlib-anatomy-of-a-plot-with-code.png)

## Matplotlib example workflow

In [None]:
# 0. import matplotlib and get it ready for plotting in Jupyter
%matplotlib inline
import matplotlib.pyplot as plt

#1. prepare data
x = [2,4,6,8]
y = [12,79,42,92]

#2. setup plot
fig,ax = plt.subplots(figsize=(7,7))    #(width,height)

#3. plot data
ax.plot(x,y)

#4. customize plot
ax.set(title="Simple Plot",
        xlabel = "x-axis",
        ylabel = "y-axis")

#5. save and show figure
fig.savefig("Assests/sample-plot-1.png")

## Making figures with Numpy Arrays
We want to make:
* Line plot
* Scatter plot
* Bar plot
* Histogram
* Subplot

In [None]:
# numpy is already available as np from the import cell

# sample input: 100 evenly spaced values running from 0 to 10
x = np.linspace(start=0, stop=10, num=100)
x

In [None]:
#plot the data and create a line plot of x against x squared
fig,ax = plt.subplots(figsize=(7,7))
ax.plot(x, x**2);

In [None]:
#use same data to make a scatter plot of sin(x) against x
fig,ax = plt.subplots()
ax.scatter(x,np.sin(x));


In [None]:
# bar chart built straight from a dictionary: keys become the bar labels,
# values become the bar heights
nut_butter_prices = {
    "Almond Butter": 10,
    "Peanut Butter": 8,
    "Cashew Butter": 12,
}

butter_names = list(nut_butter_prices.keys())
butter_costs = list(nut_butter_prices.values())

fig, ax = plt.subplots()
ax.bar(butter_names, butter_costs)
ax.set(title="Bar plot", xlabel="butter", ylabel="prices")

In [None]:
# horizontal bar chart of the same prices; keys and values are turned into lists first
fig,ax = plt.subplots()
ax.barh(list(nut_butter_prices.keys()), list(nut_butter_prices.values()));


In [None]:
# histogram of 1000 draws from np.random.randn; the draws get their own name so
# the evenly spaced `x` that the line plots rely on is not overwritten
samples = np.random.randn(1000)
fig, ax = plt.subplots()
ax.hist(samples)

### Two options for subplots

In [None]:
#Subplot option 1: unpack the 2x2 grid of axes into one name per panel
# (only four panels are drawn, so no third row is requested)
fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(nrows=2,ncols=2,figsize=(15,10))

# build the line data here instead of reusing the global `x`, which other cells
# rebind to random samples
x_line = np.linspace(0,10,num=100)

# plot to each different axis
ax1.plot(x_line,x_line**2)
ax2.scatter(np.random.random(10),np.random.random(10))
ax3.bar(nut_butter_prices.keys(),nut_butter_prices.values())
ax4.hist(np.random.randn(100));

In [None]:
# x = np.linspace(0,10,num=100)
# fig,ax = plt.subplots()
# ax.plot(x,x**2)


In [None]:
#option 2 for subplots: keep the axes in one 2x2 array and index it as ax[row, column]
fig, ax = plt.subplots(nrows=2,ncols=2,figsize=(10,5))

# build the line data here instead of reusing the global `x`, which other cells
# rebind to random samples
x_line = np.linspace(0,10,num=100)

ax[0,0].plot(x_line,x_line/2)
ax[0,1].scatter(np.random.random(10), np.random.random(10))         #row - 0, column - 1
ax[1,0].bar(nut_butter_prices.keys(), nut_butter_prices.values())
ax[1,1].hist(np.random.randn(1000));

## Plotting from Pandas Dataframe

In [None]:
import pandas as pd

In [None]:
# NOTE(review): absolute, machine-specific path - this cell only runs where the
# CSV exists at exactly this location
car_sales_csv = "/home/hp1/Documents/College/Coding/Machine Learning/zero_to_mastery_course/csv/car-sales.csv"

# load the car sales table and display it
car_sales = pd.read_csv(car_sales_csv)
car_sales

In [None]:
# Strip the "$", "," and "." characters so the Price column holds digits only.
# regex=True is spelled out because newer pandas treats the pattern as a literal
# string by default, which would leave this character class unmatched; the raw
# string keeps "\$" from being read as an (invalid) Python escape sequence.
car_sales["Price"] = car_sales["Price"].str.replace(r'[\$\,\.]', '', regex=True)
car_sales

In [None]:
#Removing the extra zeros in the Price column by dropping the last two characters
# NOTE(review): not idempotent - running this cell again strips two more characters
car_sales["Price"] = car_sales["Price"].str[:-2]
car_sales

In [None]:
#Reassign our price column to be integer
car_sales["Price"] = car_sales["Price"].astype(int)
# check the type of the first value after the conversion
type(car_sales["Price"][0])

In [None]:
#adding an extra column called "Sale Date" to the DataFrame: one date per row, starting from 1/1/2022
car_sales["Sale Date"] = pd.date_range("1/1/2022", periods=len(car_sales))
car_sales

In [None]:
# "Total Sales" is the running (cumulative) sum of the Price column
car_sales["Total Sales"] = car_sales["Price"].cumsum()
car_sales

In [None]:
#Let's plot the total sales against the sale date - Method 1 (plotting straight from the DataFrame)
car_sales.plot(x="Sale Date", y = "Total Sales");

In [None]:
# scatter plot of Price against the odometer reading
car_sales.plot(x="Odometer (KM)", y="Price", kind="scatter")

#### Method 2

In [None]:
#Bar graph using some dummy data: a 10x4 array of random values
x = np.random.rand(10,4)
x

In [None]:
# wrap the random array in a DataFrame, naming its four columns a-d
column_names = list("abcd")
df = pd.DataFrame(x, columns=column_names)
df

In [None]:
# bar chart of the DataFrame through the .plot.bar accessor
df.plot.bar()

In [None]:
#this line of code also does the same thing as df.plot.bar(), selecting the chart type with kind=
df.plot(kind="bar")

In [None]:
#Using the same methods on our car_sales dataframe: odometer reading per make as a bar chart
car_sales.plot(x="Make", y="Odometer (KM)", kind= "bar")

In [None]:
#Plotting Histograms: odometer readings split into 30 bins
car_sales["Odometer (KM)"].plot.hist(bins = 30)

In [None]:
# the same histogram requested through plot(kind=...), this time with 20 bins
car_sales["Odometer (KM)"].plot(kind="hist", bins= 20)

### Trying out similar techniques on another Dataset

In [None]:
# NOTE(review): absolute, machine-specific path - this cell only runs where the
# CSV exists at exactly this location
heart_disease_csv = "/home/hp1/Documents/College/Coding/Machine Learning/zero_to_mastery_course/csv/heart-disease.csv"

# load the heart disease table and preview its first rows
heart_disease = pd.read_csv(heart_disease_csv)
heart_disease.head()

In [None]:
#Create a histogram of the age column using 50 bins
heart_disease["age"].plot.hist(bins = 50)

In [None]:
# histograms for the whole DataFrame, drawn as separate subplots in one tall figure
heart_disease.plot.hist(figsize= (10,30),subplots = True);

### Which one should you use? (pyplot v/s the matplotlib OO method)
- When plotting something quickly, it's okay to use the pyplot method
- When plotting something advanced, use the OO method

In [None]:
# display the heart disease DataFrame
heart_disease

In [None]:
# keep only the rows whose age is greater than 50
is_over_50 = heart_disease["age"] > 50
over_50 = heart_disease[is_over_50]
over_50

In [None]:
# preview the first rows of the filtered frame
over_50.head()

In [None]:
over_50.plot(kind="scatter",x = "age", y= "chol",c= "target")       #the quick method: plot straight from the DataFrame, coloured by the target column

In [None]:
#OO method mixed with the DataFrame plot call: the Axes is created first and handed over through ax=
fig,ax = plt.subplots(figsize=(10,6))
over_50.plot(kind='scatter',x= 'age', y= 'chol', c= 'target',ax=ax)
# optional: restrict the x-axis range
# ax.set_xlim([45,100])


In [None]:
#OO method from scratch: every artist is created through the Axes object
fig, ax = plt.subplots(figsize=(10, 6))

# pull the plotted columns out once so the calls below stay short
ages = over_50['age']
cholesterol = over_50['chol']

#plot the data, colouring each point by its target value
scatter = ax.scatter(x=ages, y=cholesterol, c=over_50['target'])

#customize the plot
ax.set(title="Heart Disease & Cholestrol Levels", xlabel='Age', ylabel='Cholestrol')

#Add a legend built from the scatter's legend elements
handles, labels = scatter.legend_elements()
ax.legend(handles, labels, title='Target')

#Add a horizontal line at the mean of the chol column
ax.axhline(y=cholesterol.mean(), linestyle='--')

In [None]:
# Two stacked scatter panels for the over_50 frame sharing the age axis:
# chol on top (ax0) and thalach, the max heart rate column, below (ax1)
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, figsize=(10, 10), sharex=True)

# one entry per panel: target Axes, column to plot, title/label settings
panels = [
    (ax0, "chol",
     dict(title='Heart Disease & Cholestrol levels', ylabel='Cholestrol')),
    (ax1, "thalach",
     dict(title='Heart Disease & Maximum Heart Rate', xlabel='Age', ylabel='Maximum Heart Rate')),
]

for panel_ax, column, labels in panels:
    # add the data, coloured by the target column
    scatter = panel_ax.scatter(x=over_50["age"], y=over_50[column], c=over_50["target"])
    # customize the panel
    panel_ax.set(**labels)
    # legend built from the scatter's legend elements
    panel_ax.legend(*scatter.legend_elements(), title='Target')
    # dashed line at the column mean
    panel_ax.axhline(y=over_50[column].mean(), linestyle='--')

# add a title to the whole figure
fig.suptitle("Heart Diseases Analysis", fontsize=16, fontweight='bold')

### Learning to customize the plots a little better


In [None]:
# list the style names this matplotlib installation provides
plt.style.available

In [None]:
#default style: plot the Price column before any style sheet is applied
car_sales["Price"].plot()

In [None]:
plt.style.use('seaborn-whitegrid')
car_sales['Price'].plot()

In [None]:
# list the available style names again before picking another one
plt.style.available

In [None]:
# other styles that were tried:
# plt.style.use('seaborn')
# # plt.style.use('seaborn-colorblind')

# matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-<name>"
# and newer releases no longer accept the old spelling, so use whichever name
# this installation lists
notebook_style = ('seaborn-v0_8-notebook'
                  if 'seaborn-v0_8-notebook' in plt.style.available
                  else 'seaborn-notebook')
plt.style.use(notebook_style) #- this is nice
car_sales["Price"].plot()

In [None]:
#Creating some dummy data: a 10x4 array from np.random.randn
x = np.random.randn(10,4)
x

In [None]:
# wrap the array in a DataFrame with named columns (rebinds the earlier df)
df = pd.DataFrame(x, columns=['a', 'b', 'c', 'd'])
df

In [None]:
# bar chart of the new DataFrame
df.plot(kind='bar')

In [None]:
#customize this plot with the set() method
# the value returned by df.plot is kept as ax so title and labels can be set on it afterwards
ax= df.plot(kind= 'bar')
ax.set(title='Random Numbers Bar Graph', xlabel='rows', ylabel='random numbers')
# ax.legend().set_visible(True)

In [None]:
#################### customizing that subplot####################################
'''
new attributes added - 
cmap
set_xlim, set_ylim

'''

#Subplot with chol on top and max heart rate (thalach column) below, both from the over_50 frame
fig, (ax0,ax1) = plt.subplots(nrows=2,ncols=1, figsize= (10,10), sharex=True)
#add data to ax0, coloured by target using the 'winter' colormap
scatter = ax0.scatter(x=over_50["age"],y=over_50["chol"], c=over_50["target"],cmap='winter')
#customize ax0
ax0.set(title='Heart Disease & Cholestrol levels', ylabel='Cholestrol')
#limit the x-axis to 50-80 (the figure is created with sharex=True)
ax0.set_xlim([50,80])
#legend to ax0
ax0.legend(*scatter.legend_elements(), title='Target')
#Add a mean line
ax0.axhline(y=over_50['chol'].mean(), linestyle= '--')


#add data to ax1, same colouring as ax0
scatter = ax1.scatter(x=over_50["age"],y=over_50["thalach"], c=over_50["target"],cmap='winter')
#customize ax1
ax1.set(title='Heart Disease & Maximum Heart Rate', xlabel='Age', ylabel='Maximum Heart Rate')
#limit the y-axis to 60-200
ax1.set_ylim([60,200])
#legend to ax1
ax1.legend(*scatter.legend_elements(), title='Target')
#Add a mean line
ax1.axhline(y=over_50['thalach'].mean(), linestyle= '--')

#add a title to the figure
fig.suptitle("Heart Diseases Analysis", fontsize= 16, fontweight= 'bold')


In [None]:
# savefig does not create missing folders, so make sure the output directory
# exists before writing the image into it
figure_path = Path("Assests/Heart-Disease-Analysis-saved-with-code.png")
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(figure_path)