<a href="https://colab.research.google.com/github/Carinaaa/ML-Learning-Path/blob/intro-matplotlib/intro_matplotlib.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Quick line plot through the pyplot API; the trailing ";" keeps the returned
# list of Line2D objects from being echoed as cell output.
plt.plot([1,2,3,4],[11,22,33,44]);

Documentation: https://matplotlib.org/stable/users/index.html

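Matplotlib offers two interfaces: the pyplot (state-based) API and the object-oriented API. The cells below show three ways of creating a figure and its axes.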

In [None]:
# 1st method: create an empty Figure, then attach a single Axes to it
fig = plt.figure()
ax = fig.add_subplot()
# plt.show() is the supported way to render with the inline backend;
# Figure.show() expects a GUI backend and at best does nothing here.
plt.show()

In [None]:
# 2nd method: create a Figure and place an Axes on it by hand
fig = plt.figure()
# rect is [left, bottom, width, height] in figure fractions (0-1).
# [1, 1, 1, 1] put the whole Axes outside the canvas; this keeps it inside
# and leaves a margin for the tick labels.
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax.plot([1, 2, 3, 4], [11, 22, 33, 44])
# plt.show() instead of fig.show(): the inline backend has no GUI to show with
plt.show()

In [None]:
# 3rd method (recommended): create the Figure and its Axes in one call
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [50, 100, 200, 250])
# plt.show() instead of fig.show(): the inline backend has no GUI to show with
plt.show()
# Inspect the two object types that plt.subplots() returned
type(fig), type(ax)

## Matplotlib example workflow

In [None]:
# 0. import matplotlib and get it ready for plotting in notebook
%matplotlib inline
import matplotlib.pyplot as plt

# 1. Prepare data
x = [1,2,3,4]
y = [22,33,44,55]

# 2. Setup ploe
fig, ax = plt.subplots(figsize = (10,10)) # width and height

# 3. Plot data
ax.plot(x,y)

# 4. Customize plot
ax.set(title="Simple plot",
       xlabel = "x-axis",
       ylabel = "y-axis")

# 5. Save and show
fig.savefig("simple_plot.png")
fig.show()

## Making figures with NumPy arrays

We want:
* Line plot
* Scatter plot
* Bar plot
* Histogram
* Subplots

In [None]:
# Create some data: 100 evenly spaced samples from 0 to 10 (both included)
x = np.linspace(start=0, stop=10, num=100)
# Preview the first ten samples
x[:10]

In [None]:
# Plot the data and create a line plot.
# Both curves are labelled so the legend tells them apart, and the trailing
# ";" keeps the cell from echoing the last call's return value.
fig, ax = plt.subplots()
ax.plot(x, x**2, label="x^2")
ax.plot(x, x**3, label="x^3")
ax.legend();

In [None]:
# Use the same data to make a scatter plot.
# Both series are labelled so the legend tells them apart, and the trailing
# ";" keeps the cell from echoing the last call's return value.
fig, ax = plt.subplots()
ax.scatter(x, x**2, label="x^2")
ax.scatter(x, x**3, label="x^3")
ax.legend();

In [None]:
# Another scatter: sin(x) and cos(x) on the same Axes, one labelled series each
fig, ax = plt.subplots()
for label, fn in (("sin", np.sin), ("cos", np.cos)):
    ax.scatter(x, fn(x), label=label)
ax.legend()

In [None]:
# Make a bar plot from a dictionary (keys -> bar names, values -> bar heights)
nut_butter_prices = {
    "Almond": 10,
    "Peanut": 8,
    "Cashew": 12
}

fig, ax = plt.subplots()
# Pass real lists rather than dict views, as the other bar cells in this
# notebook do; matplotlib converts sequences more predictably than views.
ax.bar(list(nut_butter_prices.keys()), list(nut_butter_prices.values()))
ax.set(title="Nut Butter Store",
       ylabel="Price ($)");

In [None]:
# Horizontal bars: names go on the y-axis, prices set the bar widths
fig, ax = plt.subplots()
ax.barh(y=list(nut_butter_prices), width=list(nut_butter_prices.values()));

In [None]:
# Make some data: two independent draws of 1000 standard-normal samples
x = np.random.randn(1000)
y = np.random.randn(1000)

# Histogram: draw both samples semi-transparent so the second does not hide
# the first, and label them so the legend tells them apart.
fig, ax = plt.subplots()
ax.hist(x, bins=10, alpha=0.6, label="x")
ax.hist(y, bins=10, alpha=0.6, label="y")
ax.legend();

In [None]:
# Subplot option 1: unpack the 2x2 grid straight into four named Axes
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 5))
ax1.plot(x, x / 2);
# One (2, 10) draw unpacked into the x and y coordinates of ten points
ax2.scatter(*np.random.randn(2, 10));
ax3.bar(list(nut_butter_prices), list(nut_butter_prices.values()));
ax4.hist(np.random.randn(1000));

In [None]:
# Subplot option 2: keep the Axes in a 2x2 array and index into it
fig, ax = plt.subplots(2, 2, figsize=(10, 5))

# Plot to each different index (row first, then column)
ax[0][0].plot(x, x / 2);
ax[0][1].scatter(*np.random.randn(2, 10));
ax[1][0].bar(list(nut_butter_prices), list(nut_butter_prices.values()));
ax[1][1].hist(np.random.randn(1000));

## Plotting from pandas DataFrames

In [None]:
import pandas as pd

In [None]:

from os import mkdir
import requests

# Download the car-sales CSV from GitHub and store it under intro-matplotlib/
url = "https://raw.githubusercontent.com/Carinaaa/ML-Learning-Path/intro-pandas/car-sales.csv"
# Without a timeout a stalled connection would block the cell indefinitely
response = requests.get(url, timeout=30)

if response.status_code == 200:
    try:
        mkdir("intro-matplotlib")
    except FileExistsError:
        # Expected when the cell is re-run; the existing directory is reused
        print("Directory already exists.")
    # The `with` block closes the file on exit, so no explicit close() is needed
    with open("intro-matplotlib/car-sales.csv", "wb") as f:
        f.write(response.content)
    print("File downloaded successfully.")
else:
    print("Failed to download file. Status code:", response.status_code)

In [None]:
#  Make a dataframe from the CSV file saved under intro-matplotlib/
car_sales = pd.read_csv("intro-matplotlib/car-sales.csv")
# Display the loaded frame
car_sales

In [None]:
# Random-walk time series: the cumulative sum of 1000 standard-normal steps,
# indexed by 1000 consecutive dates starting at 1/1/2020
ts = pd.Series(
    np.random.randn(1000),
    index=pd.date_range("1/1/2020", periods=1000),
).cumsum()
ts.plot()

In [None]:
# Display the frame again before the Price column is cleaned
car_sales

In [None]:
# Strip "$", "," and "." from Price so the column can be cast to int.
# The pattern is a raw string: "\$", "\," and "\." are invalid escape
# sequences in a normal string literal and raise a warning on recent Pythons.
# The regex itself is unchanged.
# NOTE(review): removing "." keeps any digits after the decimal point in the
# integer; the "Remove last 2 zeros" step presumably compensates for that.
# This cell is not re-runnable: once Price is int, the .str accessor fails.
car_sales["Price"] = car_sales["Price"].str.replace(r'[\$\,\.]', '', regex=True).astype(int)
car_sales

In [None]:
# Remove last 2 zeros: vectorised floor division instead of a per-row lambda.
# Re-running this cell divides by 100 again, so run it once per load.
car_sales["Price"] = car_sales["Price"] // 100
car_sales

In [None]:
# Add a "Sale Date" column: one date per row, counting on from 1/1/2020
car_sales["Sale Date"] = pd.date_range("1/1/2020", periods=len(car_sales))
car_sales

In [None]:
# Add a "Total Sales" column holding the running total of Price
car_sales["Total Sales"] = car_sales["Price"].cumsum()
car_sales

In [None]:
# Let's plot the total sales over time as a line (pandas' default plot kind)
car_sales.plot.line(x="Sale Date", y="Total Sales");

In [None]:
# Scatter of price against odometer reading
car_sales.plot.scatter(x="Odometer (KM)", y="Price");

In [None]:
# Same two columns as a bar chart: one bar per row
car_sales.plot.bar(x="Odometer (KM)", y="Price");

In [None]:
# Histograms
car_sales["Odometer (KM)"].plot(kind="hist")  # same as .plot.hist()
# we can see outliers

Bins are intervals that group a range of values from a dataset.

They are used to divide the total range of data into smaller, manageable sections.

In [None]:
car_sales["Odometer (KM)"].plot.hist(bins = 20) # bins = number of intervals, not number of samples

## Let's switch the dataset

In [None]:
from os import mkdir
import requests

# Download the heart-disease CSV from GitHub and store it under intro-matplotlib/
url = "https://raw.githubusercontent.com/Carinaaa/ML-Learning-Path/refs/heads/intro-matplotlib/heart-disease.csv"
# Without a timeout a stalled connection would block the cell indefinitely
response = requests.get(url, timeout=30)

if response.status_code == 200:
    # Save the file to a different name to avoid confusion
    try:
        mkdir("intro-matplotlib")
    except FileExistsError:
        # Expected when the directory was already created; it is reused
        print("Directory already exists.")
    # The `with` block closes the file on exit, so no explicit close() is needed
    with open("intro-matplotlib/heart-disease.csv", "wb") as f:
        f.write(response.content)
    print("File downloaded successfully.")
else:
    print("Failed to download file. Status code:", response.status_code)

In [None]:
# Load the heart-disease CSV saved under intro-matplotlib/ into a DataFrame
heart_disease = pd.read_csv("intro-matplotlib/heart-disease.csv")
# Display the loaded frame
heart_disease

In [None]:
# Create a histogram of age, splitting the value range into 10 groups
heart_disease["age"].plot(kind="hist", bins=10)

In [None]:
# Create 50 groups (the stray leading "3" was a typo for "#")
heart_disease["age"].plot.hist(bins=50)

In [None]:
# One histogram per column, each in its own subplot of a tall figure
heart_disease.plot(kind="hist", subplots=True, figsize=(10, 30));

In [None]:
# Keep only the rows whose age is greater than 50
over_50 = heart_disease.loc[heart_disease["age"] > 50]
over_50.head()

In [None]:
# pandas scatter: cholesterol against age, coloured by the target column
over_50.plot.scatter(x="age", y="chol", c="target")

In [None]:
# OO method: create the Figure/Axes first, then let pandas draw onto that
# Axes through the `ax=` argument.
# The figure was previously bound to the typo `fix`, which left `fig`
# pointing at the figure from an earlier cell.
fig, ax = plt.subplots(figsize=(10, 6))
over_50.plot(kind='scatter',
             x='age',
             y='chol',
             c='target',
             ax=ax)
# Widen the x-axis beyond the data; ";" hides the returned limits tuple
ax.set_xlim([45, 100]);

In [None]:
# OO method from scratch: draw the scatter directly with matplotlib
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the data, colouring each point by its value in the target column
scatter = ax.scatter(over_50["age"], over_50["chol"], c=over_50["target"])

# Customize
ax.set_title("Heart Disease and Cholesterol Levels")
ax.set_xlabel("Age")
ax.set_ylabel("Cholesterol")

# Legend built from the handles/labels that the scatter derives from `c`
handles, labels = scatter.legend_elements()
ax.legend(handles, labels, title="Target")

# Add a horizontal line at the mean cholesterol value
ax.axhline(over_50["chol"].mean(), linestyle="--");

In [None]:
# Subplot of chol and thalach against age, stacked in two rows
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(10, 10))

# --- ax0: cholesterol ---
scatter = ax0.scatter(over_50["age"], over_50["chol"], c=over_50["target"])
ax0.set_title("Heart Disease and Cholesterol")
ax0.set_xlabel("Age")
ax0.set_ylabel("Cholesterol")
# Add legend
ax0.legend(*scatter.legend_elements(), title="Target")
# Add a meanline
ax0.axhline(over_50["chol"].mean(), linestyle="--")

# --- ax1: heart rate ---
scatter = ax1.scatter(over_50["age"], over_50["thalach"], c=over_50["target"])
ax1.set_title("Heart Disease and Heart Rate")
ax1.set_xlabel("Age")
ax1.set_ylabel("Heart Rate")
# Add legend (label text coloured red)
ax1.legend(*scatter.legend_elements(), title="Target", labelcolor=["red"])
# Add a meanline
ax1.axhline(over_50["thalach"].mean(), linestyle="--")

# Add title to the figure
fig.suptitle("Heart Disease", fontsize=16, fontweight="bold")

# Adjust space between subplots
fig.subplots_adjust(hspace=0.5)

## Making the plots stylish

In [None]:
# List the style names that can be passed to plt.style.use()
plt.style.available

In [None]:
# Baseline: Price as a line plot in the current style
car_sales["Price"].plot.line()

In [None]:
# Switch the global style; it stays active for every later plot as well
plt.style.use("dark_background")
car_sales["Price"].plot.line()

In [None]:
# Switch the global style again, this time to ggplot
plt.style.use("ggplot")
car_sales["Price"].plot.line()

In [None]:
# Bar chart of Price per odometer reading, drawn in the currently active style
car_sales.plot.bar(x="Odometer (KM)", y="Price")

In [None]:
# Create some data: standard-normal samples in a 10x4 array
x = np.random.randn(10,4)
# Show the shape, number of dimensions, element count, type and the values
x.shape, x.ndim, x.size, type(x), x

In [None]:
# Wrap the array in a DataFrame with one named column per array column
df = pd.DataFrame(x, columns=list("abcd"))
df

In [None]:
# pandas returns the matplotlib Axes it drew on; keep it to inspect its type
ax = df.plot.bar()
type(ax)

In [None]:
# Customize the Axes returned by the pandas bar plot
ax = df.plot.bar()
# Add some labels and a title
ax.set_title("Random numbers")
ax.set_xlabel("Row number")
ax.set_ylabel("Random numbers")
# Make the legend
ax.legend().set_visible(True)

## More styles

In [None]:
# List the available style names again before picking another one
plt.style.available

In [None]:
plt.style.use("seaborn-v0_8-whitegrid")

# OO Method
fig, ax = plt.subplots(figsize=(10, 5))

# Add data; cmap changes the colour scheme used for the `c` values
scatter = ax.scatter(over_50["age"], over_50["chol"],
                     c=over_50["target"], cmap="winter")

# Customize
ax.set_title("HR Disease")
ax.set_xlabel("age")
ax.set_ylabel("chol")

# Add legend
ax.legend(*scatter.legend_elements(), title="Target")

# Add a meanline
ax.axhline(over_50["chol"].mean(), linestyle="--", color="red")

In [None]:
# Customizing: scatter on top, bar chart below, both limited to ages 45-80
fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(20, 10))

# --- ax0: cholesterol scatter ---
scatter = ax0.scatter(over_50["age"], over_50["chol"],
                      c=over_50["target"], cmap="summer")
ax0.set_title("Heart Disease and Cholesterol")
ax0.set_xlabel("Age")
ax0.set_ylabel("Cholesterol")
ax0.set_xlim(45, 80)
# Add legend
ax0.legend(*scatter.legend_elements(), title="Target")
# Add a meanline
ax0.axhline(over_50["chol"].mean(), linestyle="--", color="red")

# --- ax1: heart-rate bars ---
bar = ax1.bar(over_50["age"], over_50["thalach"])
ax1.set_title("Heart Disease and Heart Rate")
ax1.set_xlabel("Age")
ax1.set_ylabel("Heart Rate")
ax1.set_xlim(45, 80)
ax1.set_ylim(60, 200)
# Add a meanline
ax1.axhline(over_50["thalach"].mean(), linestyle="--", color="black")

# Add title to the figure
fig.suptitle("Heart Disease", fontsize=16, fontweight="bold")

# Adjust space between subplots
fig.subplots_adjust(hspace=0.5)
