### Advanced Matplotlib Concepts

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Colored Scatterplots

In [None]:
# Load the iris measurements and preview five rows.
iris = pd.read_csv('./data/iris.csv')

# random_state pins the sample so the preview is identical on every run
iris.sample(5, random_state=5)

In [None]:
# Encode each species name as an integer code (0, 1, 2) so it can drive a colormap later on.
SPECIES_CODES = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

# Values without an entry in SPECIES_CODES (including the codes written by an earlier run of
# this cell) pass through unchanged, so re-running the cell is harmless. Mapping value by value
# also avoids the object-dtype downcasting that `Series.replace` deprecates in recent pandas.
iris['Species'] = iris['Species'].map(lambda species: SPECIES_CODES.get(species, species))

# random_state pins the sample so the preview is identical on every run
iris.sample(5, random_state=5)

In [None]:
# Colour-coded scatter plot of the iris data, drawn through the explicit Figure/Axes interface.
fig, ax = plt.subplots()

# c=iris['Species'] supplies one numeric code per point; cmap translates each code into a colour
points = ax.scatter(iris['SepalLengthCm'], iris['PetalLengthCm'], c=iris['Species'], cmap='jet', alpha=0.7)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Petal Length')

# the colorbar shows which value is represented by each colour
fig.colorbar(points, ax=ax)
plt.show()

### Plot Size

In [None]:
# Same scatter plot on a larger canvas.
# plt.figure(figsize=(width, height)) sets the size in inches and has to come before the plotting calls.
plt.figure(figsize=(15,7))

plt.scatter(iris['SepalLengthCm'], iris['PetalLengthCm'], c=iris['Species'], cmap='jet', alpha=0.7)
plt.xlabel('Sepal Length')
plt.ylabel('Petal Length')

plt.colorbar()

# render explicitly so the cell does not end by echoing the Colorbar object
plt.show()

### Annotations

In [6]:
df = pd.read_csv('./data/batter.csv')

# keep the first 100 rows (the top 100 batsmen) ...
top_100 = df.head(100)

# ... and draw 25 of them at random; random_state=5 seeds the sampler so the same 25 rows come back on every run
sample_df = top_100.sample(n=25, random_state=5)

In [None]:
# scatter plot of the `avg` column against the `strike_rate` column for the sampled batters
plt.scatter(sample_df['avg'], sample_df['strike_rate'])

# label the axes so the figure can be read on its own
plt.xlabel('Avg')
plt.ylabel('Strike Rate')

# render explicitly so the cell does not end by echoing the scatter artist
plt.show()

In [None]:
# Minimal labelling demo: plt.text(x, y, label) writes `label` at data coordinates (x, y)
x = [1,2,3,4]
y = [5,6,7,8]

plt.scatter(x,y)

# the first three labels use the default text style
for px, py, label in zip(x[:3], y[:3], ['Point 1', 'Point 2', 'Point 3']):
    plt.text(px, py, label)

# fontdict overrides the text properties of an individual label
plt.text(x[3], y[3], 'Point 4', fontdict={'size':12, 'color': 'brown'})

In [None]:
# Bubble scatter plot with one text label per batter

plt.figure(figsize=(16,12))

# x = `avg`, y = `strike_rate`, marker area taken from the `runs` column
plt.scatter(sample_df['avg'], sample_df['strike_rate'], s=sample_df['runs'])

# write each batter's name at the position of its point
for avg, strike_rate, batter in zip(sample_df['avg'], sample_df['strike_rate'], sample_df['batter']):
    plt.text(avg, strike_rate, batter)

### Horizontal and Vertical Lines on a Graph

In [None]:
plt.figure(figsize=(16,12))

# bubble scatter: x = `avg`, y = `strike_rate`, marker area from `runs`
plt.scatter(sample_df['avg'], sample_df['strike_rate'], s=sample_df['runs'])

# axhline draws a horizontal line across the whole Axes at the given y value;
# these two make it easy to spot batters with a strike rate above 130 / 140
plt.axhline(y=130, color='red')
plt.axhline(y=140, color='blue')

# axvline is the vertical counterpart, placed at the given x value
plt.axvline(x=30, color='maroon')

# write each batter's name at the position of its point
for avg, strike_rate, batter in zip(sample_df['avg'], sample_df['strike_rate'], sample_df['batter']):
    plt.text(avg, strike_rate, batter)

### Subplots

In [None]:
# Baseline drawn with the implicit pyplot interface, before switching to a different way of plotting
batter_data = pd.read_csv('./data/batter.csv')
plt.scatter(x=batter_data['avg'], y=batter_data['strike_rate'])

In [None]:
# using subplots

# plt.subplots() returns the Figure object (`fig`) and the Axes object (`axis`) drawn on it
fig, axis = plt.subplots()

axis.scatter(batter_data['avg'], batter_data['strike_rate'])

axis.set_title("IPL Batter's data")
axis.set_xlabel('Avg')
axis.set_ylabel('Strike Rate')

# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only emits a
# warning under the non-GUI inline backend that notebooks use
plt.show()

In [None]:
# A single, wider Axes: figsize is passed straight to plt.subplots()
fig, axis = plt.subplots(figsize=(15,6))

axis.set_title("IPL Top Batter's data")
axis.set_xlabel('Avg')

# axis.plot(batter_data['avg'], batter_data['strike_rate']) would join the points with a line instead
axis.scatter(batter_data['avg'], batter_data['strike_rate'], color='red')

axis.set_ylabel('Strike Rate')

In [None]:
# two scatter plots stacked vertically (2 rows x 1 column) that share the x axis
fig, axis = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10,6))
top_ax, bottom_ax = axis

top_ax.scatter(batter_data['avg'], batter_data['strike_rate'], color='red')
top_ax.set_title('Avg Vs Strike Rate')
top_ax.set_ylabel('Strike Rate')

bottom_ax.scatter(batter_data['avg'], batter_data['runs'], color='black')
bottom_ax.set_title('Avg Vs Runs')
bottom_ax.set_ylabel('Runs')
bottom_ax.set_xlabel('Avg')

In [None]:
# With a 2x2 grid, plt.subplots() returns a 2-D array of Axes indexed by row, then column
fig, axis = plt.subplots(nrows=2, ncols=2, figsize=(10,10))

# second row, first column (the bottom-left Axes); echoed as the cell output
axis[1][0]

In [None]:
# four graphs on a 2x2 grid; each Axes is addressed as axis[row, column]
fig, axis = plt.subplots(nrows=2, ncols=2, figsize=(10,10))

# top-left: avg against strike rate
axis[0,0].scatter(batter_data['avg'], batter_data['strike_rate'], color='red')
axis[0,0].set_title('Avg Vs Strike Rate')
axis[0,0].set_xlabel('Avg')
axis[0,0].set_ylabel('Strike Rate')

# top-right: avg against runs
axis[0,1].scatter(batter_data['avg'], batter_data['runs'], color='black')
axis[0,1].set_title('Avg Vs Runs')
axis[0,1].set_xlabel('Avg')
axis[0,1].set_ylabel('Runs')

# bottom-left: histogram of avg
axis[1,0].hist(batter_data['avg'])
axis[1,0].set_title('Avg Distribution')
axis[1,0].set_xlabel('Avg')

# bottom-right: histogram of runs
axis[1,1].hist(batter_data['runs'])
axis[1,1].set_title('Runs Distribution')
axis[1,1].set_xlabel('Runs')

# keep titles and axis labels of neighbouring plots from overlapping
fig.tight_layout()
plt.show()

In [None]:
# four graphs on a 2x2 grid, built with fig.add_subplot(rows, cols, position);
# positions are numbered from 1, left to right and then top to bottom
fig = plt.figure()
ax1, ax2, ax3, ax4 = (fig.add_subplot(2, 2, position) for position in range(1, 5))

ax1.scatter(batter_data['avg'], batter_data['strike_rate'], color='red')
ax2.hist(batter_data['avg'])
ax3.scatter(batter_data['avg'], batter_data['runs'], color='black')
ax4.hist(batter_data['runs'])

### 3D Scatter Plot

In [None]:
# 3D Scatter plot example

# an empty figure holding a single Axes created with the '3d' projection
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# echo the Axes object as the cell output
ax

In [None]:
# 3D scatter plot: x = runs, y = avg, z = strike rate
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

ax.scatter(batter_data['runs'], batter_data['avg'], batter_data['strike_rate'])

ax.set_title("IPL Batsmen analysis")
ax.set_xlabel("Runs")
ax.set_ylabel("Avg")
ax.set_zlabel("SR")

### 3D Line Plot

In [None]:
# 3D Line Plot example: four points joined in the order they are listed
x = [0,1,5,25]
y = [0,10,13,0]
z = [0,13,20,9]

fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# mark every point with the same marker size ...
ax.scatter(x, y, z, s=100)

# ... and connect consecutive points with a red line
ax.plot(x, y, z, color='red')

### 3D Surface Plots

In [None]:
# example 1 - 3D surface plot of z = x^2 + y^2

# 100 equally spaced numbers from -10 to 10 along each axis
x = np.linspace(-10, 10, num=100)
y = np.linspace(-10, 10, num=100)

# np.meshgrid expands the two 1-D arrays into two 100x100 arrays:
#   `xx` - every row is a copy of the x values
#   `yy` - every column is a copy of the y values
# read together they give the 10,000 (x, y) grid points of the square from (-10,-10) to (10,10)
xx, yy = np.meshgrid(x, y)

z = xx**2 + yy**2

fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(projection='3d')

# cmap='viridis' replaces the default surface colour with a colormap driven by z
c = ax.plot_surface(xx, yy, z, cmap='viridis')
fig.colorbar(c)

In [None]:
# example 2 - 3D surface plot of z = sin(x) + cos(y), evaluated on the existing xx / yy grid
z = np.sin(xx) + np.cos(yy)

fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(projection='3d')

c = ax.plot_surface(xx, yy, z, cmap='viridis')
fig.colorbar(c)

In [None]:
# example 3 - 3D surface plot of z = sin(x) + log(y)

# log(y) is only defined for y > 0. Mark the rest of the grid as NaN explicitly instead of
# letting np.log emit a RuntimeWarning for non-positive input; NaN cells are simply left
# out of the drawn surface.
positive_y = np.where(yy > 0, yy, np.nan)
z = np.sin(xx) + np.log(positive_y)

fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')

c = ax.plot_surface(xx,yy,z, cmap='viridis')
fig.colorbar(c)

In [None]:
# example 4 - 3D surface plot of z = sin(x) + log(x)

# log(x) is only defined for x > 0. Mark the rest of the grid as NaN explicitly instead of
# letting np.log emit a RuntimeWarning for non-positive input; NaN cells are simply left
# out of the drawn surface.
positive_x = np.where(xx > 0, xx, np.nan)
z = np.sin(xx) + np.log(positive_x)

fig = plt.figure(figsize=(12,8))
ax = plt.subplot(projection='3d')

c = ax.plot_surface(xx,yy,z, cmap='viridis')
fig.colorbar(c)

### Contour Plots
- A contour plot represents a 3D surface in 2D: each contour line (or colour band) joins the points that share the same z value.

In [None]:
# example 1 - contour lines of z = x^2 + y^2 on the existing xx / yy grid

z = xx**2 + yy**2

fig, ax = plt.subplots(figsize=(12,8))

# contour() draws only the level lines; the colorbar maps line colour to z value
c = ax.contour(xx, yy, z, cmap='viridis')
fig.colorbar(c)

In [None]:
# example 2 - the same surface drawn with `contourf`, which fills the bands between the levels
z = xx**2 + yy**2

fig, ax = plt.subplots(figsize=(12,8))

c = ax.contourf(xx, yy, z, cmap='viridis')
fig.colorbar(c)

In [None]:
# example 3 - filled contour plot of z = sin(x) + cos(y)

z = np.sin(xx) + np.cos(yy)

fig, ax = plt.subplots(figsize=(12,8))

c = ax.contourf(xx, yy, z, cmap='viridis')
fig.colorbar(c)

### Heatmap
- A heatmap draws a 2D grid of values as an image: each cell is coloured according to its value, so the plot shows how the values are distributed across the grid.

In [None]:
# IPL bowling analysis: how many sixes were hit on each ball of each over

delivery = pd.read_csv('./data/IPL_Ball_by_Ball_2008_2022.csv')

# keep only the deliveries numbered 1-6 on which the batsman scored a six
temp_df = delivery[(delivery['ballnumber'].isin([1,2,3,4,5,6])) & (delivery['batsman_run'] == 6)]

# rows = over, columns = ball number, cell = number of sixes;
# fill_value=0 records an over/ball pair without any six as 0 instead of leaving it missing
grid = temp_df.pivot_table(index='overs', columns='ballnumber', values='batsman_run', aggfunc='count', fill_value=0)

plt.figure(figsize=(20,10))

# to plot a heatmap, we can use the `imshow` method from matplotlib
plt.imshow(grid)

# imshow puts grid row i at y position i and grid column j at x position j, so both the tick
# positions and the tick labels are taken from the grid itself rather than from the raw data.
# Overs are relabelled from 0-based to 1-based, as in the original 0-19 -> 1-20 relabelling.
plt.yticks(np.arange(len(grid.index)), grid.index + 1)
plt.xticks(np.arange(len(grid.columns)), grid.columns)

plt.xlabel('Ball number')
plt.ylabel('Over')
plt.colorbar(label='Number of sixes')
plt.show()

### Pandas Plot()

In [None]:
# pandas plotting called directly on a Series; the index becomes the x axis
s = pd.Series([1,2,3,4,5,6,7])
s.plot.line()
# other kinds to try on the same series:
# s.plot(kind='hist')
# s.plot(kind='pie')


##### `plot()` can be used on a DataFrame as well

In [None]:
import seaborn as sns

# `tips` contains restaurant data; `size` is multiplied by 100 so it is
# large enough to be used as a marker size later on
tips = sns.load_dataset('tips').assign(size=lambda frame: frame['size'] * 100)

tips.head()

In [None]:
# scatter plot --> labels --> marker --> figsize --> color --> cmap

# single-colour variant of the same plot:
# tips.plot(kind='scatter', x='total_bill', y='tip', title='Cost Analysis', color='red', figsize=(10,6), s='size') 

# s='size' takes each marker's size from the `size` column;
# c='sex' together with cmap colours each marker by the `sex` column
tips.plot.scatter(x='total_bill', y='tip', s='size', c='sex', cmap='viridis', title='Cost Analysis', figsize=(10,6))

##### lineplot 

In [None]:
# line plot example: load the stocks data straight from its URL and preview it
STOCKS_URL = 'https://raw.githubusercontent.com/m-mehdi/pandas_tutorials/main/weekly_stocks.csv'
stocks = pd.read_csv(STOCKS_URL)
stocks.head()


In [None]:
# line plot of a single column (a Series); the row index is used as the x axis
stocks['MSFT'].plot.line()

In [None]:
# line plot of the whole DataFrame: `Date` on the x axis, one line for each of the other columns
stocks.plot.line(x='Date')

In [None]:
# line plot of selected columns only: `Date` on the x axis, one line each for AAPL and FB
stocks.plot.line(x='Date', y=['AAPL', 'FB'])

##### barchart

In [62]:
# bar chart --> single --> horizontal --> multiple
# using the tips data (its `size` column was multiplied by 100 when the data was loaded)

tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,200
1,10.34,1.66,Male,No,Sun,Dinner,300
2,21.01,3.50,Male,No,Sun,Dinner,300
3,23.68,3.31,Male,No,Sun,Dinner,200
4,24.59,3.61,Female,No,Sun,Dinner,400
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,300
240,27.18,2.00,Female,Yes,Sat,Dinner,200
241,22.67,2.00,Male,Yes,Sat,Dinner,200
242,17.82,1.75,Male,No,Sat,Dinner,200


### **Colored Scatter Plots in Matplotlib** 
---

#### **1. Purpose of Scatter Plots**
- Visualize **correlation** between two numerical variables (X vs Y).
- Identify patterns/clusters in data.

#### **2. Basic Scatter Plot**
```python
plt.scatter(df['petal_length'], df['sepal_length'])
plt.xlabel('Petal Length')
plt.ylabel('Sepal Length')
plt.show()
```

#### **3. Color-Coding by Category**
- **Goal**: Add a third dimension (categorical data) using colors.
- **Steps**:
  1. **Map categories to numbers** (e.g., Iris-setosa=0, Iris-versicolor=1).
  2. Pass the numeric column to the `c` parameter in `scatter()`.

```python
# Example: Map species to numbers
df['species_num'] = df['species'].replace({'Iris-setosa':0, 'Iris-versicolor':1, 'Iris-virginica':2})

plt.scatter(df['petal_length'], df['sepal_length'], c=df['species_num'], cmap='viridis')
plt.colorbar(label='Species')  # Adds a color legend
```

#### **4. Key Parameters**
- **`c`**: Column for color-coding (numeric or categorical mapped to numbers).
- **`cmap`**: Color map (e.g., `'viridis'`, `'plasma'`, `'rainbow'`).  
  - List all available: `print(plt.colormaps())`.
- **`alpha`**: Transparency (0=transparent, 1=opaque).  
  ```python
  plt.scatter(..., alpha=0.5)  # Semi-transparent points
  ```

#### **5. Customizing Colors**
- Use predefined colormaps:
  ```python
  plt.scatter(..., cmap='tab10')  # Discrete colors
  ```
- **Colorbar**: Add a legend for color values:
  ```python
  plt.colorbar(ticks=[0,1,2], label='Species')
  ```

#### **6. Example with Iris Dataset**
```python
import matplotlib.pyplot as plt
import pandas as pd

# Load data
df = pd.read_csv('iris.csv')
df['species_num'] = df['species'].astype('category').cat.codes  # Convert categories to numbers

# Plot
plt.scatter(df['petal_length'], df['sepal_length'], 
            c=df['species_num'], cmap='viridis', alpha=0.7)
plt.xlabel('Petal Length')
plt.ylabel('Sepal Length')
plt.colorbar(label='Species (0=setosa, 1=versicolor, 2=virginica)')
plt.title('Iris Dataset: Petal vs Sepal Length')
plt.show()
```

#### **7. Key Takeaways**
- **Color-coding** adds a **third dimension** (categorical/numerical) to 2D scatter plots.
- **`cmap`** choices impact readability (use sequential maps for ordered data, qualitative for categories).
- **`alpha`** helps visualize overlapping points.

#### **8. Common Use Cases**
- Cluster analysis (e.g., species classification).
- Multivariate exploration (3+ variables in a 2D plot).

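#### **Visual Output**
The colours depend on the colormap that is passed as `cmap`:
- With `cmap='viridis'` (used in the example above): **purple** = Iris-setosa (0), **teal/green** = Iris-versicolor (1), **yellow** = Iris-virginica (2).
- With `cmap='jet'`: **blue** = Iris-setosa (0), **green** = Iris-versicolor (1), **red** = Iris-virginica (2).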
---

### **1. Adjusting Figure Size**
- **Problem**: Default plot size may be too small.
- **Solution**: Use `plt.figure(figsize=(width, height))` before plotting.
  ```python
  plt.figure(figsize=(15, 7))  # Width=15 inches, Height=7 inches
  plt.scatter(x, y)
  plt.show()
  ```
- **Applies to**: All plot types (scatter, bar, histogram, etc.).

---

### **2. Annotations (Adding Labels to Points)**
- **Goal**: Label specific data points on a plot (e.g., player names in a scatter plot).
- **Methods**:
  - **Manual Annotation**:
    ```python
    plt.annotate(
        'Label', 
        xy=(x_coord, y_coord),  # Point to label
        xytext=(x_text, y_text),  # Label position (same coordinates as `xy` unless `textcoords` is given)
        arrowprops=dict(arrowstyle='->')  # Optional arrow
    )
    ```
  - **Loop for Multiple Points**:
    ```python
    for i, row in df.iterrows():
        plt.annotate(
            row['batsman'], 
            xy=(row['average'], row['strike_rate']),
            xytext=(5, 5), 
            textcoords='offset pixels'
        )
    ```

---

### **3. Bubble Charts (Size-Encoded Scatter Plots)**
- **Use Case**: Show **three variables** (X, Y, and size) in a 2D plot.
- **Implementation**: Use the `s` parameter in `scatter()` for point sizes.
  ```python
  plt.scatter(
      df['average'], 
      df['strike_rate'], 
      s=df['runs'] * 10,  # Scale size for visibility
      alpha=0.5  # Transparency
  )
  ```
- **Key Points**:
  - Larger bubbles = higher value of the third variable (e.g., total runs).
  - Use `alpha` to handle overlapping points.

---

### **4. Key Takeaways**
1. **Customization**:
   - Control figure size with `figsize`.
   - Annotate points to add context.
   - Use bubble charts for multivariate data.
2. **Universal Techniques**:
   - `figsize` works for all plot types.
   - Annotations can be added to any plot (scatter, bar, etc.).
3. **Visual Clarity**:
   - Adjust `alpha` for transparency.
   - Scale bubble sizes (`s`) appropriately.

---

### **Example: IPL Batsmen Analysis**
```python
plt.figure(figsize=(18, 10))
plt.scatter(
    df['average'], 
    df['strike_rate'], 
    s=df['runs'] * 0.1,  # Scale down bubble size
    alpha=0.7
)
for i, row in df.iterrows():
    plt.annotate(
        row['batsman'], 
        xy=(row['average'], row['strike_rate']),
        fontsize=8,
        ha='center'
    )
plt.xlabel('Average')
plt.ylabel('Strike Rate')
plt.title('IPL Batsmen: Avg vs Strike Rate (Bubble Size = Total Runs)')
plt.show()
```

---

### **Common Pitfalls & Fixes**
- **Overlapping Labels**: Reduce font size or use offsets (`xytext`).
- **Small/Large Bubbles**: Adjust scaling factor (e.g., `s=df['runs'] * 0.1`).
- **Cluttered Plots**: Filter data (e.g., top 25 batsmen) or use transparency (`alpha`).

---

### **1. Adding Horizontal/Vertical Lines**
#### **Purpose**:
- Highlight thresholds or benchmarks in plots (e.g., strike rate > 130, average > 30).
- Divide graphs into quadrants for analysis.

#### **Methods**:
- **Horizontal Line**:  
  ```python
  plt.axhline(y=130, color='red', linestyle='--', label='Strike Rate Threshold')
  ```
- **Vertical Line**:  
  ```python
  plt.axvline(x=30, color='blue', linestyle=':', label='Average Threshold')
  ```

#### **Parameters**:
- `y`/`x`: Position of the line.
- `color`: Line color (e.g., `'red'`, `'green'`).
- `linestyle`: `'-'`, `'--'`, `':'`, etc.
- `alpha`: Transparency (0 to 1).
- `label`: For legend.

#### **Example**:
```python
plt.scatter(df['average'], df['strike_rate'])
plt.axhline(y=130, color='red', label='Min Strike Rate')  # Horizontal
plt.axvline(x=30, color='blue', label='Min Average')      # Vertical
plt.legend()
plt.show()
```
**Output**: Divides the plot into quadrants to identify batsmen with:
- High strike rate (>130) **and** high average (>30).
- High strike rate but low average, etc.

---

### **2. Annotations (Text on Plots)**
#### **Use Case**:
- Label specific points (e.g., player names) to add context.

#### **Methods**:
- **Single Annotation**:
  ```python
  plt.annotate(
      'Virat Kohli', 
      xy=(45, 140),                 # Point coordinates
      xytext=(5, 5),                # Text offset
      textcoords='offset pixels',    # Offset unit
      fontsize=10,
      color='green'
  )
  ```
- **Loop for Multiple Annotations**:
  ```python
  for i, row in df.iterrows():
      plt.annotate(
          row['batsman'], 
          xy=(row['average'], row['strike_rate']),
          xytext=(5, 5), 
          textcoords='offset pixels',
          fontsize=8
      )
  ```

#### **Customization**:
- `fontsize`: Adjust text size.
- `arrowprops`: Add arrows (e.g., `dict(arrowstyle='->')`).
- `ha`/`va`: Horizontal/vertical alignment (`'center'`, `'left'`).

---

### **3. Practical Example: IPL Batsmen Analysis**
```python
plt.figure(figsize=(12, 8))
plt.scatter(
    df['average'], 
    df['strike_rate'], 
    s=df['runs']*0.1,  # Bubble size = total runs
    alpha=0.7
)

# Threshold lines
plt.axhline(y=130, color='red', linestyle='--', label='Strike Rate > 130')
plt.axvline(x=30, color='blue', linestyle=':', label='Average > 30')

# Annotations
for i, row in df.head(25).iterrows():  # Label top 25 batsmen
    plt.annotate(
        row['batsman'], 
        xy=(row['average'], row['strike_rate']),
        fontsize=8,
        ha='center'
    )

plt.xlabel('Average')
plt.ylabel('Strike Rate')
plt.title('IPL Batsmen: Strike Rate vs Average (Size = Total Runs)')
plt.legend()
plt.show()
```

---

### **4. Key Takeaways**
1. **Threshold Lines**:
   - Use `axhline`/`axvline` to highlight benchmarks.
   - Combine lines to create quadrants for segmented analysis.
2. **Annotations**:
   - Label points dynamically using loops for large datasets.
   - Customize text appearance (`fontsize`, `color`).
3. **Visual Clarity**:
   - Adjust `alpha` for transparency.
   - Scale bubble sizes (`s`) to represent a third variable (e.g., total runs).
4. **Real-World Use**:
   - Identify outliers (e.g., high strike rate + low average).
   - Compare subgroups (e.g., top performers vs average players).

---

### **Common Pitfalls & Fixes**
- **Overlapping Text**: Reduce `fontsize` or use offsets (`xytext`).
- **Cluttered Lines**: Use different `linestyle` (e.g., `'--'`, `':'`).
- **Small/Large Bubbles**: Adjust scaling factor (e.g., `s=df['runs']*0.1`).

---

### **1. What are Subplots?**
- **Purpose**: Plot multiple graphs (axes) in a single figure, arranged in a grid.
- **Use Case**: Compare different datasets/variables side-by-side or analyze multiple aspects of data simultaneously.

---

### **2. Creating Subplots**
#### **Method 1: `plt.subplots()` (Recommended)**
```python
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))  # 2x2 grid
```
- **Parameters**:
  - `nrows`, `ncols`: Grid dimensions (e.g., `2, 2` for 4 plots).
  - `figsize`: Figure size (width, height in inches).
- **Returns**:
  - `fig`: Figure object (container for all plots).
  - `axes`: Array of axes (individual plots). Access plots via `axes[0,0]`, `axes[0,1]`, etc.

#### **Method 2: `plt.subplot()` (Legacy)**
```python
plt.subplot(2, 2, 1)  # 2x2 grid, activate 1st plot
plt.scatter(x1, y1)
plt.subplot(2, 2, 2)  # Activate 2nd plot
plt.scatter(x2, y2)
```
- **Disadvantage**: Manual positioning, less flexible.

---

### **3. Plotting on Subplots**
- **Access axes** and plot using methods like `scatter()`, `plot()`, etc.:
  ```python
  # Plot on first axis (top-left)
  axes[0, 0].scatter(df['average'], df['strike_rate'], color='red')
  axes[0, 0].set_title('Avg vs Strike Rate')

  # Plot on second axis (top-right)
  axes[0, 1].scatter(df['runs'], df['average'], color='blue')
  axes[0, 1].set_title('Runs vs Avg')
  ```

#### **Key Methods for Axes**:
- `set_xlabel()`, `set_ylabel()`: Axis labels.
- `set_title()`: Plot title.
- `grid()`: Add grid lines.

---

### **4. Customizing Subplots**
#### **Shared Axes**:
```python
fig, axes = plt.subplots(2, 1, sharex=True)  # Share X-axis between subplots
```
- Avoids redundant labels (e.g., only bottom plot shows X-axis labels).

#### **Adjust Layout**:
```python
plt.tight_layout()  # Prevents overlapping labels
```

#### **Example: 4 Subplots (2x2 Grid)**
```python
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Plot 1: Scatter
axes[0, 0].scatter(df['average'], df['strike_rate'], s=50, alpha=0.6)
axes[0, 0].set_title('Avg vs Strike Rate')

# Plot 2: Histogram
axes[0, 1].hist(df['runs'], bins=20, color='green')
axes[0, 1].set_title('Runs Distribution')

# Plot 3: Boxplot
axes[1, 0].boxplot(df['average'])
axes[1, 0].set_title('Average Stats')

# Plot 4: Line Plot
axes[1, 1].plot(df['strike_rate'], marker='o')
axes[1, 1].set_title('Strike Rate Trend')

plt.tight_layout()
plt.show()
```

---

### **5. Advanced Techniques**
#### **Uneven Grids with `GridSpec`**:
```python
import matplotlib.gridspec as gridspec
fig = plt.figure(figsize=(10, 8))
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 2], height_ratios=[2, 1])

ax1 = plt.subplot(gs[0])  # Top-left (taller)
ax2 = plt.subplot(gs[1])  # Top-right (wider)
ax3 = plt.subplot(gs[2])  # Bottom-left
```

#### **Adding Insets**:
```python
ax_inset = fig.add_axes([0.6, 0.6, 0.2, 0.2])  # [x, y, width, height]
ax_inset.hist(df['runs'], bins=10)
```

---

### **6. Key Takeaways**
1. **Flexibility**: Arrange plots in any grid (e.g., 1x2, 2x2, 3x1).
2. **Consistency**: Use shared axes for aligned comparisons.
3. **Customization**: Control individual plots via `axes` array.
4. **Real-World Use**:
   - Compare distributions (histograms).
   - Analyze correlations (scatter plots).
   - Track trends over time (line plots).

---

### **Common Pitfalls & Fixes**
- **Overlapping Labels**: Use `tight_layout()` or adjust `figsize`.
- **Wrong Axis Reference**: Double-check indices (e.g., `axes[0,1]` vs `axes[1,0]`).
- **Cluttered Plots**: Limit subplots or increase figure size.

---

### **Example Workflow**
1. **Initialize Subplots**:
   ```python
   fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
   ```
2. **Plot Data**:
   ```python
   axes[0].scatter(x1, y1, color='red')
   axes[1].hist(x2, bins=15, color='blue')
   ```
3. **Add Labels/Titles**:
   ```python
   axes[0].set_ylabel('Strike Rate')
   axes[1].set_xlabel('Average')
   ```
4. **Display**:
   ```python
   plt.tight_layout()
   plt.show()
   ```

This approach is essential for **exploratory data analysis (EDA)** and **dashboard-style visualizations**.

---


- Other plot types covered next: 3D scatter plots, 3D line plots, 3D surface plots and contour plots.

---

### 🔷 Summary: 3D Plotting in `matplotlib`

You're learning how to plot **3D charts** using `matplotlib` in Python, especially for visualizing cricket (IPL batsmen) data with metrics like **runs**, **average**, and **strike rate**.

#### 📌 Four Main 3D Plots Discussed:

1. **3D Scatter Plot**
2. **3D Line Plot**
3. **3D Surface Plot**
4. **Contour & Contour-Fill Plots**
---

### ✅ Important Concepts & Steps

#### 1. **Import Required Libraries**

```python
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # Only required on Matplotlib < 3.2; newer versions register the '3d' projection automatically
```

---

#### 2. **Create a 3D Figure & Axes**

```python
fig = plt.figure(figsize=(10, 7))  # Optional: set figure size
ax = fig.add_subplot(111, projection='3d')  # Make 3D axes
```

* `projection='3d'` is **essential** for enabling 3D plotting.

---

#### 3. **3D Scatter Plot**

```python
# Sample data
x = df['Runs']
y = df['Average']
z = df['Strike Rate']

ax.scatter(x, y, z)
```

##### 🎯 Use Case:

To visualize the **relationship between 3 numerical variables** (e.g., performance of batsmen).

##### 📌 Extras:

```python
ax.set_title('IPL Batsmen Analysis')
ax.set_xlabel('Runs')
ax.set_ylabel('Average')
ax.set_zlabel('Strike Rate')
```

* You can also customize:

  * `color`, `marker`, `s` (marker size), etc.

---

#### 4. **3D Line Plot**

* Plotting multiple points as a **connected line** in 3D space.

```python
x = [0, 1, 5]
y = [0, 1, 3]
z = [0, 1, 3]

ax.plot(x, y, z)  # Connects points in order
```

#### 🎯 Use Case:

To show **trends or transitions** across 3D space.

#### ⚠️ Note:

* **Less common in real-world 3D analysis**.
* 3D Line Plots are mainly for **trajectory-like visualizations**.

---

## 💡 Tips & Takeaways

| Concept               | Detail                                                          |
| --------------------- | --------------------------------------------------------------- |
| `projection='3d'`     | Required to activate 3D plotting.                               |
| `ax.scatter()`        | Used for plotting 3D scatter points.                            |
| `ax.plot()`           | Used for 3D line plots.                                         |
| `.set_xlabel()`, etc. | For setting axis labels.                                        |
| Customizations        | You can adjust `marker`, `color`, `size`, etc.                  |
| Use case of 3D plots  | Best when analyzing 3 features at once.                         |
| Size matching         | Marker sizes or styles should be length-compatible with points. |

---

### 🧠 Visualization Example Idea

If you have a DataFrame like:

```python
df = pd.DataFrame({
    'Batsman': [...],
    'Runs': [...],
    'Average': [...],
    'Strike Rate': [...]
})
```

Then plotting 3D scatter:

```python
ax.scatter(df['Runs'], df['Average'], df['Strike Rate'], c='green', marker='^')
```

---

### ✅ **4 Key 3D Plot Types in `matplotlib`** (Summary)

| Plot Type                           | Purpose / Use Case                                                                     |
| ----------------------------------- | -------------------------------------------------------------------------------------- |
| **1. 3D Scatter Plot**              | To visualize relationship between 3 numerical variables (e.g., Runs, Avg, Strike Rate) |
| **2. 3D Line Plot**                 | To visualize point-to-point connections in 3D (like path or trajectory)                |
| **3. 3D Surface Plot**              | To visualize 3D functions (e.g., loss surfaces) — useful in **machine learning**       |
| **4. Contour & Contour-Fill Plots** | 2D heatmap-style top-down view of surface; shows height (z-values) via color bands     |

---

### 🔷 **3. 3D Surface Plot (`plot_surface`)**

#### 📌 Purpose:

Used to visualize mathematical functions in 3D (e.g., `z = x² + y²`, `z = sin(x) + cos(y)`). These are especially useful to analyze **loss functions** in machine learning.

---

#### ✨ Code Steps to Create a 3D Surface Plot:

```python
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # Only required on Matplotlib < 3.2; newer versions register the '3d' projection automatically

# Step 1: Create meshgrid for X and Y
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
X, Y = np.meshgrid(x, y)

# Step 2: Define Z as a function of X and Y
Z = X**2 + Y**2  # You can replace with sin, cos, etc.

# Step 3: Create figure and 3D axes
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# Step 4: Plot surface
surf = ax.plot_surface(X, Y, Z, cmap='viridis')  # use 'plasma', 'coolwarm' etc.

plt.colorbar(surf)  # Optional: shows color gradient scale
plt.show()
```

---

#### 🧠 Notes:

* `X`, `Y` → 2D meshgrids (created using `np.meshgrid`)
* `Z` → function of `X`, `Y`
* `plot_surface(X, Y, Z)` → core plotting function
* `cmap='...'` → changes the color gradient (e.g., `'plasma'`, `'inferno'`, `'coolwarm'`)
* Higher Z values (e.g., mountain peaks) are **bright/yellow**, lower Z values (e.g., valleys) are **dark/purple**

---

### 🔷 **4. Contour & Contour-Fill Plot**

#### 📌 Purpose:

Used for a **top-down 2D view** of a 3D surface, where **colors represent height (Z values)**. Think of it like viewing a terrain map from above.

---

#### ✨ Code Steps for Contour Plot:

```python
# Same mesh and Z as above
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2

# Basic Contour Plot
plt.figure(figsize=(8, 6))
plt.contour(X, Y, Z, cmap='plasma')
plt.colorbar()
plt.title('Contour Plot')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
```

---

#### ✨ Code Steps for Filled Contour Plot:

```python
plt.figure(figsize=(8, 6))
plt.contourf(X, Y, Z, cmap='plasma')  # 'f' = filled contours
plt.colorbar()
plt.title('Filled Contour Plot')
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
```

---

#### 🧠 Notes:

* `contour()` → draws contour lines (like elevation curves)
* `contourf()` → fills the space between lines with color
* Darker colors = **lower** values of Z
* Lighter/bright colors = **higher** values of Z
* Great for understanding "where" the surface goes up/down quickly

---

### 🧠 Use Cases in Machine Learning:

* Used heavily in:

  * **Loss function visualization**
  * **Gradient descent path**
  * **Decision boundary visualizations**
* Example: `Z = loss_function(w1, w2)` where `w1`, `w2` are model weights

---

### ✅ Recap of Key Points

| Concept                    | Detail                                           |
| -------------------------- | ------------------------------------------------ |
| `np.meshgrid()`            | Creates grid of X and Y values                   |
| `plot_surface(X, Y, Z)`    | Plots a 3D surface                               |
| `contour()` / `contourf()` | 2D projections of 3D surfaces                    |
| `cmap='plasma'` etc.       | Color gradients for better visualization         |
| `plt.colorbar()`           | Shows the Z-value color scale                    |
| `Z = f(X, Y)`              | Can be any mathematical or user-defined function |

---

### 🔍 Bonus: Try These Function Variants

* `Z = np.sin(X) + np.cos(Y)`
* `Z = np.exp(-X**2 - Y**2)`
* `Z = np.sqrt(X**2 + Y**2)`
* `Z = np.sin(np.sqrt(X**2 + Y**2))`

These will give **interesting shapes and landscapes** to visualize!

---

### 🔥 Heatmap in Python (Matplotlib + Seaborn)

#### 📌 **What is a Heatmap?**

* A **2D graphical representation of data** where individual values in a matrix are represented as colors.
* Typically used to **compare two categorical variables** (e.g., over number & ball number).
* **Color intensity** shows the magnitude of values.

  * **Red/yellow** = High values
  * **Blue/purple** = Low values

---

### ✅ Real-World IPL Use Case: **6s on Each Ball**

#### 🎯 Goal:

Plot a heatmap to visualize **how many sixes** have been hit **on each ball** (ball 1 to 6) of **each over** (over 1 to 20) in **all IPL matches**.

---

### 🛠 Steps to Build the Heatmap

#### **1. Load Ball-by-Ball IPL Dataset**

```python
import pandas as pd
df = pd.read_csv("IPL_Ball_by_Ball_Dataset.csv")
```

---

#### **2. Filter Dataset for Sixes Only**

```python
# Filter where batsman runs == 6
df_sixes = df[df['batsman_runs'] == 6]
```

---

#### **3. Create a Pivot Table or Grouped Data**

We want a 2D table like:

```
        Ball1  Ball2  Ball3  Ball4  Ball5  Ball6
Over1     10     12     7     9     6      11
Over2     15     8      4     13    9      5
...
```

```python
heatmap_data = df_sixes.pivot_table(
    index='over',       # Y-axis
    columns='ball',     # X-axis
    values='batsman_runs',  # Any column, we use 'count'
    aggfunc='count',    # Count how many sixes per over-ball
    fill_value=0        # Fill missing with 0
)
```

---

#### **4. Plot the Heatmap using Seaborn**

```python
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, annot=True, cmap='plasma')
plt.title("Total Sixes per Over and Ball in IPL History")
plt.xlabel("Ball Number")
plt.ylabel("Over Number")
plt.show()
```

---

### 💡 Important Notes & Concepts

| Concept             | Explanation                                                           |
| ------------------- | --------------------------------------------------------------------- |
| **Heatmap**         | Visual summary of how a value changes across 2 dimensions.            |
| **Pivot Table**     | Converts raw data into 2D grid suitable for heatmap plotting.         |
| **Seaborn Heatmap** | `sns.heatmap()` is the easiest way to create heatmaps in Python.      |
| **`annot=True`**    | Displays actual values in each heatmap cell.                          |
| **`cmap='plasma'`** | Color palette — other options: `'coolwarm'`, `'viridis'`, `'YlGnBu'`. |
| **`fill_value=0`**  | Ensures no missing values in the matrix.                              |
| **Use Case**        | Shows where sixes are more likely — useful for strategy building.     |

---

### 🧠 Insights from the IPL Heatmap

* Helps identify:

  * **Which ball in which over** is **most vulnerable** for sixes.
  * **Risk zones** for bowlers to avoid.
* Useful for:

  * Bowling strategy
  * Fantasy cricket prediction
  * Pattern detection

---

### 🧰 Extended Ideas / Variants You Can Try

| Variant Idea                    | Code Hint                                |
| ------------------------------- | ---------------------------------------- |
| Heatmap of 4s instead of 6s     | `df[df['batsman_runs'] == 4]`            |
| Heatmap by batsman              | Add `'batsman'` to grouping              |
| Heatmap by innings (1st vs 2nd) | Filter on `'innings'` column             |
| Heatmap by team                 | Add `'batting_team'` or `'bowling_team'` |
| Animated heatmap by year        | Use `matplotlib.animation` or `plotly`   |

---

### 🔚 Final Thoughts

* Heatmaps are powerful **2D tools for quick pattern recognition**.
* They work especially well in **sports analytics**, **finance**, and **operations research**.
* Combine with filters and domain knowledge to derive **actionable insights**.

---
(01:39:49)
