# Matplotlib Tutorial
## Introduction to Data Visualization with Matplotlib
This notebook follows the MOOC.fi *Data Analysis with Python* course.

## 1. Importing Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# For displaying plots in Jupyter
%matplotlib inline

## 2. Basic Line Plot

In [None]:
# Sample 100 evenly spaced points on [0, 10] and evaluate the sine at each one
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Draw the curve on a 10x6-inch figure using the Axes (object-oriented) interface
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y)
ax.set(title='Sine Wave', xlabel='x', ylabel='sin(x)')
ax.grid(True)
plt.show()

## 3. Multiple Lines

In [None]:
x = np.linspace(0, 10, 100)

# tan(x) jumps from +inf to -inf at every odd multiple of pi/2.  Plotting the
# raw samples would join the last point before each pole to the first point
# after it, drawing a near-vertical line that is not part of the function.
# tan(x) is strictly increasing between poles, so a decrease between two
# consecutive samples marks a pole crossing; replacing the sample just before
# the drop with NaN makes Matplotlib break the line there.
tan_x = np.tan(x)
tan_x[:-1][np.diff(tan_x) < 0] = np.nan

plt.figure(figsize=(10, 6))
plt.plot(x, np.sin(x), label='sin(x)', color='blue', linestyle='-')
plt.plot(x, np.cos(x), label='cos(x)', color='red', linestyle='--')
plt.plot(x, tan_x, label='tan(x)', color='green', linestyle='-.')

plt.title('Trigonometric Functions')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
# tan(x) is unbounded, so clip the view to keep sin and cos readable
plt.ylim(-5, 5)
plt.show()

## 4. Scatter Plot

In [None]:
# Seed the global NumPy generator so the four draws below are reproducible.
# The draws are kept in this order because each one advances the same stream.
np.random.seed(42)
x = np.random.randn(100)
y = np.random.randn(100)
colors = np.random.randn(100)
sizes = np.square(np.random.randn(100)) * 1000  # squared, so marker areas are never negative

# Colour each marker by `colors`, scale it by `sizes`, and attach a colour bar
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(x, y, c=colors, s=sizes, alpha=0.5, cmap='viridis')
fig.colorbar(points, ax=ax)
ax.set(title='Scatter Plot with Random Data', xlabel='X-axis', ylabel='Y-axis')
ax.grid(True)
plt.show()

## 5. Bar Chart

In [None]:
# Read the sales table; the path is relative to the notebook's working directory
sales_df = pd.read_csv('../datasets/sales_data.csv')

# Total the 'Sales' column within each 'Category'
category_sales = sales_df.groupby('Category')['Sales'].sum()

# One bar per category, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
category_sales.plot(kind='bar', color=['skyblue', 'coral'], ax=ax)
ax.set_title('Total Sales by Category')
ax.set_xlabel('Category')
ax.set_ylabel('Total Sales ($)')
# Tilt the category names so they stay readable
plt.setp(ax.get_xticklabels(), rotation=45)
# Faint horizontal guides only; vertical grid lines add nothing to a bar chart
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 6. Histogram

In [None]:
# Draw 1000 reproducible samples from the standard normal distribution
np.random.seed(42)
data = np.random.randn(1000)

# Bin the samples into 30 bars; the black edges keep adjacent bars distinguishable
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(data, bins=30, color='steelblue', edgecolor='black', alpha=0.7)
ax.set(title='Histogram of Normal Distribution', xlabel='Value', ylabel='Frequency')
ax.grid(axis='y', alpha=0.3)
plt.show()

## 7. Subplots

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

x = np.linspace(0, 2*np.pi, 100)

# One entry per panel: (title, y-values, Matplotlib format string).
# The entries are listed in row-major order to match `axes.flat`:
# top-left, top-right, bottom-left, bottom-right.
panels = [
    ('sin(x)', np.sin(x), 'b-'),
    ('cos(x)', np.cos(x), 'r-'),
    ('exp(x/3)', np.exp(x/3), 'g-'),
    ('log(x+1)', np.log(x+1), 'm-'),
]

# Every panel gets the same treatment: curve, title, grid
for ax, (title, values, fmt) in zip(axes.flat, panels):
    ax.plot(x, values, fmt)
    ax.set_title(title)
    ax.grid(True)

plt.tight_layout()
plt.show()

## 8. Box Plot

In [None]:
# Load student scores; the path is relative to the notebook's working directory
students_df = pd.read_csv('../datasets/student_scores.csv')

# Collect one series of scores per subject.  Missing scores are dropped
# because boxplot does not skip NaN values: a single NaN would leave that
# subject's box empty.
subjects = ['Math', 'Science', 'English', 'History']
data_to_plot = [students_df[subject].dropna() for subject in subjects]

plt.figure(figsize=(10, 6))
plt.boxplot(data_to_plot)
# Label the boxes through the x ticks rather than boxplot's `labels=` keyword,
# which Matplotlib 3.9 deprecated in favour of `tick_labels=`.  Setting the
# ticks directly works on old and new versions alike; boxes are placed at
# positions 1..N by default.
plt.xticks(range(1, len(subjects) + 1), subjects)
plt.title('Student Scores Distribution by Subject')
plt.ylabel('Score')
plt.grid(axis='y', alpha=0.3)
plt.show()

## 9. Pie Chart

In [None]:
# Total the 'Sales' column within each 'Region'
# (reuses the sales_df loaded in the bar-chart cell)
region_sales = sales_df.groupby('Region')['Sales'].sum()

# One wedge per region, labelled with the region name and its share to one decimal place
fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(region_sales, labels=region_sales.index, autopct='%1.1f%%', startangle=90)
ax.set_title('Sales Distribution by Region')
# Equal axis scaling keeps the pie circular instead of elliptical
ax.axis('equal')
plt.show()

## 10. Heatmap

In [None]:
# Pairwise correlations between the subject columns
# (reuses students_df and subjects from the box-plot cell)
correlation_matrix = students_df[subjects].corr()

n_subjects = len(subjects)
tick_positions = range(n_subjects)

plt.figure(figsize=(10, 8))
# Pin the colour scale to [-1, 1] so zero correlation sits at the neutral midpoint
plt.imshow(correlation_matrix, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
plt.colorbar(label='Correlation')
plt.xticks(tick_positions, subjects, rotation=45)
plt.yticks(tick_positions, subjects)
plt.title('Correlation Heatmap of Student Scores')

# Print each coefficient in the centre of its cell.  imshow puts columns on
# the x axis and rows on the y axis, hence the (col, row) order in plt.text.
for row, col in np.ndindex(n_subjects, n_subjects):
    plt.text(col, row, f'{correlation_matrix.iloc[row, col]:.2f}',
             ha='center', va='center', color='black')

plt.tight_layout()
plt.show()

## Practice Exercise
Try to solve the following tasks using the `weather_data.csv` dataset:
1. Load the weather dataset
2. Create a line plot showing temperature trends for all cities
3. Create a bar chart comparing average humidity by city
4. Create a scatter plot of Temperature vs Humidity
5. Create subplots showing different weather metrics for each city

In [None]:
# Your solution here
