# Course: Data Visualization
## Author: Sandro da Silva Camargo sandrocamargo@unipampa.edu.br

This notebook aims to demonstrate various methods for plotting multivariate data.

To open this notebook in your Google Colab environment, click here.

In [None]:
# Import the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the Iris dataset through seaborn's example-dataset loader
iris = sns.load_dataset(name='iris')

# Print the first five rows to inspect the columns and their values
print(iris.head(n=5))

In [None]:
# Plain (uncolored) scatter plots, one figure per flower part:
# first Sepal Length vs Sepal Width, then Petal Length vs Petal Width.
# Each tuple is (x column, y column, x label, y label, title, output file).
for x_col, y_col, x_label, y_label, title, out_file in [
    ('sepal_length', 'sepal_width',
     'Sepal Length', 'Sepal Width',
     'Sepal Length vs Sepal Width', 'multivar1.png'),
    ('petal_length', 'petal_width',
     'Petal Length', 'Petal Width',
     'Petal Length vs Petal Width', 'multivar2.png'),
]:
    plt.figure(figsize=(6, 4))
    plt.scatter(iris[x_col], iris[y_col])
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    # Write the image file before displaying the figure
    plt.savefig(out_file)
    plt.show()

In [None]:
# Scatter plots with one color per species
species_unique = iris['species'].unique()
# Fixed species -> color mapping, also passed as `palette` to the seaborn plots
colors = {
    'setosa': 'blue',
    'versicolor': 'green',
    'virginica': 'red',
}

# Same two variable pairs as the plain scatter plots, now drawn species by
# species so each group gets its own color and legend entry.
# Each tuple is (x column, y column, x label, y label, title, output file).
for x_col, y_col, x_label, y_label, title, out_file in [
    ('sepal_length', 'sepal_width',
     'Sepal Length', 'Sepal Width',
     'Sepal Length vs Sepal Width (Por Espécie)', 'multivar3.png'),
    ('petal_length', 'petal_width',
     'Petal Length', 'Petal Width',
     'Petal Length vs Petal Width (Por Espécie)', 'multivar4.png'),
]:
    plt.figure(figsize=(6, 4))
    for name in species_unique:
        # Rows belonging to the current species only
        rows = iris.loc[iris['species'] == name]
        plt.scatter(rows[x_col], rows[y_col], color=colors[name], label=name)

    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.savefig(out_file)
    plt.show()

In [None]:
# Scatterplot matrix: pairwise plots of the dataset's variables,
# with points colored by species using the shared color mapping
sns.pairplot(
    data=iris,
    hue='species',
    palette=colors,
)
# Save the figure produced by pairplot, then display it
plt.savefig('multivar-matrix.png')
plt.show()

In [None]:
# Correlation matrix of the numeric columns (the species label is dropped)
corr = iris.drop('species', axis=1).corr()
plt.figure(figsize=(8,6))
# Pin the color scale to [-1, 1]: 'coolwarm' is a diverging colormap, so its
# neutral midpoint should correspond to zero correlation. With the default
# auto-scaling the midpoint lands wherever the data's min/max put it, which
# makes the red/blue split misleading.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlação entre as variáveis do Iris')
plt.savefig('multivar-corrplot.png')
plt.show()

In [None]:
# Principal Component Analysis (PCA)
# Feature matrix: every column except the species label, standardized
# with StandardScaler before the projection
X = iris.drop(columns='species')
X_std = StandardScaler().fit_transform(X)

# Project the standardized features onto the first two principal components
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_std)

# Component scores side by side with the species label (used for coloring)
pca_df = pd.concat(
    [
        pd.DataFrame(data=principal_components, columns=['PC1', 'PC2']),
        iris['species'],
    ],
    axis=1,
)

plt.figure(figsize=(6, 4))
sns.scatterplot(data=pca_df, x='PC1', y='PC2', hue='species', palette=colors)
plt.title('PCA aplicado ao conjunto de dados Iris')
plt.savefig('multivar-pca.png')
plt.show()

In [None]:
# Multidimensional scaling (MDS) of the standardized features (X_std, built
# in the PCA step) down to two dimensions; random_state is fixed at 42
mds = MDS(n_components=2, random_state=42)
mds_result = mds.fit_transform(X_std)

# Embedding coordinates side by side with the species label (used for coloring)
mds_df = pd.concat(
    [
        pd.DataFrame(data=mds_result, columns=['Dim1', 'Dim2']),
        iris['species'],
    ],
    axis=1,
)

plt.figure(figsize=(6, 4))
sns.scatterplot(data=mds_df, x='Dim1', y='Dim2', hue='species', palette=colors)
plt.title('MDS aplicado ao conjunto de dados Iris')
plt.savefig('multivar-mds.png')
plt.show()

In [None]:
# Line plot: each numeric variable drawn as one line against the row index
plt.figure(figsize=(8,6))
# Select the feature columns by dropping the label by name (as the correlation
# and PCA steps do) instead of relying on 'species' being the last column.
for column in iris.drop('species', axis=1).columns:
    plt.plot(iris[column], label=column)

plt.legend(loc='upper left')
plt.title("Gráfico de Linhas - Variáveis do Iris")
plt.savefig('multivar-lineplot.png')
plt.show()

In [None]:
# 2D density plot of Petal Length vs Petal Width: one filled kernel density
# estimate per species, colored with the shared species color mapping
plt.figure(figsize=(6, 4))
sns.kdeplot(
    data=iris,
    x='petal_length',
    y='petal_width',
    hue='species',
    palette=colors,
    fill=True,
)
plt.title('Gráfico de Densidade 2D: Petal Length vs Petal Width')
plt.savefig('multivar-density2d.png')
plt.show()
