# 1. Load and Import Libraries and Datasets



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_covtype, load_breast_cancer

import tensorflow as tf
from tensorflow.data import Dataset

# Render every column when a DataFrame is displayed (None lifts the column limit).
pd.options.display.max_columns = None
print("Versão do TensorFlow:", tf.__version__)

## 1.1 CoverType Dataset

This code loads the Covertype dataset, separates the features and target, and creates a pandas DataFrame with appropriate column names. The target variable is then added as a new column to the DataFrame.


In [None]:
# Fetch the Covertype bunch and pull out the feature matrix and the labels.
covtype = fetch_covtype()
X_cov = covtype.data
y_cov = covtype.target

# Build a DataFrame labelled with the bunch's feature names, then append the
# labels as a "target" column.
columns_cov = covtype.feature_names
df_cov = pd.DataFrame(data=X_cov, columns=columns_cov)
df_cov["target"] = y_cov

### Basic Information

Displays a summary of the dataset, including the number of entries, column names, and data types.

In [None]:
print("\n=== CoverType Resume Info  ===")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df_cov.info()

Shows basic statistical details of the numerical features, such as mean, standard deviation, min, max, and percentiles.

In [None]:
print("\nStatistic Resume of the Features:")
# Summary statistics for every column of the CoverType frame.
stats_cov = df_cov.describe()
display(stats_cov)

Displays the first five rows of the dataset to give an overview of its structure and contents.

In [None]:
print("\n=== First lines of CoverType ===")
# Preview the first five rows.
preview_cov = df_cov.head(5)
display(preview_cov)

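Prints the number of unique classes in the target variable, followed by the dataset's `target_names` attribute (for Covertype this is the name of the target column, not a list of per-class names).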

In [None]:
# Distinct label values found in the CoverType target.
cov_class_labels = np.unique(y_cov)
print(f"Number of classes: {len(cov_class_labels)}")
print(f"Class labels: {cov_class_labels}")
# fetch_covtype's `target_names` holds the name of the target *column*
# (not one name per class), so it is reported under an accurate label.
print(f"Target column name: {covtype.target_names}")

## 1.2 Breast Cancer Dataset

This code loads the Breast Cancer dataset, separates the features and target, and creates a pandas DataFrame with appropriate column names. The target variable is then added as a new column to the DataFrame.

In [None]:
# Load the Breast Cancer bunch and pull out the feature matrix and the labels.
cancer = load_breast_cancer()
X_cancer = cancer.data
y_cancer = cancer.target

# Build a DataFrame labelled with the bunch's feature names, then append the
# labels as a "target" column.
columns_cancer = cancer.feature_names
df_cancer = pd.DataFrame(data=X_cancer, columns=columns_cancer)
df_cancer["target"] = y_cancer

### Basic Information

Displays a summary of the dataset, including the number of entries, column names, and data types.

In [None]:
print("\n=== Breast Cancer Resume Info  ===")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df_cancer.info()

Shows basic statistical details of the numerical features, such as mean, standard deviation, min, max, and percentiles.

In [None]:
print("\nStatistic Resume of the Features:")
# Summary statistics for every column of the Breast Cancer frame.
stats_cancer = df_cancer.describe()
display(stats_cancer)

Displays the first five rows of the dataset to give an overview of its structure and contents.

In [None]:
print("\n=== First lines of Breast Cancer ===")
# Preview the first five rows.
preview_cancer = df_cancer.head(5)
display(preview_cancer)

Prints the number of unique classes in the target variable and their corresponding names.

In [None]:
# Distinct label values found in the Breast Cancer target.
cancer_class_labels = np.unique(y_cancer)
print(f"Number of classes: {cancer_class_labels.size}")
print(f"Name of the Classes: {cancer.target_names}")

# 2. Exploratory Data Analysis

## 2.1 CoverType

In [None]:
def check_skewness(df, threshold=1.0):
    """Return the features of ``df`` whose absolute skewness exceeds ``threshold``.

    Args:
        df: DataFrame of features. A ``"target"`` column, if present, is
            excluded from the computation.
        threshold: Minimum absolute skewness a feature must exceed to be
            reported.

    Returns:
        A Series of skewness values indexed by feature name, sorted in
        descending order and restricted to features with
        ``|skew| > threshold``.
    """
    # errors="ignore" lets the helper run on frames that have no "target" column.
    features = df.drop(columns=["target"], errors="ignore")
    # numeric_only=True skips non-numeric columns, on which recent pandas
    # versions raise instead of silently dropping them.
    skew_values = features.skew(numeric_only=True).sort_values(ascending=False)
    return skew_values[skew_values.abs() > threshold]

# --- CoverType Dataset ---
print("=== CoverType Dataset ===")

# Features whose absolute skewness exceeds check_skewness's default threshold
skewed_cov = check_skewness(df_cov)
print(f"\nSkewed features in CoverType (|skew| > 1):\n{skewed_cov}")

# Pairwise feature correlations (target excluded) to look for multicollinearity
features_cov = df_cov.drop(columns=["target"])
corr_matrix_cov = features_cov.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(
    corr_matrix_cov,
    cmap="coolwarm",
    center=0,
    square=True,
    cbar_kws={"shrink": 0.5},
)
plt.title("CoverType - Feature Correlation Heatmap")
plt.show()

# --- Breast Cancer Dataset ---
print("\n=== Breast Cancer Dataset ===")

# Features whose absolute skewness exceeds check_skewness's default threshold
skewed_cancer = check_skewness(df_cancer)
print(f"\nSkewed features in Breast Cancer (|skew| > 1):\n{skewed_cancer}")

# Pairwise feature correlations (target excluded)
features_cancer = df_cancer.drop(columns=["target"])
corr_matrix_cancer = features_cancer.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(
    corr_matrix_cancer,
    cmap="coolwarm",
    center=0,
    square=True,
    cbar_kws={"shrink": 0.5},
)
plt.title("Breast Cancer - Feature Correlation Heatmap")
plt.show()