In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from tabulate import tabulate

In [None]:
# Column labels applied to the airport dataset, in file order.
COLUMN_NAMES = [
    "Airport",
    "City",
    "ScheduledDepartures",
    "PerformedDepartures",
    "EnplanedPassengers",
    "EnplanedRevenueFreightTons",
    "EnplanedRevenueMailTons",
]

# Load the raw table, then replace whatever header the file carries with the
# labels above (the assignment raises if the column count does not match).
df = pd.read_csv("Datasets/airport.csv")
df.columns = COLUMN_NAMES

In [None]:
# Build histograms for every variable
# DataFrame.hist draws one subplot per numeric column of `df`, 50 bins each.
df.hist(bins=50, figsize=(12, 8))
# Keep subplot titles from colliding with the tick labels of the row above.
plt.tight_layout()
# Render the figure explicitly so the cell does not echo the array of Axes
# objects as its output (same finish as the boxplot cell).
plt.show()

In [None]:
# Build boxplots for every variable
# Skip the first two columns ("Airport" and "City"); every remaining column
# gets its own horizontal boxplot, stacked vertically in one figure.
columns = df.columns[2:]

# squeeze=False makes plt.subplots always return a 2-D array of Axes, so the
# code also works when only one column is plotted (otherwise a bare Axes is
# returned and cannot be indexed). Taking the single grid column restores the
# usual 1-D array.
fig, axes = plt.subplots(
    nrows=len(columns), ncols=1, figsize=(10, 15), squeeze=False
)
axes = axes[:, 0]

for ax, column in zip(axes, columns):
    ax.boxplot(
        # Drop missing values first: matplotlib's box statistics are not
        # NaN-aware, so a single NaN would leave the box empty.
        df[column].dropna(),
        vert=False,
        patch_artist=True,
        boxprops=dict(facecolor="lightblue", color="blue"),
        medianprops=dict(color="red"),
        whiskerprops=dict(color="blue"),
        capprops=dict(color="blue"),
        flierprops=dict(markerfacecolor="orange", marker="o", markersize=5),
    )
    ax.set_title(column)
    ax.grid(True, axis="x", linestyle="--")

plt.tight_layout()
plt.show()

In [None]:
# Numerical characteristics of all variables
# One row per numeric column of `df`, one column per summary statistic.
numerical_df = df.select_dtypes(include=["number"])
lower_quartile, upper_quartile = (
    numerical_df.quantile(level) for level in (0.25, 0.75)
)
table = pd.DataFrame(
    dict(
        Mean=numerical_df.mean(),
        Variance=numerical_df.var(),
        Deviation=numerical_df.std(),
        Median=numerical_df.median(),
        Q1=lower_quartile,
        Q3=upper_quartile,
    )
)
# The frame's column labels come straight from the keyword names above and
# are used as the table headers.
rendered_table = tabulate(table, headers="keys", tablefmt="fancy_grid")
print(rendered_table)

In [None]:
# Coefficients of correlation between variables
# Restrict to numeric columns, then compute the pairwise correlation matrix
# with DataFrame.corr's default settings.
numeric_part = df.select_dtypes(include=["number"])
correlations = numeric_part.corr()
rendered_correlations = tabulate(
    correlations,
    headers="keys",
    tablefmt="fancy_grid",
)
print(rendered_correlations)