In [None]:
# Change the current directory to the project directory (one level above the
# directory the kernel starts in) so that the relative paths used later in the
# notebook ("data/...", "renders/...") resolve.
import os

# Record the start directory only the first time this cell runs. A bare
# os.chdir("../") is relative to the *current* directory, so every re-run of
# the cell would climb one more level; anchoring on the recorded start
# directory makes the cell idempotent within a kernel session.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR") or os.getcwd()

# change directory (same target as "../" on the first run)
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

# print current directory
os.getcwd()

In [None]:
# imports
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the training data with flight_date parsed as datetimes, and keep the
# rows ordered by flight_date.
flights = pd.read_csv(
    "data/flights_train.csv",
    parse_dates=["flight_date"],
).sort_values(by=["flight_date"])

In [None]:
# overview of the loaded frame: columns, dtypes, non-null counts, memory usage
flights.info()

In [None]:
# descriptive statistics of the flight_date column
flights.loc[:, "flight_date"].describe()

In [None]:
# Columns treated as continuous; this list is reused by the plotting cells below.
continuous_cols = [
    "avg_weeks",
    "std_weeks",
    "target",
]

# descriptive statistics of those columns
flights.loc[:, continuous_cols].describe()

In [None]:
# Box plot of the continuous columns: one horizontal box per column.
fig = go.Figure(
    data=[
        go.Box(
            x=flights[col],
            name=col
        )
    for col in continuous_cols]
)

# The boxes are built from `x=`, so the measured values run along the x axis;
# the "value" label therefore belongs on the x axis (it was previously set on
# the y axis, which only carries the column names).
fig.update_layout(
    title="Box plot of continuous cols",
    xaxis_title="value"
)

fig.show()

# write_html does not create missing parent folders, so make sure the output
# folder exists before exporting (no-op when it is already there).
os.makedirs("renders", exist_ok=True)
fig.write_html("renders/boxplot-continuous-vars.html")

In [None]:
# Summary of the categorical columns: how often each location appears in the
# "to" column (in traffic) and in the "from" column (out traffic).
# sort_index() orders both series by location label.
count_in = flights["to"].value_counts().sort_index()
count_out = flights["from"].value_counts().sort_index()

# one bar trace per direction, sharing the location axis
fig = go.Figure(
    data=[
        go.Bar(
            x=count_in.index,
            y=count_in.values,
            name="in traffic"
        ),
        go.Bar(
            x=count_out.index,
            y=count_out.values,
            name="out traffic"
        ),
    ]
)

fig.update_layout(
    title="Traffic of locations",
    xaxis_title="location",
    yaxis_title="count"
)

fig.show()

# write_html does not create missing parent folders, so make sure the output
# folder exists before exporting (no-op when it is already there).
os.makedirs("renders", exist_ok=True)
fig.write_html("renders/traffic-airport.html")

In [None]:
# Exploration of the target: histogram normalised to a probability density.
# The title is applied through update_layout (which returns the figure), so
# the figure is built and titled in one expression.
fig = px.histogram(
    flights, x="target", histnorm="probability density"
).update_layout(title="Histogram of target")

fig.show()

In [None]:
# Scatter matrix of the continuous columns (every pairwise combination).
fig = px.scatter_matrix(
    flights[continuous_cols]
)

# Titled like the other figures in this notebook so the plot stands alone.
fig.update_layout(
    title="Scatter matrix of continuous cols"
)

fig.show()

In [None]:
# target against avg_weeks, one colour per value of the "from" column
fig = px.scatter(
    flights,
    x="avg_weeks",
    y="target",
    color="from",
)

fig.update_layout(
    # titled like the other figures in this notebook so the plot stands alone
    title="Target vs avg_weeks by from location",
    # NOTE(review): the initial view is restricted to target values in
    # [5, 15]; the bounds are hard-coded — confirm they still suit the data.
    yaxis_range=[5, 15]
)

fig.show()

In [None]:
# Exploration of time: target against flight_date, one line per value of the
# "from" column. Points are joined in row order, so this relies on `flights`
# being sorted by flight_date (done when the data is loaded).
fig = px.line(
    flights,
    x="flight_date",
    y="target",
    color="from"
)

# Titled like the other figures in this notebook so the plot stands alone.
fig.update_layout(
    title="Target over time by from location"
)

fig.show()