In [7]:
import pandas as pd
from pandas_profiling import ProfileReport
import numpy as np
import plotly.graph_objects as go


In [8]:
# loading the dataframe
df = pd.read_csv("CleanedFlights.csv")


# loading a dataframe with US-Holidays for more context
dfUSHolidays = pd.read_csv("USHolidayDates(2004-2021).csv")

# Dropping the cancelled Flights, because they are not relevant for this visualization

print("Dataframe length before cleaning: ", len(df))
df = df[df["CANCELLED"] == 0]
print("Dataframe length after cleaning: ", len(df))



Columns (7,8) have mixed types. Specify dtype option on import or set low_memory=False.



Dataframe length before cleaning:  5819079
Dataframe length after cleaning:  5729195


In [9]:
# Filtering the Holiday Dataframe

dfUSHolidays = dfUSHolidays[dfUSHolidays["Year"] == 2015]

# creating a dictionary with the date as key and holiday as value

holidayDict = pd.Series(dfUSHolidays.Holiday.values, index = dfUSHolidays.Date).to_dict()

# Enriching the flight Dataframe with this data

df.insert(column = "HOLIDAY", loc = 1, value = "") 
df["HOLIDAY"] = df["FULL_DATE"].map(holidayDict)
df["HOLIDAY"] = df["HOLIDAY"].fillna(" ")
df.head()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0.1,Unnamed: 0,HOLIDAY,FULL_DATE,YEAR,MONTH,DAY,DAY_OF_WEEK,AIRLINE,ORIGIN_AIRPORT,DESTINATION_AIRPORT,...,DEPARTURE_TIME,DEPARTURE_DELAY,SCHEDULED_TIME,ELAPSED_TIME,AIR_TIME,DISTANCE,SCHEDULED_ARRIVAL,ARRIVAL_TIME,ARRIVAL_DELAY,CANCELLED
0,0,New Year's Day,2015-01-01,2015,1,1,THURSDAY,AS,ANC,SEA,...,23:54.0,-11.0,205.0,194.0,169.0,1448,430,408.0,-22.0,0
1,1,New Year's Day,2015-01-01,2015,1,1,THURSDAY,AA,LAX,PBI,...,00:02.0,-8.0,280.0,279.0,263.0,2330,750,741.0,-9.0,0
2,2,New Year's Day,2015-01-01,2015,1,1,THURSDAY,US,SFO,CLT,...,00:18.0,-2.0,286.0,293.0,266.0,2296,806,811.0,5.0,0
3,3,New Year's Day,2015-01-01,2015,1,1,THURSDAY,AA,LAX,MIA,...,00:15.0,-5.0,285.0,281.0,258.0,2342,805,756.0,-9.0,0
4,4,New Year's Day,2015-01-01,2015,1,1,THURSDAY,AS,SEA,ANC,...,00:24.0,-1.0,235.0,215.0,199.0,1448,320,259.0,-21.0,0


In [10]:
# calculating the average flighs per month

januaryMean = df[df["MONTH"] == 1].count()["YEAR"] / 31
februaryMean = df[df["MONTH"] == 2].count()["YEAR"] / 28
marchMean = df[df["MONTH"] == 3].count()["YEAR"] / 31
aprilMean = df[df["MONTH"] == 4].count()["YEAR"] / 30
mayMean = df[df["MONTH"] == 5].count()["YEAR"] / 31
juneMean = df[df["MONTH"] == 6].count()["YEAR"] / 30
julyMean = df[df["MONTH"] == 7].count()["YEAR"] / 31
augustMean = df[df["MONTH"] == 8].count()["YEAR"] / 31
septemberMean = df[df["MONTH"] == 9].count()["YEAR"] / 30
octoberMean = df[df["MONTH"] == 10].count()["YEAR"] / 31
novemberMean = df[df["MONTH"] == 11].count()["YEAR"] / 30
decemberMean = df[df["MONTH"] == 12].count()["YEAR"] / 31

In [11]:
# Visualization of Number of Flights by each day

# creating sub-dataframes with the wanted information



flightsPerDay = pd.DataFrame(df.groupby(["FULL_DATE"]).count()["YEAR"])
flightsPerDay.insert(column = "HOLIDAY", loc = 0, value = "") 
flightsPerDay["HOLIDAY"] = flightsPerDay.index.map(holidayDict)
flightsPerDay["HOLIDAY"] = flightsPerDay["HOLIDAY"].fillna("-")
flightsPerMonth = pd.DataFrame({"FULL_DATE": ["2015-01-01","2015-02-14", "2015-03-15", "2015-04-15", "2015-05-15", "2015-06-15", "2015-07-15", "2015-08-15", "2015-09-15", "2015-10-15", "2015-11-15", "2015-12-31"],"COUNT" : [januaryMean, februaryMean, marchMean, aprilMean, mayMean, juneMean, julyMean, augustMean, septemberMean, octoberMean, novemberMean, decemberMean]})
monthlyDates = pd.to_datetime(flightsPerMonth["FULL_DATE"])

In [12]:

customdataFlight = flightsPerDay.drop(columns ="YEAR")
customdataMonth = pd.DataFrame(
                            {"Index": ["1","2","3","4","5","6","7","8","9","10","11","12"],
                            "Month": ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]})
fig = go.Figure()
line_flight = go.Line(y = flightsPerDay["YEAR"], x = flightsPerDay.index, name = "Flights per day", fill = "tozeroy", opacity = 0,
                    customdata = customdataFlight,
                    hovertemplate = "Flights: %{y} " +\
                    "<br>Date: %{x}" +\
                    "<br>Holiday: %{customdata}")
line_averageNumbers = go.Line(y=flightsPerMonth["COUNT"], x = monthlyDates , name = "Average flights per day in month", line_color = "black",
                    customdata = customdataMonth["Month"],
                    hovertemplate = "Month: %{customdata}"
                    "<br>Average Flights per Day: %{y}")
fig.add_trace(line_flight)
fig.add_trace(line_averageNumbers)
fig.update_traces(xcalendar = "gregorian")
fig.update(layout_yaxis_range = [7000, 18500], layout_xaxis_range = ["2015-01-01", "2015-12-31"])
fig.show()



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


