In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
from plotly import tools
import plotly.figure_factory as ff

# Load the carrier lookup and the flight records, then attach the carrier's
# full name to every flight.
# NOTE: merge() defaults to an inner join, so flights whose key has no match
# in a lookup table are dropped from `df`.
airlines = pd.read_csv('data/airlines.csv')
df = pd.read_csv('data/flights.csv', low_memory=False)
df = df.merge(airlines, left_on='AIRLINE', right_on='IATA_CODE')

# Both frames carry an AIRLINE column, so the merge produces AIRLINE_x
# (carrier code) and AIRLINE_y (carrier name). Expose the name as AIRLINE at
# column position 5 and discard the helper columns.
df.insert(5, 'AIRLINE', df['AIRLINE_y'])
df = df.drop(columns=['AIRLINE_y', 'IATA_CODE'])

# Attach airport name and city, first for the origin and then for the
# destination. The second pass collides with the columns added by the first,
# so pandas suffixes them: *_x = origin, *_y = destination.
airport = pd.read_csv('data/airports.csv')
airport_lookup = airport[['IATA_CODE', 'AIRPORT', 'CITY']]
for code_column in ('ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'):
    df = df.merge(airport_lookup, left_on=code_column, right_on='IATA_CODE')
    df = df.drop(columns=['IATA_CODE'])

# Transposed preview so the wide frame is readable.
df.head(2).T

Unnamed: 0,0,1
YEAR,2015,2015
MONTH,1,1
DAY,1,1
DAY_OF_WEEK,4,4
AIRLINE_x,AS,AS
AIRLINE,Alaska Airlines Inc.,Alaska Airlines Inc.
FLIGHT_NUMBER,98,108
TAIL_NUMBER,N407AS,N309AS
ORIGIN_AIRPORT,ANC,ANC
DESTINATION_AIRPORT,SEA,SEA


In [2]:
# Ten most frequent origin airports (AIRPORT_x is the origin airport name
# produced by the merges above); value_counts() already sorts descending.
top_origin_airports = df['AIRPORT_x'].value_counts().head(10)
label = top_origin_airports.index
size = top_origin_airports.values

# Default-styled pie chart of the share of each airport.
trace = go.Pie(values=size, labels=label)
data = [trace]
py.iplot(data)

In [3]:
# Custom palette (ten entries, one per plotted airport); reused by later cells.
colors = [
    'skyblue', '#FEBFB3', '#96D38C', '#D0F9B1', 'gold',
    'orange', 'lightgrey', 'lightblue', 'lightgreen', 'aqua',
]

# Same pie as before, with the palette applied and a small hole in the middle.
trace = go.Pie(
    labels=label,
    values=size,
    hole=0.2,
    marker={'colors': colors},
)
data = [trace]
py.iplot(data)

In [4]:
# Donut chart wrapped in a Figure so that a layout (here: a title) can be set.
trace = go.Pie(
    labels=label,
    values=size,
    hole=0.2,
    marker={'colors': colors},
)
data = [trace]
layout = go.Layout(title='Havaalanı Dağılımı')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [5]:
# Final donut variant: slice labels drawn outside the ring and the legend
# laid out horizontally.
trace = go.Pie(
    labels=label,
    values=size,
    hole=0.2,
    textposition="outside",
    marker={'colors': colors},
)
data = [trace]
layout = go.Layout(
    title='Havaalanı Dağılımı',
    legend={'orientation': "h"},
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [6]:
# Number of flights per calendar month, ordered January -> December.
#
# The frame is built with sort_index() / rename_axis() / reset_index(name=...)
# instead of sorting on the auto-generated 'index' column: since pandas 2.0
# value_counts().reset_index() no longer produces a column called 'index',
# so sort_values(by='index') raises KeyError there. This form yields the same
# two columns ('month', 'flight_num') in the same row order on every version.
df1 = (
    df['MONTH']
    .value_counts()
    .sort_index()
    .rename_axis('month')
    .reset_index(name='flight_num')
)

# Month number -> abbreviated English month name (also used by later cells).
month = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May',
            6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
df1['month'] = df1['month'].map(month)

In [7]:
# Plain bar chart: one bar per month, height = number of flights.
trace = go.Bar(x=df1['month'], y=df1['flight_num'])
data = [trace]
py.iplot(data)

In [8]:
# Colour each bar by its own height using the 'Jet' colour scale and show the
# colour bar next to the plot.
bar_style = {
    'color': df1['flight_num'],
    'colorscale': 'Jet',
    'showscale': True,
}
trace = go.Bar(x=df1['month'], y=df1['flight_num'], marker=bar_style)
data = [trace]
py.iplot(data)

In [9]:
# Monthly flight counts, coloured by bar height, with a title and y-axis label.
trace = go.Bar(
    x=df1.month,
    y=df1.flight_num,
    marker=dict(
        color=df1.flight_num,
        colorscale='Jet',
        showscale=True,
    ),
)

# df1 holds the count of ALL flights per month (it is built from
# df.MONTH.value_counts() without any cancellation filter), so the title must
# not claim the chart shows cancelled flights. Title now reads
# "Flights by Month"; the y-axis label ("Number of Flights") is unchanged.
layout = go.Layout(
    title='Aylara Göre Uçuşlar',
    yaxis=dict(title='Uçuş Sayısı'),
)
data = [trace]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [10]:
# Cancellation code -> human-readable reason.
reason = {'A': 'Airline/Carrier', 'B': 'Weather', 'C': 'National Air System', 'D': 'Security'}

# replace() instead of map(): map() turns every value that is not a key of
# `reason` into NaN, so running this cell a second time (when the column
# already holds the readable labels) would wipe the column and leave
# df2/df3/df4 empty. replace() leaves already-translated labels untouched,
# which makes the cell safe to re-run.
df['CANCELLATION_REASON'] = df['CANCELLATION_REASON'].replace(reason)


def _monthly_cancellations(flights, reason_label, month_names):
    """Count flights per month that were cancelled for ``reason_label``.

    Parameters
    ----------
    flights : DataFrame with ``CANCELLATION_REASON`` and ``MONTH`` columns.
    reason_label : readable reason to select (a value of ``reason``).
    month_names : mapping from month number to the label used as index.

    Returns
    -------
    One-column DataFrame sorted by month number; the column is always named
    ``MONTH`` and holds the counts, the index holds the mapped month labels.
    Months without a matching cancellation are absent.
    """
    selected_months = flights.loc[flights['CANCELLATION_REASON'] == reason_label, 'MONTH']
    # The column name is pinned because value_counts() names its result
    # 'count' on pandas >= 2.0, while later cells read the column as `.MONTH`.
    counts = selected_months.value_counts().sort_index().to_frame(name='MONTH')
    counts.index = counts.index.map(month_names)
    return counts


df2 = _monthly_cancellations(df, 'Weather', month)
df3 = _monthly_cancellations(df, 'Airline/Carrier', month)
df4 = _monthly_cancellations(df, 'National Air System', month)

In [11]:
# One bar series per cancellation reason: (monthly counts, legend name, colour).
reason_series = [
    (df2, 'Weather', 'aqua'),
    (df3, 'Airline/Carrier', 'red'),
    (df4, 'National Air System', 'navy'),
]

# Each frame has exactly one column holding the counts. Its label depends on
# the pandas version ('MONTH' before 2.0, 'count' from 2.0 on, because
# value_counts() renamed its result), so it is selected by position rather
# than with `.MONTH`, which raises AttributeError on newer pandas.
data = [
    go.Bar(
        x=counts.index,
        y=counts.iloc[:, 0],
        name=legend_name,
        marker=dict(color=bar_color),
    )
    for counts, legend_name, bar_color in reason_series
]
# Keep the individual trace names available, as the original cell defined them.
trace1, trace2, trace3 = data

layout = go.Layout(
    title='Uçuş İptal Sebepleri',
    yaxis=dict(title='Uçuş Sayısı'),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [12]:
### BONUS1
# Mean departure and arrival delay per airline, each sorted from the largest
# mean to the smallest and rounded to two decimals.
flights_by_airline = df.groupby('AIRLINE')

departure_means = flights_by_airline['DEPARTURE_DELAY'].mean().to_frame()
df5 = departure_means.sort_values(by='DEPARTURE_DELAY', ascending=False).round(2)

arrival_means = flights_by_airline['ARRIVAL_DELAY'].mean().to_frame()
df6 = arrival_means.sort_values(by='ARRIVAL_DELAY', ascending=False).round(2)

In [13]:
# Mean departure delay per airline.
trace1 = go.Bar(
    x=df5.index,
    y=df5['DEPARTURE_DELAY'],
    name='departure_delay',
    marker=dict(color='aqua'),
)

# Mean arrival delay per airline.
trace2 = go.Bar(
    x=df6.index,
    y=df6['ARRIVAL_DELAY'],
    name='arrival_delay',
    marker=dict(color='red'),
)

data = [trace1, trace2]

# barmode='group' (side-by-side bars) instead of 'stack': both series are
# averages, and stacking them makes each bar's total height the sum of the
# mean departure delay and the mean arrival delay, a number with no meaning.
# Grouping lets the two averages be compared directly for every airline.
layout = go.Layout(
    title='Havayollarına Göre Ortalama Varış ve Kalkış Gecikmeleri',
    xaxis=dict(tickangle=15),
    yaxis=dict(title='dakika'),
    barmode='group',
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)