In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)
from plotly import tools
import plotly.figure_factory as ff

# Load the flight records and attach each carrier's full name.
df = pd.read_csv('data/flights.csv', low_memory=False)
airlines = pd.read_csv('data/airlines.csv')
df = df.merge(airlines, left_on='AIRLINE', right_on='IATA_CODE')
# Both inputs have an AIRLINE column, so the merge produces AIRLINE_x (the value
# from the flights file) and AIRLINE_y (the value from the airlines file).
# Re-expose the latter as AIRLINE at column position 5.
df.insert(loc=5, column='AIRLINE', value=df['AIRLINE_y'])
df = df.drop(columns=['AIRLINE_y', 'IATA_CODE'])

# Attach airport name and city, first for the origin and then for the
# destination. The second pass collides with the AIRPORT/CITY columns added by
# the first, so pandas suffixes them: *_x = origin, *_y = destination.
# merge() defaults to an inner join, so flights whose code has no match in the
# lookup table are dropped.
airport = pd.read_csv('data/airports.csv')
for endpoint in ('ORIGIN_AIRPORT', 'DESTINATION_AIRPORT'):
    df = df.merge(airport[['IATA_CODE', 'AIRPORT', 'CITY']],
                  left_on=endpoint, right_on='IATA_CODE')
    df = df.drop(columns=['IATA_CODE'])
df.head(2).T

Unnamed: 0,0,1
YEAR,2015,2015
MONTH,1,1
DAY,1,1
DAY_OF_WEEK,4,4
AIRLINE_x,AS,AS
AIRLINE,Alaska Airlines Inc.,Alaska Airlines Inc.
FLIGHT_NUMBER,98,108
TAIL_NUMBER,N407AS,N309AS
ORIGIN_AIRPORT,ANC,ANC
DESTINATION_AIRPORT,SEA,SEA


In [2]:
# Mean of the CANCELLED column per airline, highest first, rounded to 3 decimals.
# The result is a one-column DataFrame indexed by airline name.
# NOTE(review): presumably CANCELLED is a 0/1 flag, making this a fraction of
# cancelled flights — confirm against the dataset.
mean_cancelled = df.groupby('AIRLINE')[['CANCELLED']].mean()
dff = mean_cancelled.sort_values(by='CANCELLED', ascending=False).round(3)

In [3]:
# First pass: plot the per-airline cancellation values with plotly's default
# scatter settings and no custom layout.
trace1 = go.Scatter(x=dff.index, y=dff['CANCELLED'])
data = [trace1]
py.iplot(data)

In [4]:
# Same chart as before, now with a title, rotated x tick labels and a y-axis label.
# dff.CANCELLED is the mean of the CANCELLED column (a fraction), while the
# y-axis is labelled '%', so convert to percentage points before plotting.
# NOTE(review): assumes CANCELLED is a 0/1 flag — confirm against the dataset.
cancel_pct = (dff.CANCELLED * 100).round(1)
trace1 = go.Scatter(
    x=dff.index,
    y=cancel_pct
    )

# dff is grouped by AIRLINE, so the title names airlines, not cities.
layout = go.Layout(xaxis=dict(tickangle=20),
    title='Havayollarına Göre Uçuşların İptal Edilme Oranları', yaxis = dict(title = '%')
)
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [5]:
# Marker-only variant: one diamond per airline instead of a connected line.
# dff.CANCELLED is the mean of the CANCELLED column (a fraction), while the
# y-axis is labelled '%', so convert to percentage points before plotting.
# NOTE(review): assumes CANCELLED is a 0/1 flag — confirm against the dataset.
cancel_pct = (dff.CANCELLED * 100).round(1)
trace1 = go.Scatter(
    x=dff.index,
    y=cancel_pct,
    mode='markers',
    marker=dict(symbol = 'diamond',
        sizemode = 'diameter',
        size = 20
    )
)
# dff is grouped by AIRLINE, so the title names airlines, not cities.
layout = go.Layout(xaxis=dict(tickangle=20),
    title='Havayollarına Göre Uçuşların İptal Edilme Oranları', yaxis = dict(title = '%')
)
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [6]:
# Colour-mapped variant: marker colour encodes the same value as the y position.
# dff.CANCELLED is the mean of the CANCELLED column (a fraction), while the
# y-axis is labelled '%', so convert to percentage points once and use that
# series for both y and colour; the colorbar then shares the axis unit.
# NOTE(review): assumes CANCELLED is a 0/1 flag — confirm against the dataset.
cancel_pct = (dff.CANCELLED * 100).round(1)
trace1 = go.Scatter(
    x=dff.index,
    y=cancel_pct,
    mode='markers',
    marker=dict(symbol = 'diamond',
        sizemode = 'diameter',
        size = 30,
        color = cancel_pct,
        colorscale='Portland',
        showscale=True
    ),
)
# dff is grouped by AIRLINE, so the title names airlines, not cities.
layout = go.Layout(xaxis=dict(tickangle=20),
    title='Havayollarına Göre Uçuşların İptal Edilme Oranları', yaxis = dict(title = '%')
)
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [7]:
# Same colour-mapped chart with the 'star-square' marker symbol.
# dff.CANCELLED is the mean of the CANCELLED column (a fraction), while the
# y-axis is labelled '%', so convert to percentage points once and use that
# series for both y and colour; the colorbar then shares the axis unit.
# NOTE(review): assumes CANCELLED is a 0/1 flag — confirm against the dataset.
cancel_pct = (dff.CANCELLED * 100).round(1)
trace1 = go.Scatter(
    x=dff.index,
    y=cancel_pct,
    mode='markers',
    marker=dict(symbol = 'star-square',
        sizemode = 'diameter',
        size = 30,
        color = cancel_pct,
        colorscale='Portland',
        showscale=True
    )
)
# dff is grouped by AIRLINE, so the title names airlines, not cities.
layout = go.Layout(xaxis=dict(tickangle=20),
    title='Havayollarına Göre Uçuşların İptal Edilme Oranları', yaxis = dict(title = '%')
)
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [8]:
# Assemble a datetime column from the separate DAY / MONTH / YEAR columns.
df['Date'] = pd.to_datetime(df[['DAY','MONTH','YEAR']])
# Keep only months 1-8. NOTE(review): the reason for the cut-off is not visible
# in this notebook — confirm.
df = df[df.MONTH < 9]
# Daily mean of every numeric column. The frame still holds string columns
# (airline, tail number, airports, cities); averaging those raises TypeError on
# pandas >= 2.0, so restrict the aggregation to numeric columns explicitly.
numeric_cols = df.select_dtypes(include='number').columns.tolist()
df1dm = df.resample('D', on='Date')[numeric_cols].mean()

In [9]:
# Distribution plot of the daily mean arrival delay, using the figure-factory
# defaults.
labels = ['Ortalama Gecikme']
hist_data = [df1dm['ARRIVAL_DELAY']]

fig = ff.create_distplot(hist_data, labels)
py.iplot(fig)

In [10]:
# Same distribution with the histogram bars switched off, drawn in navy and
# given a title.
labels = ['Ortalama Gecikme']
colors = ['navy']
hist_data = [df1dm['ARRIVAL_DELAY']]

fig = ff.create_distplot(
    hist_data,
    labels,
    colors=colors,
    show_hist=False,
)

# Set the title on the generated figure's layout.
fig['layout'].update(dict(title='Varıştaki Ortalama Gecikme'))
py.iplot(fig)

In [11]:
# Compare the daily mean arrival delay across three groups of days:
# DAY_OF_WEEK below 6, equal to 6, and equal to 7.
day_of_week = df1dm['DAY_OF_WEEK']
hist_data = [
    df1dm[day_of_week < 6]['ARRIVAL_DELAY'],
    df1dm[day_of_week == 6]['ARRIVAL_DELAY'],
    df1dm[day_of_week == 7]['ARRIVAL_DELAY'],
]
labels = ['Haftaiçi', 'C.tesi','Pazar']
colors = ['navy', 'green', 'red']

fig = ff.create_distplot(
    hist_data,
    labels,
    colors=colors,
    show_hist=False,
    bin_size=.2,
)

# Set the title on the generated figure's layout.
fig['layout'].update(dict(title='Varıştaki Ortalama Gecikme'))
py.iplot(fig)

In [12]:
# Mean arrival delay per origin city (rows) and day of week (columns 1..7).
# CITY_x is the city joined on ORIGIN_AIRPORT (the first airport merge).
# Use the built-in "mean" aggregation rather than a Python lambda so pandas can
# take its optimized grouped-mean path.
arr = df.pivot_table(index="CITY_x", columns="DAY_OF_WEEK", values="ARRIVAL_DELAY", aggfunc="mean")
# Rank cities by the sum of their seven weekday means. skipna=False keeps the
# original semantics: a city missing any weekday gets NaN and sorts last.
arr['sum'] = arr[list(range(1, 8))].sum(axis=1, skipna=False)
# Take the 8 smallest totals, flip the row order, and drop the helper column so
# only the weekday columns remain.
fv = arr.sort_values(by='sum').head(8).iloc[::-1].drop(columns='sum')

In [13]:
# Heatmap of mean arrival delay: one row per city in fv, one column per weekday.
# z must contain every row of fv so that it lines up with the labels in y.
# Passing rows 1..7 only would drop the first city and draw each remaining row
# against the previous city's label.
# The x labels follow DAY_OF_WEEK 1..7; the sample row for 2015-01-01 (a
# Thursday) has DAY_OF_WEEK 4, which is consistent with 1 = Monday.
trace1 = go.Heatmap(z=fv.values,
                   x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday','Saturday','Sunday'],
                   y=fv.index.values, colorscale='Jet')

data=[trace1]
layout = go.Layout(
    title='En Düşük Gecikmeye Sahip Şehirler'
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [14]:
# Average speed per flight: DISTANCE divided by AIR_TIME, scaled by 60.
# NOTE(review): presumably AIR_TIME is in minutes, giving distance units per
# hour — confirm against the dataset documentation.
df['SPEED'] = 60*df['DISTANCE']/df['AIR_TIME']
# Mean speed per airline, fastest first. This rebinds dff, replacing the
# cancellation table built earlier in the notebook.
dff = df.groupby('AIRLINE').SPEED.mean().to_frame().sort_values(by='SPEED',ascending=False).round(2)

# Pairwise Pearson correlation of the selected numeric columns.
# Missing values are deliberately NOT filled with 0. DataFrame.corr already
# excludes missing values pairwise, whereas zero-filling would inject fake
# observations and bias every coefficient involving those columns.
corr_columns = ['DAY_OF_WEEK','MONTH','ELAPSED_TIME', 'AIR_TIME', 'DISTANCE',
       'WHEELS_ON', 'TAXI_IN', 'SCHEDULED_ARRIVAL', 'ARRIVAL_TIME',
       'ARRIVAL_DELAY','SPEED']
correlation = df[corr_columns].corr()
# Axis labels and the raw matrix, consumed by the heatmap cell that follows.
cols = correlation.columns.values
corr  = correlation.values

In [15]:
# Correlation matrix as a heatmap; the same labels are used on both axes.
trace1 = go.Heatmap(
    z=corr,
    x=cols,
    y=cols,
    colorscale="YlOrRd",
    reversescale=True,
)

# Fixed-size figure with a wide left margin and small tick fonts on both axes.
layout = go.Layout(
    title="Korelasyon Matrisi",
    autosize=False,
    height=600,
    width=800,
    margin={'l': 200},
    yaxis={'tickfont': {'size': 8}},
    xaxis={'tickfont': {'size': 8}},
)
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)