## Exploratory Data Analysis

Use this notebook to get familiar with the datasets we have. There are 10 questions we need to answer during the EDA.


We shouldn't limit our EDA to these 10 questions. Let's be creative :).

#### **Task 3**: Does the weather affect the delay? 
Use the API to pull the weather information for flights. There is no need to get weather for ALL flights. We can choose the right representative sample. Let's focus on four weather types:
- sunny
- cloudy
- rainy
- snow.
Test the hypothesis that the delays observed under these 4 weather types come from the same distribution. If they do not, which weather types are significantly different?

In [17]:
import requests
import json
import pandas as pd
import numpy as np
import datetime

import warnings
warnings.filterwarnings("ignore")

In [18]:
# Lookup table: two-letter country code -> [country name, capital city].
# The capital is what gets sent to the weather API as the query location.
#
# Codes appear to follow the Eurostat geo list ('EL' for Greece rather than the
# ISO 'GR'), so the United Kingdom is 'UK' (was the typo 'UL') and Monaco is
# 'MC' (was 'MO', which is the code for Macao).
# NOTE(review): Latvia ('LV', Riga) is not listed -- confirm whether that is
# intentional.
code_country_capital = {
    'AT': ['Austria',        'Vienna'],
    'BE': ['Belgium',        'Brussels'],
    'BG': ['Bulgaria',       'Sofia'],
    'CY': ['Cyprus',         'Nicosia'],
    'CZ': ['Czechia',        'Prague'],
    'DK': ['Denmark',        'Copenhagen'],
    'DE': ['Germany',        'Berlin'],
    'EE': ['Estonia',        'Tallinn'],
    'EL': ['Greece',         'Athens'],
    'ES': ['Spain',          'Madrid'],
    'FI': ['Finland',        'Helsinki'],
    'FR': ['France',         'Paris'],
    'HR': ['Croatia',        'Zagreb'],
    'HU': ['Hungary',        'Budapest'],
    'IE': ['Ireland',        'Dublin'],
    'IT': ['Italy',          'Rome'],
    'LT': ['Lithuania',      'Vilnius'],
    'LU': ['Luxembourg',     'Luxembourg'],
    'MC': ['Monaco',         'Monaco'],
    'MT': ['Malta',          'Valletta'],
    'NL': ['Netherlands',    'Amsterdam'],
    'PT': ['Portugal',       'Lisbon'],
    'PL': ['Poland',         'Warsaw'],
    'RO': ['Romania',        'Bucharest'],
    'SE': ['Sweden',         'Stockholm'],
    'SI': ['Slovenia',       'Ljubljana'],
    'SK': ['Slovakia',       'Bratislava'],
    'UK': ['United Kingdom', 'London'],
}
        

In [20]:
def days_in_month(year,month):
    """Return the number of days in ``month`` of ``year``.

    Parameters
    ----------
    year : int
        Calendar year (Gregorian).
    month : int
        Month number, 1 (January) to 12 (December).

    Returns
    -------
    int
        28-31, with February reported as 29 in leap years.

    Raises
    ------
    IndexError
        If ``month`` is greater than 12.
    """
    # Index 0 is a placeholder so that month numbers index the table directly.
    #          -   1   2   3   4   5   6   7   8   9  10  11  12
    lengths = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    # Full Gregorian rule: every 4th year, except centuries not divisible by
    # 400 (so 2000 is a leap year, 1900 and 2100 are not).
    is_leap = year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
    if is_leap:
        lengths[2] = 29
    return lengths[month]

In [39]:
# Make the folder that holds the local API-key helper module importable.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs where that folder exists -- consider a configurable location or an
# environment variable instead.
import sys
sys.path.append('D:/0-LHL-Activities/API_KEY/')
import my_api_keys

# Key sent as the "key" query parameter by weather_request().
weather_api_key = my_api_keys.api_key('worldweatheronline')

def weather_request(city,start_date,end_date,timeout=30):
    """Fetch historical weather for ``city`` from World Weather Online.

    Parameters
    ----------
    city : str
        Location query sent as the ``q`` parameter (the notebook passes
        capital-city names).
    start_date, end_date : datetime.date or str
        First and last day of the requested period, sent as ``date`` and
        ``enddate``. ``datetime.date`` objects are stringified by ``requests``
        as ``YYYY-MM-DD``.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The decoded JSON payload of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or timeout.
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx"
    params = {
        "key": weather_api_key,
        "q": city,
        "format": "json",
        "date": start_date,
        "enddate": end_date,
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error body downstream.
    response.raise_for_status()

    return response.json()

In [22]:
# API weather description -> category code (as a one-character string).
# analyse_weather_info() takes max() over the hourly codes of a day, so the
# highest code present during the day decides that day's category:
# '1' Sunny < '2' Cloudy < '3' Rainy < '4' Snow.
weather_mapping_code = {
    'Clear': '1',
    'Sunny': '1',
    'Partly cloudy': '2',
    'Cloudy': '2',
    'Overcast': '2',
    'Mist': '3',
    'Rainy': '3',
    'Patchy rain possible': '3',
    'Moderate or heavy rain shower': '3',
    'Moderate rain at times': '3',
    'Heavy rain at times': '3',
    'Light rain': '3',
    'Moderate rain': '3',
    'Heavy rain': '3',
    'Snow': '4',
    'Patchy snow possible': '4',
    'Light snow': '4',
    'Moderate snow': '4',
    'Heavy snow': '4',
    'Freezing fog': '4',
    'Light freezing rain': '4',
    'Moderate or heavy snow showers': '4',
    'Patchy moderate snow': '4',
    'Thundery outbreaks possible': '3',
    'Blizzard': '4',
}

# Category code -> category name.
weather_mapping_name = {
    '1': 'Sunny',
    '2': 'Cloudy',
    '3': 'Rainy',
    '4': 'Snow',
}

# API weather description -> category name.
# Derived from the two tables above so the three mappings cannot drift apart
# (the hand-written version lacked 'Light freezing rain',
# 'Moderate or heavy snow showers' and 'Patchy moderate snow').
weather_mapping = {
    description: weather_mapping_name[code]
    for description, code in weather_mapping_code.items()
}

In [23]:
def analyse_weather_info( code , country , city , data):
    """Flatten one weather API payload into a per-day DataFrame.

    Each day's category is derived from its hourly descriptions: every
    description is translated to a code via ``weather_mapping_code`` and the
    highest code of the day is translated back via ``weather_mapping_name``.

    Parameters
    ----------
    code, country, city
        Values repeated on every row (``country_code``, ``country``, ``city``).
    data : dict
        Decoded API payload; days are read from ``data['data']['weather']``,
        each with ``date``, ``avgtempC`` and an ``hourly`` list whose items
        carry ``weatherDesc[0]['value']``.

    Returns
    -------
    pandas.DataFrame
        Columns ``country_code, country, city, date, temperature,
        Weather_Condition``; one row per day. Empty (but with these columns)
        when the payload contains no days.

    Raises
    ------
    KeyError
        If the payload lacks the expected keys (e.g. an API error body).
    """
    columns = ['country_code', 'country', 'city', 'date',
               'temperature', 'Weather_Condition']
    records = []

    for weather_data in data['data']['weather']:
        hourly_codes = []
        for hour_data in weather_data['hourly']:
            weather = hour_data['weatherDesc'][0]['value']
            weather_code = weather_mapping_code.get(weather)
            if weather_code is None:
                # Report descriptions missing from the mapping and leave them
                # out, so max() below never has to compare None with a string.
                print(f"Unmapped weather description: {weather!r}")
                continue
            hourly_codes.append(weather_code)

        # No recognised hourly description -> category is left as None.
        condition = weather_mapping_name.get(max(hourly_codes)) if hourly_codes else None

        records.append({
            'country_code': code,
            'country': country,
            'city': city,
            'date': weather_data['date'],
            'temperature': weather_data['avgtempC'],
            'Weather_Condition': condition,
        })

    # Build the frame once, after the loop; passing `columns` keeps the schema
    # even when there are no records.
    return pd.DataFrame(records, columns=columns)

In [24]:
# Weather data: one row per capital and day, built from monthly API requests.
weather_columns = ['country_code', 'country', 'city', 'date', 'temperature', 'Weather_Condition']

# Dates: first and last month to download (both inclusive)
year_start = 2019
mnth_start = 6
year_end   = 2022
mnth_end   = 1

cc = list(code_country_capital.keys())

# Collect the monthly frames and concatenate once at the end: growing a
# DataFrame inside the loop is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
monthly_frames = []

for i in cc:
    country, capital = code_country_capital[i]

    year = year_start
    mnth = mnth_start
    # Comparing (year, month) tuples also terminates when mnth_end == 12;
    # a test against "mnth_end + 1" would wait for a month 13 that never comes.
    while (year, mnth) <= (year_end, mnth_end):

        # call weather API for the whole calendar month
        weather_data = weather_request(capital,
                                       datetime.date(year, mnth, 1),
                                       datetime.date(year, mnth, days_in_month(year, mnth)))

        # Analyse and arrange weather data
        monthly_frames.append(analyse_weather_info(i, country, capital, weather_data))

        # advance to the next month, rolling over into the next year
        mnth = mnth + 1
        if mnth > 12:
            year = year + 1
            mnth = 1

# ignore_index gives the combined table one continuous row index instead of
# repeating each monthly frame's 0..n index.
if monthly_frames:
    df_weather = pd.concat(monthly_frames, ignore_index=True)
else:
    df_weather = pd.DataFrame(columns=weather_columns)
    


In [25]:
# Preview the first rows of the combined weather table.
df_weather.head()

Unnamed: 0,country_code,country,city,date,temperature,Weather_Condition
0,IE,Ireland,Dublin,2019-06-01,13,Rainy
1,IE,Ireland,Dublin,2019-06-02,14,Rainy
2,IE,Ireland,Dublin,2019-06-03,12,Rainy
3,IE,Ireland,Dublin,2019-06-04,11,Rainy
4,IE,Ireland,Dublin,2019-06-05,10,Rainy


In [26]:
# Save the combined weather table; index=False keeps the row index out of the file.
df_weather.to_csv('Europe_weather.csv' , index = False)