# Pandas Assignment

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Raw flight records, stored column-wise; every column has five entries.
data = {
    # Route as "<origin>_<destination>" with inconsistent letter casing.
    'From_To': [
        'LoNDon_paris',
        'MAdrid_miLAN',
        'londON_StockhOlm',
        'Budapest_PaRis',
        'Brussels_londOn',
    ],
    # Flight numbers; np.nan marks the entries that are missing.
    'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
    # One list of delay values per flight; the lists differ in length.
    'RecentDelays': [
        [23, 47],
        [],
        [24, 43, 87],
        [13],
        [67, 32],
    ],
    # Airline names surrounded by stray punctuation and digits.
    'Airline': [
        'KLM(!)',
        '<Air France> (12)',
        '(British Airways. )',
        '12. Air France',
        '"Swiss Air"',
    ],
}


In [3]:
# Build the working frame: each key of `data` becomes a column.
df = pd.DataFrame(data=data)
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline
0,LoNDon_paris,10045.0,"[23, 47]",KLM(!)
1,MAdrid_miLAN,,[],<Air France> (12)
2,londON_StockhOlm,10065.0,"[24, 43, 87]",(British Airways. )
3,Budapest_PaRis,,[13],12. Air France
4,Brussels_londOn,10085.0,"[67, 32]","""Swiss Air"""


## Handling the From_To Column

In [4]:
# Clean up the route label: underscores become spaces and every word is
# title-cased (e.g. 'LoNDon_paris' -> 'London Paris').
df['From_To'] = df['From_To'].str.replace('_', ' ').str.title()

# Split the cleaned label at each space; the first piece is the origin and
# the second piece is the destination.
temp = df['From_To'].str.split(' ', expand=True)
df['From'], df['To'] = temp[0], temp[1]
df

Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline,From,To
0,London Paris,10045.0,"[23, 47]",KLM(!),London,Paris
1,Madrid Milan,,[],<Air France> (12),Madrid,Milan
2,London Stockholm,10065.0,"[24, 43, 87]",(British Airways. ),London,Stockholm
3,Budapest Paris,,[13],12. Air France,Budapest,Paris
4,Brussels London,10085.0,"[67, 32]","""Swiss Air""",Brussels,London


## Handling Flight Number

In [5]:
# handling flight number
# A missing flight number is filled with the previous row's number plus 10.
# Build an offset array as long as the dataframe: 10 where the flight number
# is missing, 0 everywhere else.
zero_array = np.zeros(df.shape[0])
zero_array = np.where(df['FlightNumber'].isna(), 10, zero_array)
print(f"transformed zero array is : {zero_array}")

# Forward-fill the missing values, then add the offset array to the result.
# The filled column is assigned back explicitly: fillna(method='ffill') is
# deprecated, and an in-place call on a column selection is not guaranteed to
# write through to df.
# NOTE: this relies on the first row being present and on gaps being a single
# row long; two missing rows in a row would receive the same number.
df['FlightNumber'] = df['FlightNumber'].ffill() + zero_array

# flight numbers are whole numbers, so store the column as int
df['FlightNumber'] = df['FlightNumber'].astype(int)
df

transformed zero array is : [ 0. 10.  0. 10.  0.]


Unnamed: 0,From_To,FlightNumber,RecentDelays,Airline,From,To
0,London Paris,10045,"[23, 47]",KLM(!),London,Paris
1,Madrid Milan,10055,[],<Air France> (12),Madrid,Milan
2,London Stockholm,10065,"[24, 43, 87]",(British Airways. ),London,Stockholm
3,Budapest Paris,10075,[13],12. Air France,Budapest,Paris
4,Brussels London,10085,"[67, 32]","""Swiss Air""",Brussels,London


## Handling Delays

In [6]:
def delays_handling(delays):
    """Spread a Series of per-row delay lists into one column per delay.

    Parameters
    ----------
    delays : pandas.Series
        Each element is a sequence (possibly empty) of delay values.

    Returns
    -------
    pandas.DataFrame
        Columns ``Delay_1`` ... ``Delay_n``, where ``n`` is the length of the
        longest sequence, indexed like ``delays``. Rows that hold fewer than
        ``n`` values are padded with ``np.nan``. When every sequence is empty
        the frame has no columns.

    Notes
    -----
    The sequences stored in ``delays`` are only read, never modified, so the
    caller's data is intact afterwards and the function can be called again
    on the same Series with the same result.
    """
    # Shallow copies: nothing below can touch the caller's lists.
    rows = [list(delay) for delay in delays]

    # Number of Delay_<k> columns needed; 0 when there are no rows at all.
    width = max((len(row) for row in rows), default=0)

    columns = {
        'Delay_' + str(position + 1): [
            row[position] if position < len(row) else np.nan for row in rows
        ]
        for position in range(width)
    }
    # Reuse the input index so the result lines up with the source frame
    # when concatenated column-wise.
    return pd.DataFrame(columns, index=delays.index)

In [7]:
# Expand the delay lists into Delay_<n> columns, append them to the right of
# the existing columns, then remove the original list column.
delay_df = delays_handling(df['RecentDelays'])
df = pd.concat([df, delay_df], axis='columns').drop(columns=['RecentDelays'])

In [8]:
# Reorder the columns: route information first, then airline and flight
# number, and the delay columns last.
column_order = [
    'From_To', 'From', 'To',
    'Airline', 'FlightNumber',
    'Delay_1', 'Delay_2', 'Delay_3',
]
df = df[column_order]
df

Unnamed: 0,From_To,From,To,Airline,FlightNumber,Delay_1,Delay_2,Delay_3
0,London Paris,London,Paris,KLM(!),10045,23.0,47.0,
1,Madrid Milan,Madrid,Milan,<Air France> (12),10055,,,
2,London Stockholm,London,Stockholm,(British Airways. ),10065,24.0,43.0,87.0
3,Budapest Paris,Budapest,Paris,12. Air France,10075,13.0,,
4,Brussels London,Brussels,London,"""Swiss Air""",10085,67.0,32.0,
