In [1]:
import numpy as np
import pandas as pd
# Raw, deliberately messy flight records used by the cleaning steps below:
# inconsistent casing in the routes, missing flight numbers, ragged delay
# lists and airline names wrapped in stray punctuation/digits.
flight_records = {
    'From_To': [
        'LoNDon_paris',
        'MAdrid_miLAN',
        'londON_StockhOlm',
        'Budapest_PaRis',
        'Brussels_londOn',
    ],
    'FlightNumber': [10045, np.nan, 10065, np.nan, 10085],
    'RecentDelays': [[23, 47], [], [24, 43, 87], [13], [67, 32]],
    'Airline': [
        'KLM(!)',
        '<Air France> (12)',
        '(British Airways. )',
        '12. Air France',
        '"Swiss Air"',
    ],
}
df = pd.DataFrame(flight_records)
print(df)

            From_To  FlightNumber  RecentDelays              Airline
0      LoNDon_paris       10045.0      [23, 47]               KLM(!)
1      MAdrid_miLAN           NaN            []    <Air France> (12)
2  londON_StockhOlm       10065.0  [24, 43, 87]  (British Airways. )
3    Budapest_PaRis           NaN          [13]       12. Air France
4   Brussels_londOn       10085.0      [67, 32]          "Swiss Air"


In [2]:
# Fill the missing flight numbers by interpolating between their neighbours,
# then cast back to integers now that no NaN remains in the column.
flight_numbers = df['FlightNumber'].interpolate()
df['FlightNumber'] = flight_numbers.astype(int)
print(df)
print("=======================================")

            From_To  FlightNumber  RecentDelays              Airline
0      LoNDon_paris         10045      [23, 47]               KLM(!)
1      MAdrid_miLAN         10055            []    <Air France> (12)
2  londON_StockhOlm         10065  [24, 43, 87]  (British Airways. )
3    Budapest_PaRis         10075          [13]       12. Air France
4   Brussels_londOn         10085      [67, 32]          "Swiss Air"


In [3]:
# Split each "From_To" route on the underscore into two separate columns.
temp = df['From_To'].str.split('_', expand=True)
temp.columns = ['From', 'To']
print(temp)
print("========================================")
# Normalise the casing: first character upper-case, the rest lower-case.
for city_col in ('From', 'To'):
    temp[city_col] = temp[city_col].str.capitalize()
print(temp)
print("========================================")
# Swap the combined From_To column for the two cleaned columns
# (joined on the shared row index).
df = df.drop(columns='From_To').join(temp)
print(df)
print("========================================")

       From         To
0    LoNDon      paris
1    MAdrid      miLAN
2    londON  StockhOlm
3  Budapest      PaRis
4  Brussels     londOn
       From         To
0    London      Paris
1    Madrid      Milan
2    London  Stockholm
3  Budapest      Paris
4  Brussels     London
   FlightNumber  RecentDelays              Airline      From         To
0         10045      [23, 47]               KLM(!)    London      Paris
1         10055            []    <Air France> (12)    Madrid      Milan
2         10065  [24, 43, 87]  (British Airways. )    London  Stockholm
3         10075          [13]       12. Air France  Budapest      Paris
4         10085      [67, 32]          "Swiss Air"  Brussels     London


In [4]:
# Remove the stray characters around the airline names: keep the first run of
# letters/whitespace matched by the regex, then trim surrounding spaces.
# The pattern is written as a raw string so that `\s` is handed to the regex
# engine untouched; in a normal string literal `\s` is an invalid escape
# sequence (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
df['Airline'] = df['Airline'].str.extract(r'([a-zA-Z\s]+)', expand=False).str.strip()
print(df)

print("========================================")

   FlightNumber  RecentDelays          Airline      From         To
0         10045      [23, 47]              KLM    London      Paris
1         10055            []       Air France    Madrid      Milan
2         10065  [24, 43, 87]  British Airways    London  Stockholm
3         10075          [13]       Air France  Budapest      Paris
4         10085      [67, 32]        Swiss Air  Brussels     London


In [5]:
# Expand the RecentDelays lists into one column per list position
# (delay_1, delay_2, ...); rows whose list is shorter are padded with NaN.
# Building the frame from the list of lists in a single call avoids creating
# one Series per row, which is what `.apply(pd.Series)` does.
delays = pd.DataFrame(df['RecentDelays'].tolist(), index=df.index)
delays.columns = ['delay_{}'.format(n) for n in range(1, len(delays.columns) + 1)]
df = df.drop('RecentDelays', axis=1).join(delays)
print(df)
# Fill the remaining NaN values with the mean of their column.
# numeric_only=True is required because the frame also holds string columns
# (Airline, From, To); without it pandas >= 2.0 raises a TypeError instead of
# silently skipping them.
# Alternatives to the mean: df.ffill() / df.bfill() to propagate neighbouring
# values, or df.median(numeric_only=True) for a median fill.
means = df.mean(numeric_only=True)
df = df.fillna(means)
print(df)

   FlightNumber          Airline      From         To  delay_1  delay_2  \
0         10045              KLM    London      Paris     23.0     47.0   
1         10055       Air France    Madrid      Milan      NaN      NaN   
2         10065  British Airways    London  Stockholm     24.0     43.0   
3         10075       Air France  Budapest      Paris     13.0      NaN   
4         10085        Swiss Air  Brussels     London     67.0     32.0   

   delay_3  
0      NaN  
1      NaN  
2     87.0  
3      NaN  
4      NaN  
   FlightNumber          Airline      From         To  delay_1    delay_2  \
0         10045              KLM    London      Paris    23.00  47.000000   
1         10055       Air France    Madrid      Milan    31.75  40.666667   
2         10065  British Airways    London  Stockholm    24.00  43.000000   
3         10075       Air France  Budapest      Paris    13.00  40.666667   
4         10085        Swiss Air  Brussels     London    67.00  32.000000   

   delay