In [1]:
import pandas as pd
import re

In [2]:
# Raw ';'-separated table, one record per line (header first). Adjacent string
# literals are concatenated by Python, so the value is a single string.
data = (
    'Airline Code;DelayTimes;FlightCodes;To_From\n'
    'Air Canada (!);[21, 40];20015.0;WAterLoo_NEWYork\n'
    '<Air France> (12);[];;Montreal_TORONTO\n'
    '(Porter Airways. );[60, 22, 87];20035.0;CALgary_Ottawa\n'
    '12. Air France;[78, 66];;Ottawa_VANcouvER\n'
    '""".\\.Lufthansa.\\.""";[12, 33];20055.0;london_MONTreal\n'
)

In [3]:
# One list entry per line of the raw table (the header line included).
rows = data.split(sep='\n')

In [4]:
# Inspect the split lines; the final '' entry comes from the trailing newline in `data`.
rows

['Airline Code;DelayTimes;FlightCodes;To_From',
 'Air Canada (!);[21, 40];20015.0;WAterLoo_NEWYork',
 '<Air France> (12);[];;Montreal_TORONTO',
 '(Porter Airways. );[60, 22, 87];20035.0;CALgary_Ottawa',
 '12. Air France;[78, 66];;Ottawa_VANcouvER',
 '""".\\.Lufthansa.\\.""";[12, 33];20055.0;london_MONTreal',
 '']

In [5]:
# The first row of the stringified table carries the column names.
columns = rows[0].split(sep=';')
columns


['Airline Code', 'DelayTimes', 'FlightCodes', 'To_From']

In [6]:
# Split every data row (everything after the header) into its ';'-separated fields,
# giving one list of field strings per table row.
# Blank rows - such as the '' left behind by a trailing newline in the raw string -
# are skipped explicitly. Blindly popping the last entry instead would throw away a
# real data row whenever the input does not end with a newline.
column_vals = [row.split(';') for row in rows[1:] if row.strip()]
print(column_vals)

[['Air Canada (!)', '[21, 40]', '20015.0', 'WAterLoo_NEWYork'], ['<Air France> (12)', '[]', '', 'Montreal_TORONTO'], ['(Porter Airways. )', '[60, 22, 87]', '20035.0', 'CALgary_Ottawa'], ['12. Air France', '[78, 66]', '', 'Ottawa_VANcouvER'], ['""".\\.Lufthansa.\\."""', '[12, 33]', '20055.0', 'london_MONTreal']]


In [7]:
# Build the DataFrame from the parsed rows, labelling the columns with the header names.
df = pd.DataFrame(data=column_vals, columns=columns)


In [8]:
df  # DataFrame built from the raw string; the values still need cleaning

Unnamed: 0,Airline Code,DelayTimes,FlightCodes,To_From
0,Air Canada (!),"[21, 40]",20015.0,WAterLoo_NEWYork
1,<Air France> (12),[],,Montreal_TORONTO
2,(Porter Airways. ),"[60, 22, 87]",20035.0,CALgary_Ottawa
3,12. Air France,"[78, 66]",,Ottawa_VANcouvER
4,""""""".\.Lufthansa.\.""""""","[12, 33]",20055.0,london_MONTreal


In [9]:
# Every column is `object` at this point because all values were parsed from text.
df.dtypes

Airline Code    object
DelayTimes      object
FlightCodes     object
To_From         object
dtype: object

In [10]:
# Convert the flight codes from text to numbers so they can be used in arithmetic
# further down; errors='coerce' maps any entry that cannot be parsed to NaN instead
# of raising.
df['FlightCodes'] = pd.to_numeric(arg=df['FlightCodes'], errors='coerce')

In [11]:
# FlightCodes is numeric now; the two blank codes show up as missing values.
df

Unnamed: 0,Airline Code,DelayTimes,FlightCodes,To_From
0,Air Canada (!),"[21, 40]",20015.0,WAterLoo_NEWYork
1,<Air France> (12),[],,Montreal_TORONTO
2,(Porter Airways. ),"[60, 22, 87]",20035.0,CALgary_Ottawa
3,12. Air France,"[78, 66]",,Ottawa_VANcouvER
4,""""""".\.Lufthansa.\.""""""","[12, 33]",20055.0,london_MONTreal


In [None]:
def clean_up(line):
    """Return an airline name reduced to ASCII letters separated by single spaces.

    Digits, punctuation and every other non-letter, non-whitespace character
    are removed; runs of whitespace are collapsed to one space and the result
    is trimmed at both ends.

    Args:
        line: Raw airline string, e.g. ``'<Air France> (12)'``.

    Returns:
        The cleaned name, e.g. ``'Air France'``.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', line)
    # Deleting a token that sat between two words (e.g. "Air (!) Canada") leaves a
    # run of whitespace behind, so collapse runs to a single space before trimming.
    return re.sub(r'\s+', ' ', letters_only).strip()

# Run the regex clean-up over every airline name in the column.
df['Airline Code'] = df['Airline Code'].map(clean_up)

def fill_missing_codes(df, col, step=10):
    """Fill missing values in ``df[col]`` from the value of the row above.

    Rows are visited top to bottom; each missing entry becomes the previous
    row's value plus ``step``, so consecutive gaps continue the sequence. The
    first row has no predecessor and is left unchanged even if it is missing.

    Args:
        df: DataFrame to update. It is modified in place.
        col: Label of the numeric column to fill.
        step: Increment added to the previous row's value (default 10).

    Returns:
        The same ``df`` object, so ``df = fill_missing_codes(df, col)`` works.
    """
    # Write through a single df.iloc[row, column] indexer. The chained form
    # df[col].iloc[i] = ... assigns into an intermediate Series, which raises
    # chained-assignment warnings and leaves df untouched under copy-on-write.
    col_pos = df.columns.get_loc(col)
    for i in range(1, len(df)):
        if pd.isna(df.iloc[i, col_pos]):  # value in this cell is missing
            df.iloc[i, col_pos] = df.iloc[i - 1, col_pos] + step
    return df

# Fill the gaps first: astype(int) cannot convert NaN, so the cast has to come after.
df = fill_missing_codes(df, 'FlightCodes')
df['FlightCodes'] = df['FlightCodes'].astype(int)

# Upper-case the city names for readability. The vectorised .str accessor passes
# missing values through instead of raising the way apply(str.upper) does.
df['To_From'] = df['To_From'].str.upper()

# Split the combined column into its two halves. n=1 splits on the first
# underscore only, so a city name that itself contains an underscore cannot
# produce a third column and break the two-column assignment.
df[['To', 'From']] = df['To_From'].str.split('_', n=1, expand=True)
# Reassign instead of dropping in place so the step yields a new frame.
df = df.drop(columns=['To_From'])


In [13]:
# Final cleaned table: tidy airline names, integer flight codes, upper-case To/From columns.
df

Unnamed: 0,Airline Code,DelayTimes,FlightCodes,To,From
0,Air Canada,"[21, 40]",20015,WATERLOO,NEWYORK
1,Air France,[],20025,MONTREAL,TORONTO
2,Porter Airways,"[60, 22, 87]",20035,CALGARY,OTTAWA
3,Air France,"[78, 66]",20045,OTTAWA,VANCOUVER
4,Lufthansa,"[12, 33]",20055,LONDON,MONTREAL
