In [None]:
'''loadDataAndRegister tests'''

df = loadDataAndRegister(testFile)

# Loading must leave exactly one table matching the name "airtraffic".
assert spark.sql("SHOW TABLES Like 'airtraffic'").count() == 1, "there was expected to be a table called 'airtraffic'"

# The third collected row must match the expected values column by column.
third = df.collect()[2]
correctRow = Row(
    Year=2008, Month=5, DayofMonth=6, DayOfWeek=2, DepTime=611,
    CRSDepTime=615, ArrTime=729, CRSArrTime=735, UniqueCarrier='EV',
    FlightNum=4794, TailNum='N916EV', ActualElapsedTime=78,
    CRSElapsedTime=80, AirTime=58, ArrDelay=-6, DepDelay=-4,
    Origin='ROA', Dest='ATL', Distance=357, TaxiIn=9, TaxiOut=11,
    Cancelled=0, CancellationCode=None, Diverted=0, CarrierDelay=None,
    WeatherDelay=None, NASDelay=None, SecurityDelay=None,
    LateAircraftDelay=None,
).asDict()

assert third.asDict() == correctRow, "the row was expected to be %s but it was %s" % (correctRow, third.asDict())

### BEGIN HIDDEN TESTS
import csv
import random

randomCsv = "2008_randomsample.csv"

# Load the sample once and reuse the DataFrame, its row count and its collected
# rows, so the file is not re-read for every count()/collect() and both sampled
# indices address the same list of rows.
df = loadDataAndRegister(randomCsv)
rowCount = df.count()

# Seeding with the row count makes the two sampled indices reproducible.
random.seed(rowCount)
r = random.randint(0, rowCount - 1)
r2 = random.randint(10, rowCount - 10)

correctRow1 = Row(
    Year=2008, Month=3, DayofMonth=12, DayOfWeek=3, DepTime=1456,
    CRSDepTime=1310, ArrTime=1555, CRSArrTime=1415, UniqueCarrier='AA',
    FlightNum=1264, TailNum='N422AA', ActualElapsedTime=59,
    CRSElapsedTime=65, AirTime=44, ArrDelay=100, DepDelay=106,
    Origin='DFW', Dest='SAT', Distance=247, TaxiIn=3, TaxiOut=12,
    Cancelled=0, CancellationCode=None, Diverted=0, CarrierDelay=69,
    WeatherDelay=0, NASDelay=0, SecurityDelay=0,
    LateAircraftDelay=31,
).asDict()

correctRow2 = Row(
    Year=2008, Month=4, DayofMonth=25, DayOfWeek=5, DepTime=1502,
    CRSDepTime=1420, ArrTime=2025, CRSArrTime=1755, UniqueCarrier='AA',
    FlightNum=1204, TailNum='N251AA', ActualElapsedTime=263,
    CRSElapsedTime=155, AirTime=139, ArrDelay=150, DepDelay=42,
    Origin='DFW', Dest='RDU', Distance=1062, TaxiIn=3, TaxiOut=121,
    Cancelled=0, CancellationCode=None, Diverted=0, CarrierDelay=0,
    WeatherDelay=0, NASDelay=112, SecurityDelay=0,
    LateAircraftDelay=38,
).asDict()

rows = df.collect()
testObject1 = rows[r]
testObject2 = rows[r2]
assert testObject1.asDict() == correctRow1, "the row was expected to be %s but it was %s" % (correctRow1, testObject1.asDict())
assert testObject2.asDict() == correctRow2, "the row was expected to be %s but it was %s" % (correctRow2, testObject2.asDict())

### END HIDDEN TESTS

In [None]:
'''flightCount tests'''

# Public check: the first three rows returned by flightCount for testFile2,
# written as (TailNum, count) pairs and compared via correctRows.
data = loadDataAndRegister(testFile2)

correct = [Row(TailNum=tailNum, count=flights)
           for tailNum, flights in [('N881AS', 5),
                                    ('N886AS', 3),
                                    ('N824AS', 2)]]

correctRows(flightCount(data).take(3), correct)

### BEGIN HIDDEN TESTS

# Same kind of check on a second sample file, using the first five rows.
randomCsv2 = "2008_randomsample2.csv"
randomData = loadDataAndRegister(randomCsv2)

test = flightCount(randomData).take(5)

correct = [Row(TailNum=tailNum, count=flights)
           for tailNum, flights in [('N769SW', 7),
                                    ('N777QC', 5),
                                    ('N644SW', 4),
                                    ('N754SW', 3),
                                    ('N515MJ', 2)]]

correctRows(test, correct)
### END HIDDEN TESTS

In [None]:
'''cancelledDueToSecurity tests'''

# Public check: the (FlightNum, Dest) rows expected for testFile.
data = loadDataAndRegister(testFile)
correct = [Row(FlightNum=4794, Dest='JFK'), Row(FlightNum=4794, Dest='ATL')]
correctRows(cancelledDueToSecurity(data).collect(), correct)

### BEGIN HIDDEN TESTS
# Imported here so this cell does not depend on another cell's import.
import random

randomCsv = "2008_randomsample.csv"

# Load the sample once; the row count, the sampled rows and the function under
# test all use this DataFrame rather than a global left behind by another cell.
randomData = loadDataAndRegister(randomCsv)
rowCount = randomData.count()

# Seeding with the row count makes the two sampled indices reproducible.
random.seed(rowCount)
r = random.randint(5, rowCount - 1)
r2 = random.randint(5, rowCount - 10)

# Collect once so both indices address the same list of rows.
randomRows = randomData.collect()
testObject = randomRows[r]
testObject2 = randomRows[r2]
correct = [Row(FlightNum=testObject.FlightNum, Dest=testObject.Dest),
           Row(FlightNum=testObject2.FlightNum, Dest=testObject2.Dest)]
correctRows(cancelledDueToSecurity(randomData).collect(), correct)
### END HIDDEN TESTS

In [None]:
'''longestWeatherDelay tests'''

# Public check: the first column of the first result row for testFile must be 7.
data = loadDataAndRegister(testFile)
longestDelayRow = longestWeatherDelay(data).first()
test = longestDelayRow[0]

assert test == 7, (
    "the longest weather delay was expected to be 7 but it was %s" % test
)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
correct = 40
randomData = loadDataAndRegister(randomCsv)
longestDelayRow = longestWeatherDelay(randomData).first()
test = longestDelayRow[0]
assert test == correct, (
    "the longest weather delay was expected to be %s but it was %s" % (correct, test)
)
### END HIDDEN TESTS

In [None]:
'''didNotFly tests'''

# Public check: didNotFly must return 1489 rows for testFile.
data = loadDataAndRegister(testFile)
notFlown = didNotFly(data)
test = notFlown.count()

assert test == 1489, (
    "the amount of airlines that didn't fly was expected to be 1489 but it was %s" % test
)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
correct = 1473
randomData = loadDataAndRegister(randomCsv)
notFlown = didNotFly(randomData)
test = notFlown.count()
assert test == correct, (
    "the amount of airlines that didn't fly was expected to be %s but it was %s" % (correct, test)
)
### END HIDDEN TESTS

In [None]:
'''flightsFromVegasToJFK tests'''

# Public check: two expected rows for testFile, each with Num == 1.
data = loadDataAndRegister(testFile)
correct = [Row(Description=carrierName, Num=1)
           for carrierName in ('Titan Airways', 'Atlantic Southeast Airlines')]
vegasToJfkRows = flightsFromVegasToJFK(data).collect()
correctRows(vegasToJfkRows, correct)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
randomData = loadDataAndRegister(randomCsv)
correct = [
    Row(Description='Northwest Airlines Inc.', Num=1),
]
vegasToJfkRows = flightsFromVegasToJFK(randomData).collect()
correctRows(vegasToJfkRows, correct)
### END HIDDEN TESTS

In [None]:
'''timeSpentTaxiing tests'''

# Public check: the (airport, taxi) rows expected for testFile.
data = loadDataAndRegister(testFile)
correct = [Row(airport='LAS', taxi=11.0), Row(airport='JFK', taxi=13.25)]
correctRows(timeSpentTaxiing(data).collect(), correct)

### BEGIN HIDDEN TESTS

# `random` and `randomCsv` are not set up in this cell; they must already be in scope.
# Load the sample once and reuse its row count instead of re-reading the file
# for every count() call.
randomData = loadDataAndRegister(randomCsv)
rowCount = randomData.count()

# Seed derived from the row count keeps the two sampled indices reproducible.
random.seed(rowCount - 2)
r = random.randint(1, rowCount - 50)
r2 = random.randint(1, rowCount - 50)

# Collect the result once so both indices address the same list of rows.
taxiRows = timeSpentTaxiing(randomData).collect()
data = [taxiRows[r], taxiRows[r2]]
correct = [Row(airport='LAS', taxi=11.75), Row(airport='MHT', taxi=14.0)]
correctRows(data, correct)
### END HIDDEN TESTS

In [None]:
'''distanceMedian tests'''

# Public check: the first column of the first result row for testFile must be 357.0.
data = loadDataAndRegister(testFile)
medianRow = distanceMedian(data).first()
test = medianRow[0]
assert test == 357.0, (
    "the distance median was expected to be 357.0 but it was %s" % test
)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
correct = 561.0
randomData = loadDataAndRegister(randomCsv)
medianRow = distanceMedian(randomData).first()
test = medianRow[0]
assert test == correct, (
    "the distance median was expected to be %s but it was %s" % (correct, test)
)
### END HIDDEN TESTS

In [None]:
'''score95 tests'''

# Public check: the first column of the first result row for testFile must equal 17.0 exactly.
data = loadDataAndRegister(testFile)
scoreRow = score95(data).first()
test = scoreRow[0]
assert test == 17.0, (
    "the score95 was expected to be 17.0 but it was %s" % test
)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
# This value is compared with an absolute tolerance of 0.001 rather than exactly.
correct = 39.39999999999998
randomData = loadDataAndRegister(randomCsv)
scoreRow = score95(randomData).first()
test = scoreRow[0]
assert abs(correct - test) < 0.001, (
    "the score95 was expected to be %s but it was %s" % (correct, test)
)
### END HIDDEN TESTS

In [None]:
'''cancelledFlights tests'''

# Public check: the (airport, city, percentage) rows expected for testFile.
data = loadDataAndRegister(testFile)
correct = [Row(airport='McCarran International', city='Las Vegas', percentage=0.5),
           Row(airport='Roanoke Regional/ Woodrum ', city='Roanoke', percentage=0.25)]
correctRows(cancelledFlights(data).collect(), correct)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
randomData = loadDataAndRegister(randomCsv)
data = cancelledFlights(randomData).collect()

# Fail with a readable message instead of an IndexError when nothing is returned.
assert data, "cancelledFlights was expected to return at least one row but it returned none"

correctAirport = 'Dallas-Fort Worth International'
correctCity = 'Dallas-Fort Worth'
correctPercentage = 0.16666666666666666

# Only the first row is inspected. The fields are checked one by one, with the
# percentage compared under a 0.001 tolerance rather than by whole-row equality.
firstRow = data[0].asDict()
airport = firstRow['airport']
city = firstRow['city']
percentage = firstRow['percentage']

assert airport == correctAirport, "the airport was expected to be %s but it was %s" % (correctAirport, airport)
assert city == correctCity, "the city was expected to be %s but it was %s" % (correctCity, city)
assert abs(percentage - correctPercentage) < 0.001, "the percentage was expected to be %s but it was %s" % (correctPercentage, percentage)

### END HIDDEN TESTS

In [None]:
'''leastSquares tests'''

# Public check: leastSquares must return exactly (952.0, -56.0) for testFile.
data = loadDataAndRegister(testFile)
test = leastSquares(data)
# `test` is wrapped in a 1-tuple for formatting: with a bare 2-tuple on the right
# of "%", a failing assertion would raise TypeError ("not all arguments
# converted") instead of showing this message.
assert test == (952.0, -56.0), "the answer was expected to be (952.0, -56.0) but it was %s" % (test,)

### BEGIN HIDDEN TESTS

# randomCsv is not assigned in this cell; it must already be in scope.
randomData = loadDataAndRegister(randomCsv)
test = leastSquares(randomData)

# Two reference answers are accepted; each component may deviate by at most 0.1.
correct = (2.5366070211375176, 0.002342010423694157)
correct2 = (2.223607395612209, 0.01578904577023421)

case1 = abs(test[0] - correct[0]) <= 0.1 and abs(test[1] - correct[1]) <= 0.1
case2 = abs(test[0] - correct2[0]) <= 0.1 and abs(test[1] - correct2[1]) <= 0.1
assert case1 or case2, "the answer was expected to be %s but it was %s" % (correct, test)

### END HIDDEN TESTS