### **Mise En Garde: Certaines méthodes sont exploitables en fonction de la version de Python que vous ustilisez**

In [None]:
!pip install pyspark

In [2]:
import pyspark
print(f"{pyspark.__version__}")

3.5.0


In [None]:
os.listdir(os.getcwd())

### **Point d'entrée : SparkContext**

In [5]:
sc = pyspark.SparkContext()

# Verifier si SparkContext est valide
print(sc)

<SparkContext master=local[*] appName=pyspark-shell>


In [6]:
# Check the version of SparkContext in PySpark Shell
print("La version de Spark Context dans PySpark Shell est : ", sc.version)

# Display the Python version of SparkContext
print("La version Python de Spark Context dans PySpark Shell est : ", sc.pythonVer)

# Display the master of SparkContext
print("Le maitre de Spark Context dans PySpark Shell est : ", sc.master)

La version de Spark Context dans PySpark Shell est :  3.5.0
La version Python de Spark Context dans PySpark Shell est :  3.10
Le maitre de Spark Context dans PySpark Shell est :  local[*]


### **Creating RDD**

In [8]:
myRDD = sc.parallelize(
    [('Amber', 22), ('Alfred', 23), ('skye', 4), ('Albert', 12), ('Amber', 9)]
)

In [9]:
myRDD.collect()

[('Amber', 22), ('Alfred', 23), ('skye', 4), ('Albert', 12), ('Amber', 9)]

In [10]:
myRDD.take(5)

[('Amber', 22), ('Alfred', 23), ('skye', 4), ('Albert', 12), ('Amber', 9)]

### **Reading Data From Files**


In [42]:
import os

file_path_1 = 'airport-codes-na.txt'
file_path_2 = 'departuredelays.csv'

if os.path.exists(file_path_1 and file_path_2):
    print(f"The file {file_path_1} and {file_path_2} exists.")
else:
    print(f"The file {file_path_1} and {file_path_2} does not exist.")

The file airport-codes-na.txt and departuredelays.csv exists.


In [17]:
myRDD = sc.textFile(file_path_1)

In [18]:
myRDD.take(5)

['City\tState\tCountry\tIATA',
 'Abbotsford\tBC\tCanada\tYXX',
 'Aberdeen\tSD\tUSA\tABR',
 'Abilene\tTX\tUSA\tABI',
 'Akron\tOH\tUSA\tCAK']

In [19]:
myRDD.count()

527

In [28]:
myRDD = sc.textFile(file_path_1).map(lambda line: line.split("\t"))

In [29]:
myRDD.getNumPartitions()

2

In [30]:
myRDD = sc.textFile(file_path_1, minPartitions=4, use_unicode=True).map(lambda line: line.split("\t"))

In [31]:
myRDD.take(5)

[['City', 'State', 'Country', 'IATA'],
 ['Abbotsford', 'BC', 'Canada', 'YXX'],
 ['Aberdeen', 'SD', 'USA', 'ABR'],
 ['Abilene', 'TX', 'USA', 'ABI'],
 ['Akron', 'OH', 'USA', 'CAK']]

In [32]:
myRDD.getNumPartitions()

4

In [43]:
myRDD2 = sc.textFile(file_path_2).map(lambda line: line.split(","))
myRDD2.count()

1391579

In [44]:
myRDD2 = sc.textFile(file_path_2, minPartitions=8).map(lambda line: line.split(","))
myRDD2.count()

1391579

In [45]:
myRDD2.take(5)

[['date', 'delay', 'distance', 'origin', 'destination'],
 ['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW'],
 ['01021245', '-2', '602', 'ABE', 'ATL'],
 ['01020605', '-4', '602', 'ABE', 'ATL']]

In [46]:
myRDD2.getNumPartitions()

8

### **Using DataFrame**

**SparkSession : Point d'entrée pour les Dataframes**

In [48]:
from pyspark.sql.session import SparkSession
spark = SparkSession(sc)

In [49]:
myDF = spark.read.csv(file_path_2, header=True, inferSchema=True)
myDF.count()

1391578

In [50]:
myDF.show()

+-------+-----+--------+------+-----------+
|   date|delay|distance|origin|destination|
+-------+-----+--------+------+-----------+
|1011245|    6|     602|   ABE|        ATL|
|1020600|   -8|     369|   ABE|        DTW|
|1021245|   -2|     602|   ABE|        ATL|
|1020605|   -4|     602|   ABE|        ATL|
|1031245|   -4|     602|   ABE|        ATL|
|1030605|    0|     602|   ABE|        ATL|
|1041243|   10|     602|   ABE|        ATL|
|1040605|   28|     602|   ABE|        ATL|
|1051245|   88|     602|   ABE|        ATL|
|1050605|    9|     602|   ABE|        ATL|
|1061215|   -6|     602|   ABE|        ATL|
|1061725|   69|     602|   ABE|        ATL|
|1061230|    0|     369|   ABE|        DTW|
|1060625|   -3|     602|   ABE|        ATL|
|1070600|    0|     369|   ABE|        DTW|
|1071725|    0|     602|   ABE|        ATL|
|1071230|    0|     369|   ABE|        DTW|
|1070625|    0|     602|   ABE|        ATL|
|1071219|    0|     569|   ABE|        ORD|
|1080600|    0|     369|   ABE| 

In [51]:
myDF.rdd.getNumPartitions()

2

In [52]:
myDF.printSchema()

root
 |-- date: integer (nullable = true)
 |-- delay: integer (nullable = true)
 |-- distance: integer (nullable = true)
 |-- origin: string (nullable = true)
 |-- destination: string (nullable = true)



### **RDD Transformations**

In [53]:
airports = sc.textFile(file_path_1).map(lambda line : line.split("\t"))
airports.take(5)

[['City', 'State', 'Country', 'IATA'],
 ['Abbotsford', 'BC', 'Canada', 'YXX'],
 ['Aberdeen', 'SD', 'USA', 'ABR'],
 ['Abilene', 'TX', 'USA', 'ABI'],
 ['Akron', 'OH', 'USA', 'CAK']]

In [54]:
flights = sc.textFile(file_path_2).map(lambda line : line.split(","))
flights.take(5)

[['date', 'delay', 'distance', 'origin', 'destination'],
 ['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW'],
 ['01021245', '-2', '602', 'ABE', 'ATL'],
 ['01020605', '-4', '602', 'ABE', 'ATL']]

**map()**

In [55]:
airports.map(lambda c: (c[0], c[1])).take(5)

[('City', 'State'),
 ('Abbotsford', 'BC'),
 ('Aberdeen', 'SD'),
 ('Abilene', 'TX'),
 ('Akron', 'OH')]

**filter()**

In [56]:
airports.map(lambda c: (c[0], c[1])).filter(lambda c: c[1] == "WA").take(5)

[('Bellingham', 'WA'),
 ('Moses Lake', 'WA'),
 ('Pasco', 'WA'),
 ('Pullman', 'WA'),
 ('Seattle', 'WA')]

**flatMap**

In [57]:
airports.filter(lambda c: c[1] == "WA").map(lambda c: (c[0], c[1])).flatMap(lambda x: x).take(10)

['Bellingham',
 'WA',
 'Moses Lake',
 'WA',
 'Pasco',
 'WA',
 'Pullman',
 'WA',
 'Seattle',
 'WA']

**distinct()**

In [58]:
airports.map(lambda c: c[2]).distinct().take(5)

['Country', 'USA', 'Canada']

**sample()**

In [60]:
flights.map(lambda c: c[3]).sample(False, 0.001, 123).take(5)

['ABQ', 'AEX', 'AGS', 'ANC', 'ATL']

**leftOuterJoin()**

In [61]:
flights.map(lambda c: (c[3], c[0])).take(5)

[('origin', 'date'),
 ('ABE', '01011245'),
 ('ABE', '01020600'),
 ('ABE', '01021245'),
 ('ABE', '01020605')]

In [62]:
airports.map(lambda c: (c[3], c[1])).take(5)

[('IATA', 'State'), ('YXX', 'BC'), ('ABR', 'SD'), ('ABI', 'TX'), ('CAK', 'OH')]

In [63]:
flt = flights.map(lambda c:(c[3], c[0]))
air = airports.map(lambda c:(c[3], c[1]))
flt.join(air).take(5)

[('ABE', ('01011245', 'PA')),
 ('ABE', ('01020600', 'PA')),
 ('ABE', ('01021245', 'PA')),
 ('ABE', ('01020605', 'PA')),
 ('ABE', ('01031245', 'PA'))]

In [64]:
flt = flights.map(lambda c:(c[3], c[0]))
air = airports.map(lambda c:(c[3], c[1]))
air.join(flt).take(5)

[('CAK', ('OH', '01010620')),
 ('CAK', ('OH', '01021110')),
 ('CAK', ('OH', '01020620')),
 ('CAK', ('OH', '01021725')),
 ('CAK', ('OH', '01031110'))]

**repartition()**

In [66]:
flights.getNumPartitions()

2

In [68]:
flights2 = flights.repartition(8)
flights2.getNumPartitions()

8

In [69]:
rdd = sc.parallelize([1, 2, 3, 4], 4)
def f(splitIndex, iterator): yield splitIndex
rdd.mapPartitionsWithIndex(f).sum()

6

**ZipWithIndex()**

In [70]:
# Vieac each row within RDD + the index
# i.e. output is form ([row], idx)

ac = airports.map(lambda c: (c[0], c[3]))
ac.zipWithIndex().take(5)

[(('City', 'IATA'), 0),
 (('Abbotsford', 'YXX'), 1),
 (('Aberdeen', 'ABR'), 2),
 (('Abilene', 'ABI'), 3),
 (('Akron', 'CAK'), 4)]

In [78]:
# zipWithIndex
#   Sauter la ligne d'en-tête en commencant par
#   - filtrer la ligne 0
#   - extraire uniquement les informations sur la ligne

ac.zipWithIndex().filter(lambda row_idx: row_idx[1] > 0).map(lambda row_idx: row_idx[0]).take(5)

[('Abbotsford', 'YXX'),
 ('Aberdeen', 'ABR'),
 ('Abilene', 'ABI'),
 ('Akron', 'CAK'),
 ('Alamosa', 'ALS')]

**sortByKey()**

In [79]:
# takes the origin code delays, remove header
# runs a group by origin code via reduceByKey()
# sorting by the key (origin code)


flights.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .map(lambda c: (c[3], int(c[1])))\
  .reduceByKey(lambda x,y: x + y)\
  .sortByKey()\
  .take(50)


[('ABE', 5113),
 ('ABI', 5128),
 ('ABQ', 64422),
 ('ABY', 1554),
 ('ACT', 392),
 ('ACV', 8429),
 ('ADQ', -254),
 ('AEX', 10193),
 ('AGS', 5003),
 ('ALB', 22362),
 ('ALO', 2866),
 ('AMA', 21979),
 ('ANC', 4948),
 ('ATL', 1151087),
 ('ATW', 8151),
 ('AUS', 108638),
 ('AVL', 5727),
 ('AVP', 2946),
 ('AZO', 233),
 ('BDL', 54662),
 ('BET', -645),
 ('BFL', 4022),
 ('BGR', 2852),
 ('BHM', 44355),
 ('BIL', 2616),
 ('BIS', 3825),
 ('BMI', 7817),
 ('BNA', 212243),
 ('BOI', 18004),
 ('BOS', 238602),
 ('BPT', 1936),
 ('BQK', 3952),
 ('BQN', 3943),
 ('BRO', 4967),
 ('BRW', 880),
 ('BTM', -138),
 ('BTR', 21989),
 ('BTV', 14755),
 ('BUF', 54309),
 ('BUR', 42241),
 ('BWI', 362845),
 ('BZN', 7226),
 ('CAE', 25686),
 ('CAK', 14749),
 ('CDC', 51),
 ('CDV', -1024),
 ('CEC', 2832),
 ('CHA', 7586),
 ('CHO', 2421),
 ('CHS', 30789)]

In [81]:
# Create `a` RDD of washington airports

a = airports.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .filter(lambda c: c[1] == "WA")

# Create `b` RDD of British Columbia airports

b = airports.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .filter(lambda c: c[1] == "BC")

# Union WA and BC airports

a.union(b).take(50)

[['Bellingham', 'WA', 'USA', 'BLI'],
 ['Moses Lake', 'WA', 'USA', 'MWH'],
 ['Pasco', 'WA', 'USA', 'PSC'],
 ['Pullman', 'WA', 'USA', 'PUW'],
 ['Seattle', 'WA', 'USA', 'SEA'],
 ['Spokane', 'WA', 'USA', 'GEG'],
 ['Walla Walla', 'WA', 'USA', 'ALW'],
 ['Wenatchee', 'WA', 'USA', 'EAT'],
 ['Yakima', 'WA', 'USA', 'YKM'],
 ['Abbotsford', 'BC', 'Canada', 'YXX'],
 ['Anahim Lake', 'BC', 'Canada', 'YAA'],
 ['Campbell River', 'BC', 'Canada', 'YBL'],
 ['Castlegar', 'BC', 'Canada', 'YCG'],
 ['Cranbrook', 'BC', 'Canada', 'YXC'],
 ['Fort Nelson', 'BC', 'Canada', 'YYE'],
 ['Fort Saint John', 'BC', 'Canada', 'YXJ'],
 ['Kamloops', 'BC', 'Canada', 'YKA'],
 ['Kelowna', 'BC', 'Canada', 'YLW'],
 ['Nanaimo', 'BC', 'Canada', 'YCD'],
 ['Penticton', 'BC', 'Canada', 'YYF'],
 ['Port Hardy', 'BC', 'Canada', 'YZT'],
 ['Powell River', 'BC', 'Canada', 'YPW'],
 ['Prince George', 'BC', 'Canada', 'YXS'],
 ['Prince Rupert', 'BC', 'Canada', 'YPR'],
 ['Quesnel', 'BC', 'Canada', 'YQZ'],
 ['"Sandspit, Queen Charlotte Islands"',

**Intersect()**

In [82]:
# create first RDD
a = airports.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .filter(lambda c: c[1] == "WA")\
  .map(lambda c: c[3])

In [83]:
flights.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .take(50)

[['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW'],
 ['01021245', '-2', '602', 'ABE', 'ATL'],
 ['01020605', '-4', '602', 'ABE', 'ATL'],
 ['01031245', '-4', '602', 'ABE', 'ATL'],
 ['01030605', '0', '602', 'ABE', 'ATL'],
 ['01041243', '10', '602', 'ABE', 'ATL'],
 ['01040605', '28', '602', 'ABE', 'ATL'],
 ['01051245', '88', '602', 'ABE', 'ATL'],
 ['01050605', '9', '602', 'ABE', 'ATL'],
 ['01061215', '-6', '602', 'ABE', 'ATL'],
 ['01061725', '69', '602', 'ABE', 'ATL'],
 ['01061230', '0', '369', 'ABE', 'DTW'],
 ['01060625', '-3', '602', 'ABE', 'ATL'],
 ['01070600', '0', '369', 'ABE', 'DTW'],
 ['01071725', '0', '602', 'ABE', 'ATL'],
 ['01071230', '0', '369', 'ABE', 'DTW'],
 ['01070625', '0', '602', 'ABE', 'ATL'],
 ['01071219', '0', '569', 'ABE', 'ORD'],
 ['01080600', '0', '369', 'ABE', 'DTW'],
 ['01081230', '33', '369', 'ABE', 'DTW'],
 ['01080625', '1', '602', 'ABE', 'ATL'],
 ['01080607', '5', '569', 'ABE', 'ORD'],
 ['01081219', '54', '569', 'ABE', 'ORD'],
 ['0

In [84]:
flights.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .filter(lambda c: c[1] == '0')\
  .take(50)

[['01030605', '0', '602', 'ABE', 'ATL'],
 ['01061230', '0', '369', 'ABE', 'DTW'],
 ['01070600', '0', '369', 'ABE', 'DTW'],
 ['01071725', '0', '602', 'ABE', 'ATL'],
 ['01071230', '0', '369', 'ABE', 'DTW'],
 ['01070625', '0', '602', 'ABE', 'ATL'],
 ['01071219', '0', '569', 'ABE', 'ORD'],
 ['01080600', '0', '369', 'ABE', 'DTW'],
 ['01091725', '0', '602', 'ABE', 'ATL'],
 ['01101219', '0', '569', 'ABE', 'ORD'],
 ['01150600', '0', '369', 'ABE', 'DTW'],
 ['01151230', '0', '369', 'ABE', 'DTW'],
 ['01150625', '0', '602', 'ABE', 'ATL'],
 ['01150607', '0', '569', 'ABE', 'ORD'],
 ['01151219', '0', '569', 'ABE', 'ORD'],
 ['01180625', '0', '602', 'ABE', 'ATL'],
 ['01211725', '0', '602', 'ABE', 'ATL'],
 ['01241219', '0', '569', 'ABE', 'ORD'],
 ['01271219', '0', '569', 'ABE', 'ORD'],
 ['01281219', '0', '569', 'ABE', 'ORD'],
 ['01290600', '0', '369', 'ABE', 'DTW'],
 ['01291230', '0', '369', 'ABE', 'DTW'],
 ['01301215', '0', '602', 'ABE', 'ATL'],
 ['01301230', '0', '369', 'ABE', 'DTW'],
 ['01300625', '0

In [85]:
a.take(50)

['BLI', 'MWH', 'PSC', 'PUW', 'SEA', 'GEG', 'ALW', 'EAT', 'YKM']

In [86]:
flights.take(3)

[['date', 'delay', 'distance', 'origin', 'destination'],
 ['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW']]

### **RDD Actions**

In [87]:
# take(n)

airports.take(3)

[['City', 'State', 'Country', 'IATA'],
 ['Abbotsford', 'BC', 'Canada', 'YXX'],
 ['Aberdeen', 'SD', 'USA', 'ABR']]

In [88]:
# collect()

airports.filter(lambda c: c[1] == "WA").collect()

[['Bellingham', 'WA', 'USA', 'BLI'],
 ['Moses Lake', 'WA', 'USA', 'MWH'],
 ['Pasco', 'WA', 'USA', 'PSC'],
 ['Pullman', 'WA', 'USA', 'PUW'],
 ['Seattle', 'WA', 'USA', 'SEA'],
 ['Spokane', 'WA', 'USA', 'GEG'],
 ['Walla Walla', 'WA', 'USA', 'ALW'],
 ['Wenatchee', 'WA', 'USA', 'EAT'],
 ['Yakima', 'WA', 'USA', 'YKM']]

In [89]:
# reduce(f)

flights\
  .filter(lambda c: c[3] == 'SEA' and c[4] == 'SFO')\
  .map(lambda c: int(c[1]))\
  .reduce(lambda x,y: x + y)

22293

In [90]:
flights.take(5)

[['date', 'delay', 'distance', 'origin', 'destination'],
 ['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW'],
 ['01021245', '-2', '602', 'ABE', 'ATL'],
 ['01020605', '-4', '602', 'ABE', 'ATL']]

In [91]:
# count

flights.zipWithIndex()\
  .filter(lambda row_idx: row_idx[1] > 0)\
  .map(lambda row_idx: row_idx[0])\
  .count()

1391578

In [93]:
airports.saveAsTextFile("/tmp/eti/airports/")

In [96]:
!ls /tmp/eti/airports/

part-00000  part-00001	_SUCCESS


### **Pitfalls of using RDDs**

In [97]:
### Getting Ready

flights = sc.textFile(file_path_2)\
            .map(lambda line: line.split(","))\
            .zipWithIndex()\
            .filter(lambda row_idx: row_idx[1] > 0)\
            .map(lambda row_idx: row_idx[0])

In [98]:
flights.take(5)

[['01011245', '6', '602', 'ABE', 'ATL'],
 ['01020600', '-8', '369', 'ABE', 'DTW'],
 ['01021245', '-2', '602', 'ABE', 'ATL'],
 ['01020605', '-4', '602', 'ABE', 'ATL'],
 ['01031245', '-4', '602', 'ABE', 'ATL']]

In [100]:
fligthsDF = spark.read.options(header='true', inferSchema='true').csv(file_path_2)
fligthsDF.createOrReplaceTempView("fligthsDF")

In [101]:
fligthsDF.show(5)

+-------+-----+--------+------+-----------+
|   date|delay|distance|origin|destination|
+-------+-----+--------+------+-----------+
|1011245|    6|     602|   ABE|        ATL|
|1020600|   -8|     369|   ABE|        DTW|
|1021245|   -2|     602|   ABE|        ATL|
|1020605|   -4|     602|   ABE|        ATL|
|1031245|   -4|     602|   ABE|        ATL|
+-------+-----+--------+------+-----------+
only showing top 5 rows



In [102]:
# How to do it
flights.map(lambda c: (c[3], int(c[1]))).reduceByKey(lambda x, y: x + y).sortByKey().take(50)

[('ABE', 5113),
 ('ABI', 5128),
 ('ABQ', 64422),
 ('ABY', 1554),
 ('ACT', 392),
 ('ACV', 8429),
 ('ADQ', -254),
 ('AEX', 10193),
 ('AGS', 5003),
 ('ALB', 22362),
 ('ALO', 2866),
 ('AMA', 21979),
 ('ANC', 4948),
 ('ATL', 1151087),
 ('ATW', 8151),
 ('AUS', 108638),
 ('AVL', 5727),
 ('AVP', 2946),
 ('AZO', 233),
 ('BDL', 54662),
 ('BET', -645),
 ('BFL', 4022),
 ('BGR', 2852),
 ('BHM', 44355),
 ('BIL', 2616),
 ('BIS', 3825),
 ('BMI', 7817),
 ('BNA', 212243),
 ('BOI', 18004),
 ('BOS', 238602),
 ('BPT', 1936),
 ('BQK', 3952),
 ('BQN', 3943),
 ('BRO', 4967),
 ('BRW', 880),
 ('BTM', -138),
 ('BTR', 21989),
 ('BTV', 14755),
 ('BUF', 54309),
 ('BUR', 42241),
 ('BWI', 362845),
 ('BZN', 7226),
 ('CAE', 25686),
 ('CAK', 14749),
 ('CDC', 51),
 ('CDV', -1024),
 ('CEC', 2832),
 ('CHA', 7586),
 ('CHO', 2421),
 ('CHS', 30789)]

In [104]:
flights = sc.textFile(file_path_2, minPartitions=8)\
            .map(lambda line: line.split(","))\
            .zipWithIndex()\
            .filter(lambda row_idx: row_idx[1] > 0)\
            .map(lambda row_idx: row_idx[0])

In [105]:
flights.count()

1391578

In [106]:
flights.getNumPartitions()

8

In [107]:
def count_in_a_partitions(idx, iterator):
  count = 0
  for _ in iterator:
    count += 1
  return idx, count

flights.mapPartitionsWithIndex(count_in_a_partitions).collect()

[0,
 174293,
 1,
 174020,
 2,
 173849,
 3,
 174006,
 4,
 173864,
 5,
 174308,
 6,
 173620,
 7,
 173618]

In [108]:
# How to do it
flights.map(lambda c: (c[3], int(c[1]))).reduceByKey(lambda x, y: x + y).sortByKey().take(50)

[('ABE', 5113),
 ('ABI', 5128),
 ('ABQ', 64422),
 ('ABY', 1554),
 ('ACT', 392),
 ('ACV', 8429),
 ('ADQ', -254),
 ('AEX', 10193),
 ('AGS', 5003),
 ('ALB', 22362),
 ('ALO', 2866),
 ('AMA', 21979),
 ('ANC', 4948),
 ('ATL', 1151087),
 ('ATW', 8151),
 ('AUS', 108638),
 ('AVL', 5727),
 ('AVP', 2946),
 ('AZO', 233),
 ('BDL', 54662),
 ('BET', -645),
 ('BFL', 4022),
 ('BGR', 2852),
 ('BHM', 44355),
 ('BIL', 2616),
 ('BIS', 3825),
 ('BMI', 7817),
 ('BNA', 212243),
 ('BOI', 18004),
 ('BOS', 238602),
 ('BPT', 1936),
 ('BQK', 3952),
 ('BQN', 3943),
 ('BRO', 4967),
 ('BRW', 880),
 ('BTM', -138),
 ('BTR', 21989),
 ('BTV', 14755),
 ('BUF', 54309),
 ('BUR', 42241),
 ('BWI', 362845),
 ('BZN', 7226),
 ('CAE', 25686),
 ('CAK', 14749),
 ('CDC', 51),
 ('CDV', -1024),
 ('CEC', 2832),
 ('CHA', 7586),
 ('CHO', 2421),
 ('CHS', 30789)]