In [None]:
# Writing to a text document

# Note that the text file does NOT NEED to previously exist when writing - Python will automatically
# create the text document for you.
# The little "w" string in the second parameter below just means that the file will be opened in "writing" mode.
# Python understands what the "w" means.
# my_writing_file stores a File object that the open() function gives to you
# (just like a String object, an Integer object, a List object, etc.)

# The "with" statement closes the file for you as soon as the indented code finishes - even if one of the
# lines inside crashes - so the file can never be accidentally left open.
with open("text_file_1.txt", "w") as my_writing_file:
    # The .write() method should be clear - write the string inside to the document
    my_writing_file.write("Here is the first line!\n")
    my_writing_file.write("Here is the second line!\n")
    my_writing_file.write("Here is the third line!\n")
# No my_writing_file.close() is needed here - leaving the "with" block already closed the file.
print("Done!")

# NOTE: WHEN READING OR WRITING, MAKE SURE THE FILE ITSELF IS CLOSED ON YOUR COMPUTER!
# YOUR COMPUTER MAY NOT GIVE PYTHON PERMISSION TO READ/WRITE THE FILE IF YOU HAVE IT OPEN!

In [None]:
# Reading from a text document

# CRITICAL: YOU ACTUALLY NEED TO HAVE A FILE NAMED text_file_1.txt in your default Current Working Directory
# (remember os.getcwd() from last class?) for it to work. If you want to open a file in another directory, you need
# to copy/paste the whole path (i.e. C:/Program Files/My Thing/text_file_1.txt)
# Luckily, the cell right above this one creates text_file_1.txt in your current working directory for you :)

# The "r" means the file is opened in "reading" mode.
# The "with" statement closes the file automatically once the indented code is finished, even if reading fails.
with open("text_file_1.txt", "r") as my_reading_file:
    # Reads the WHOLE text file in one gulp into a single string
    text_in_file = my_reading_file.read()

# The file is already closed at this point, but the string we read from it is still ours to use.
# Let's see what we have?
print(text_in_file)
print("Done!")

In [None]:
# Alternate - Read a file as a list of lines of text instead of one giant text blob

# The "with" statement closes the file automatically once the indented code is finished, even if reading fails.
with open("text_file_1.txt", "r") as my_reading_file:
    # This makes a LIST, with each list element being one line of text
    # Note the readLINES method!
    list_of_lines = my_reading_file.readlines()

# Index 1 is the SECOND line of the file (list indexes start at 0)
print(list_of_lines[1])
print("Done!")

In [None]:
# Writing to CSV Files
import unicodecsv as csv  # The unicodecsv package - Creating spreadsheets with your output!
# This is a little tricky to explain. Python has a library called "csv" automatically. However, Python's generic CSV
# library has a major problem for social scientists - it doesn't work very well with non-English characters!
# It freaks out quite regularly with the incredibly frustrating Unicode(De/En)codeError, which I pray you never get.
# Thankfully, one ingenious Python developer decided to make a "wrapper" library called unicodecsv. The unicodecsv
# library does everything that the normal csv library does, but it also accepts international text quite easily.
# IMPORTANT: YOU MUST INSTALL THE NEW LIBRARY BEFORE IT WILL WORK! To do this, open up a NEW command prompt/terminal
# and type in "pip install unicodecsv" (without quotes)

my_variable = "Hey Look"

# Note the "wb" here instead of "w"! "wb" signifies to Python that you want to write in something called
# "Byte Mode", or in raw computer code rather than computer code translated into text. The unicodecsv writer
# below is handed a file opened this way. Thankfully, the CSV will still display properly because
# Excel is extremely good at translating computer code into text for us :)
# The "with" statement closes opened_file automatically when the indented code finishes, even if a row fails to write.
with open('test_spreadsheet.csv', 'wb') as opened_file:
    # Finally, you call the unicodecsv library. Note the "as csv" at the top of the cell - all that means is that
    # when you write "csv.", Python understands it to mean "unicodecsv.". It saves a fair amount of typing over time.
    # The csv.writer() function takes in a regular File object (set to byte mode) as input, and spits out ("returns")
    # a new CSVFile object. Unlike normal File objects, the CSVFile object has special capabilities.
    # I've saved the CSVFile object in the my_csv_file variable.
    my_csv_file = csv.writer(opened_file)

    # For example, CSVFile objects give you a writerow() method, which is really useful!
    # We can understand why normal File objects don't allow writerow() - what does writing a row even mean in a normal
    # text document??
    my_csv_file.writerow([my_variable,4.6543,7.3453,2,1232])
    my_csv_file.writerow([123,345,567,754,"Bunny Rabbit","Niagra Falls","Python!"])
    # And there, you just wrote two rows!

# NOTE: Only the ORIGINAL File object ever needs closing (the "with" block just did that for us);
# you DON'T need to close the CSVFile object.
print("Done!")

In [None]:
# Reading from CSV Files - pretty similar

import unicodecsv as csv

# Right after it is written, there's no reason you can't read it!
read_filepath = 'test_spreadsheet.csv'
my_new_list = []

# Note the "rb" once again! Reading or writing, must be in Byte Mode!
# The "with" statement closes file_to_read automatically when the indented code finishes, even if a row fails to read.
with open(read_filepath, 'rb') as file_to_read:
    # As a cousin of csv.writer, csv.reader lets you READ a CSV file that already exists
    csv_to_read = csv.reader(file_to_read)

    # To read the CSVFile contents, you use a for loop.
    # Each iteration of the for loop (each time it goes through the indented code), it returns you the next row
    # Later on we'll be looking into advanced methods of MODIFYING CSV files, but for now we're focusing mostly
    # on READING and WRITING
    # enumerate() hands us a running counter (idx) alongside each row.
    for idx, row in enumerate(csv_to_read):
        # Only print the counter for the first four rows
        if idx < 4:
            print(idx)
        # Maybe we only want to get a CERTAIN COLUMN into our list
        #my_new_list.append(row[2])
        #print(row[4])
        # Let's think over the logic. If the for loop goes through every row, and in each row we pick up the 3rd
        # spreadsheet cell and put it into a list, then naturally at the very end of the for loop we have a new
        # column created that is identical to the column in the spreadsheet!

# What does the for loop above get us? A column! (based on the index used)
# (The list stays EMPTY until you un-comment the my_new_list.append(...) line inside the loop.)
print(my_new_list)
print("Done!")

In [None]:
# A short aside - Nested Dictionaries
# Dictionaries can live inside other dictionaries, exactly the way lists can live inside lists.
# Databases usually hand Python this kind of nested dictionary when you ask them for information.
# The data below may look intimidating at first - run the cell and the pretty-printed version will be much clearer.
# In practice you rarely build dictionaries like this by hand; you receive them from the internet and modify them.

import pprint # pprint is the Pretty Print package - it turns complex data like nested dictionaries into something readable!

# The inner 'status' dictionary
status_details = {
    'avgDelay': '',
    'closureBegin': '',
    'closureEnd': '',
    'endTime': '',
    'maxDelay': '',
    'minDelay': '',
    'reason': 'No known delays for this airport.',
    'trend': '',
    'type': '',
}

# The inner 'weather' dictionary - which itself holds yet another dictionary under 'meta'
weather_details = {
    'meta': {
        'credit': "NOAA's National Weather Service",
        'updated': '8:52 PM Local',
        'url': 'http://weather.gov/',
    },
    'temp': '77.0 F (25.0 C)',
    'visibility': 10.0,
    'weather': 'Mostly Cloudy',
    'wind': 'North at 5.8mph',
}

# The outer dictionary: most values are plain strings, but 'status' and 'weather' are whole dictionaries
dca_airport_data = {
    'IATA': 'DCA',
    'ICAO': 'KDCA',
    'city': 'Washington',
    'delay': 'false',
    'name': 'Ronald Reagan Washington National',
    'state': 'District of Columbia',
    'status': status_details,
    'weather': weather_details,
}

# A normal print crams everything onto one line - hard to read!
print("Here is the regular print nested dictionary:\n")
print(dca_airport_data)

# Pretty Print lays out each key on its own line, so the dictionary-inside-dictionary values stand out.
print("\nHere is the pretty print nested dictionary:\n")
pprint.pprint(dca_airport_data)
# Look at the 'status' key in the output: its value is not a string but a dictionary of its own.
# To reach the 'reason' value inside it you use square brackets twice:
# reason_value = dca_airport_data['status']['reason']
# This is the same trick as using two indexes on a nested list.
print("Done!")

# In a notebook, a bare expression on the last line of a cell is displayed as the cell's output
dca_airport_data['IATA']

In [None]:
# Step 1: grab the inner dictionary stored under the 'status' key
status_info = dca_airport_data['status']
# Step 2: look up 'reason' inside that inner dictionary (a notebook displays this last expression)
status_info['reason']

In [None]:
# NEW CELL
# Taking a page from the next workshop...
# Basic website data-grabbing
# First import the requests library
import requests

# Second, use the requests library to get a webpage
# Using requests.get() just grabs whatever is at that website
my_hyperlink = "http://mason.gmu.edu/~jlee17/python_workshop_files/example_data/index-very-simple.html"
# In this case, that whatever is a normal webpage.
# Python receives the webpage as a Response Object. Just like String, Integer, Float, etc...
# The timeout (in seconds) makes Python give up with an error instead of waiting forever
# if the website never answers.
my_response_object = requests.get(my_hyperlink, timeout=10)

# The Response object has not only methods inside it (discussed next week)...
# It also contains variables inside it, and stored in those variables are very useful objects
# One of the most useful is the text variable. It contains the webpage as a simple string
print(my_response_object.text)

In [None]:
### NEW CELL
# But now let's go back to that nested dictionary I showed you... How did I get it in the first place???
# By accessing the FAA's Database with Python!

import requests
import pprint

# The Big Word: API - Application Programming Interface
# All API means is "we have configured our data so that you can access it super-easily with a programming language"
# Huh, this hyperlink is weird, isn't it? What does it all mean?
url = 'http://services.faa.gov/airport/status/DCA?format=application/json'

# Now to access the get() function inside the requests library.
# The timeout (in seconds) makes Python give up with an error instead of waiting forever
# if the server never answers.
response = requests.get(url, timeout=10)

# If the server answered with an error page (like "404 Not Found"), stop right here with a clear error
# rather than failing on the next line while trying to convert an error page into a dictionary.
response.raise_for_status()

# Here is the critical line! Oftentimes APIs will send you data in a special web format called JSON.
# JSONs are cousins of Python dictionaries, and Python can easily convert them for you out of the Response object
my_new_dictionary = response.json()

# Let's print it and see what we got!
pprint.pprint(my_new_dictionary)

In [None]:
### NEW CELL
# What if I wanted to access multiple airports?
# Imports go at the top of the cell so that this cell also works when it is run on its own
import pprint
import requests

my_list = ["MIA","DCA","BWI","IAD","RSW"]
my_airport_dictionaries = []

# One request per airport code: the code is glued into the middle of the hyperlink
for my_element in my_list:
    url = 'http://services.faa.gov/airport/status/' + my_element + '?format=application/json'
    # The timeout (in seconds) stops the loop from waiting forever on a server that never answers
    response = requests.get(url, timeout=10)
    # Stop with a clear error if the server sent back an error page instead of airport data
    response.raise_for_status()
    # Convert the JSON in the Response object into a Python dictionary
    dict_file = response.json()
    # Keep every airport's dictionary in one list so that later cells can use them
    my_airport_dictionaries.append(dict_file)
    pprint.pprint(dict_file)

In [None]:
### NEW CELL
# Show the whole list of airport dictionaries in one go.
# pformat() builds the same nicely laid-out text that pprint() would show, but hands it back as a string,
# which we then print ourselves.

all_airports_text = pprint.pformat(my_airport_dictionaries)
print(all_airports_text)

In [None]:
### NEW CELL
# How do we write dictionaries to files?
import unicodecsv as csv

# "wb" = write in Byte Mode, which is how the other unicodecsv cells open their files too.
# The "with" statement closes my_file automatically when the indented code finishes - even if
# one of the dictionaries is missing a key and the loop crashes halfway through.
with open("airport_output.csv","wb") as my_file:
    csv_file = csv.writer(my_file)

    # First row: the column headers
    csv_file.writerow(['Airport Code','City','Temperature'])
    # Then one row per airport. Note the double square brackets used to reach into the nested
    # 'weather' dictionary for its 'temp' value.
    for airport_dict in my_airport_dictionaries:
        csv_file.writerow([airport_dict['IATA'],airport_dict['city'],airport_dict['weather']['temp']])
print("Done!")



In [None]:
#PDF To TXT Conversion
import PyPDF2 # Wait! We don't have this package! How do we get it?

# NOTE(review): PdfFileReader / numPages / getPage / extractText are the names from older PyPDF2 releases;
# newer releases appear to have renamed them - confirm against the PyPDF2 version you have installed.

page_texts = []

# Just like WRITE in byte mode, we can READ in byte mode ("rb").
# The "with" statement closes the PDF automatically once we are done reading it, even if a page fails to parse.
# All the page reading happens INSIDE the "with" block, while the PDF is still open.
with open('Sample PDF.pdf', 'rb') as pdfFileObj:
    pdf_file_reading_object = PyPDF2.PdfFileReader(pdfFileObj)
    print("There are " + str(pdf_file_reading_object.numPages) + " pages.")

    # Collect the text of every page, one list element per page
    for i in range(0,pdf_file_reading_object.numPages):
        pageObj = pdf_file_reading_object.getPage(i)
        page_texts.append(pageObj.extractText())

# Glue all the page texts together into one giant string.
# "".join(...) does the same job as repeating mega_string += page_text for every page, just in one step.
mega_string = "".join(page_texts)

# The output file is only opened AFTER the PDF was read successfully, so a broken PDF
# does not leave an empty text file behind.
with open('the_text_file.txt','w') as new_file:
    new_file.write(mega_string)
print("Complete!")

In [None]:
# Turning a text document into a spreadsheet
import unicodecsv as csv

# One "with" statement can open several files at once. Both files are closed automatically
# when the indented code finishes, even if something goes wrong halfway through.
# The input is a normal text file ("r"); the output goes through unicodecsv, so it is opened in Byte Mode ("wb").
with open("6.2-male_names.txt","r") as my_input, open("my_spreadsheet.csv","wb") as my_output:
    my_csv = csv.writer(my_output)
    # A list with one element per line of the text file
    my_rows = my_input.readlines()
    for row in my_rows:
        # .split() with nothing inside the parentheses chops the line apart wherever there is whitespace,
        # so each "part" of the line ends up in its own spreadsheet cell
        new_row = row.split()
        my_csv.writerow(new_row)
print("Done!")

In [None]:
# .split() with no argument cuts a string apart at every run of whitespace,
# so the extra spaces between the words do not create empty pieces.
my_string = "Hello    World    I     am    Josh"
list_of_words = my_string.split()
print(list_of_words)

'''
In-Workshop Assignment
You need to transform a text document into a spreadsheet. How can you go about doing this?

Core Functionality:
(1) Read in the file male_names.txt
(2) Open an output file
(3) Wrap the output file as a CSVFile Object
(4) Loop through the text from male_names.txt, writing each "part" to its own cell
(5) Close both the input and output files
(Hint: The .split() string method may be useful...)
'''

'''
Advanced Functionality:
(1) Do the same thing, but this time make sure your output file only has names that begin with the letter "B"
(Hint: String objects have a very useful startswith() method which returns True or False.
Thus, you could do if my_string.startswith("B") == True:)
'''