<a href="https://colab.research.google.com/github/M-110/automate-the-boring-stuff/blob/main/16_Working_with_CSV_files_and_JSON_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the book's companion archive, unpack it, delete the archive, and
# rename the `automate_online-materials` folder to `materials` so the paths
# used in later cells stay short.
!wget https://nostarch.com/download/Automate_the_Boring_Stuff_onlinematerials_v.2.zip -q
!unzip -q Automate_the_Boring_Stuff_onlinematerials_v.2.zip
!rm Automate_the_Boring_Stuff_onlinematerials_v.2.zip
!mv automate_online-materials materials

# CSV

In [None]:
import csv

In [None]:
# Read every row of the example CSV into a list of lists and print it.
# newline='' leaves newline handling to the csv module (as its documentation
# asks), so quoted fields that contain line breaks are parsed correctly.
with open('materials/example.csv', newline='') as file:
  reader = csv.reader(file)
  print(list(reader))

[['4/5/2014 13:34', 'Apples', '73'], ['4/5/2014 3:41', 'Cherries', '85'], ['4/6/2014 12:46', 'Pears', '14'], ['4/8/2014 8:59', 'Oranges', '52'], ['4/10/2014 2:07', 'Apples', '152'], ['4/10/2014 18:10', 'Bananas', '23'], ['4/10/2014 2:40', 'Strawberries', '98']]


In [None]:
# Stream the example CSV one row at a time; each row is a list of strings.
# newline='' leaves newline handling to the csv module (as its documentation
# asks), so quoted fields that contain line breaks are parsed correctly.
with open('materials/example.csv', newline='') as file:
  reader = csv.reader(file)
  for row in reader:
    print(row)

['4/5/2014 13:34', 'Apples', '73']
['4/5/2014 3:41', 'Cherries', '85']
['4/6/2014 12:46', 'Pears', '14']
['4/8/2014 8:59', 'Oranges', '52']
['4/10/2014 2:07', 'Apples', '152']
['4/10/2014 18:10', 'Bananas', '23']
['4/10/2014 2:40', 'Strawberries', '98']


## Create CSV

In [None]:
# Write a header row and one data row to output.csv.
# newline='' stops the text layer from translating the writer's '\r\n' row
# terminator, which would otherwise come out as '\r\r\n' on Windows.
with open('output.csv', 'w', newline='') as file:
  writer = csv.writer(file)
  writer.writerow(['Name', 'Fake Name', 'Hidden Name', 'Ultimate Name'])
  writer.writerow(['Clark', 'Superman', 'Kal-El', 'Superbaby'])

In [None]:
!cat output.csv

Name,Fake Name,Hidden Name,Ultimate Name
Clark,Superman,Kal-El,Superbaby


In [None]:
# Overwrite output.csv using '|' as the field separator and a blank line
# after every row.  newline='' ensures the custom '\n\n' terminator is written
# exactly as given instead of being translated by the text layer.
with open('output.csv', 'w', newline='') as file:
  writer = csv.writer(file, delimiter='|', lineterminator='\n\n')
  writer.writerow(list('ABCD'))
  writer.writerow([1,2,3,4])
  writer.writerow([5,6,7,8])

In [None]:
!cat output.csv

A|B|C|D

1|2|3|4

5|6|7|8



## CSV dicts

In [None]:
# example.csv has no header row, so the column names are supplied explicitly;
# every row then comes back as a dict keyed by those names.
# newline='' leaves newline handling to the csv module, per its documentation.
with open('materials/example.csv', newline='') as file:
  dict_reader = csv.DictReader(file, fieldnames=['time', 'name', 'amount'])
  for row in dict_reader:
    print(row['name'], row['amount'])

Apples 73
Cherries 85
Pears 14
Oranges 52
Apples 152
Bananas 23
Strawberries 98


In [None]:
# Write dict rows to output.csv; writeheader() emits the field names first.
# newline='' stops the text layer from translating the writer's '\r\n' row
# terminator, which would otherwise come out as '\r\r\n' on Windows.
with open('output.csv', 'w', newline='') as file:
  writer = csv.DictWriter(file, ['Name', 'Pet', 'Phone'])
  writer.writeheader()
  writer.writerow({'Name': 'alice', 'Pet': 'rabbit', 'Phone': 'pixel'})
  writer.writerow({'Name': 'bob', 'Pet': 'walrus', 'Phone': 'nokia'})

In [None]:
!cat output.csv

Name,Pet,Phone
alice,rabbit,pixel
bob,walrus,nokia


# Project: Removing the Header from CSV Files

In [None]:
%%writefile remove_headers.py
#!/usr/bin/env python
"""Remove headers from all csv files in a directory and resave them."""
import argparse
import csv
from pathlib import Path


def main():
  """Strip the first row from every .csv file in the given directory.

  The directory is taken from the command line (see get_args).  Each input
  file is copied, minus its first row, to a file of the same name inside a
  'headers_removed' folder in the current working directory.
  """
  args = get_args()
  source_dir = Path(args.directory)
  output_dir = Path('headers_removed')
  output_dir.mkdir(exist_ok=True)
  for csv_path in source_dir.glob('*.csv'):
    # newline='' on both files leaves newline handling to the csv module, as
    # its documentation asks, so embedded line breaks and row terminators
    # survive the round trip unchanged.
    with \
        open(csv_path, newline='') as file, \
        open(output_dir / csv_path.name, 'w', newline='') as output:
      csv_reader = csv.reader(file)
      csv_writer = csv.writer(output)
      # Skip the header row.  The None default keeps an empty input file from
      # raising StopIteration and aborting the whole run.
      next(csv_reader, None)
      csv_writer.writerows(csv_reader)
  print('Saved files to "headers_removed/"')


def get_args():
  """Parse the command line.

  Returns:
    The argparse namespace; its ``directory`` attribute holds the single
    required positional argument.
  """
  description = 'Remove headers from all csv files in a directory'
  cli = argparse.ArgumentParser(description=description)
  cli.add_argument('directory', help='directory to find csv files')
  return cli.parse_args()


if __name__ == '__main__':
  main()


Overwriting remove_headers.py


In [None]:
!python remove_headers.py materials

Saved files to "headers_removed/"


# JSON and APIs

In [None]:
import json

In [None]:
json_string = '{"name": "Zophie", "isCat": true, "miceCaught": 0, "felineIQ": null}'

In [None]:
!echo '{"name": "Zophie", "isCat": true, "miceCaught": 0,"felineIQ": null}' > test.json

In [None]:
json.loads(json_string)

{'felineIQ': None, 'isCat': True, 'miceCaught': 0, 'name': 'Zophie'}

In [None]:
# Parse the JSON file into Python objects and print the result.
# JSON text is normally UTF-8, so decode it explicitly rather than relying on
# the platform's default encoding.
with open('test.json', encoding='utf-8') as json_file:
  print(json.load(json_file))

{'name': 'Zophie', 'isCat': True, 'miceCaught': 0, 'felineIQ': None}


In [None]:
d = dict(name='Python', version='3.7', os='linux')

In [None]:
json.dumps(d)

'{"name": "Python", "version": "3.7", "os": "linux"}'

In [None]:
# Serialize the dict to a JSON string and save it as test2.json.
with open('test2.json', 'w') as out_file:
  out_file.write(json.dumps(d))

In [None]:
!cat test2.json

{"name": "Python", "version": "3.7", "os": "linux"}

# Project: Fetching Current Weather Data

In [None]:
from getpass import getpass

In [None]:
# Prompt for the API key without echoing it and save it to the API_KEY file
# that weather.py reads.  Whitespace picked up while pasting is stripped so it
# cannot end up inside the request's appid parameter.
with open('API_KEY', 'w') as key_file:
  key_file.write(getpass('api_key').strip())

api_key··········


In [None]:
%%writefile weather.py
#!/usr/bin/env python
"""Get the daily forecast for a city."""
import argparse
import datetime
import json

import requests

# Endpoint that main() queries for forecast data.
API_URL = 'https://api.openweathermap.org/data/2.5/forecast'

# Eight-line text box for a single forecast entry.  create_forecast_string
# fills the five {} slots, in order, with: weekday name, high, low, wind and
# the weather summary.  The box has a right border only; the left border of
# the first box comes from LEFT_BAR.
WEATHER_TEMPLATE = """-----------+
{}|
===========|
 High: {}|
 Low: {}|
 Wind: {}|
 {}|
-----------+"""

# Eight-line left border, one character wide, placed before the first box so
# the row of boxes is closed on the left.
LEFT_BAR = """+
|
|
|
|
|
|
+"""

# Indexed by datetime.weekday(), which returns 0 for Monday.
WEEKDAYS = ['Monday', 'Tuesday', 'Wednesday','Thursday',
            'Friday', 'Saturday', 'Sunday']
    

def main():
  """Fetch the forecast for the requested city and print it as text boxes.

  Reads the search query from the command line and the API key from the
  API_KEY file, requests the forecast in imperial units, and prints the city
  name followed by one box per sampled forecast entry.

  Raises:
    requests.HTTPError: if the API answers with an error status (for example
      an invalid key or an unknown city).
  """
  args = get_args()
  api_key = get_api_key()
  params = {'q': args.query, 'appid': api_key, 'units': 'imperial'}
  # A timeout keeps the script from hanging forever on a stalled connection.
  response = requests.get(API_URL, params=params, timeout=10)
  # Surface HTTP failures directly instead of as a confusing KeyError below,
  # when an error payload has no 'list' entry.
  response.raise_for_status()
  weather_data = response.json()
  # NOTE(review): every 7th entry is sampled.  The /forecast endpoint is
  # documented as 3-hourly, which would make this a 21-hour stride that can
  # show the same weekday twice -- confirm whether a stride of 8 was intended.
  weather_boxes = [LEFT_BAR]
  weather_boxes.extend(create_forecast_string(entry)
                       for entry in weather_data['list'][::7])
  print(f"{weather_data['city']['name']}:")
  print(combine_columns(weather_boxes))


def get_args():
  """Parse the command line.

  Returns:
    The argparse namespace; its ``query`` attribute holds the single required
    positional argument.
  """
  cli = argparse.ArgumentParser(
      description='Get the daily weather forecast for a city')
  cli.add_argument('query', help='Location to search for (City)')
  namespace = cli.parse_args()
  return namespace


def get_api_key():
  """Return the API key stored in the 'API_KEY' file.

  The file is looked up relative to the current working directory.  Leading
  and trailing whitespace is removed, so a key file saved with a trailing
  newline (as most editors do) still yields a usable key.

  Raises:
    FileNotFoundError: if the 'API_KEY' file does not exist.
  """
  with open('API_KEY') as key_file:
    return key_file.read().strip()


def create_forecast_string(day_data):
  """Create a string in the shape of a box with the weather data.

  Args:
    day_data: one forecast entry.  The values read are day_data['dt'],
      day_data['main']['temp_max'], day_data['main']['temp_min'],
      day_data['wind']['speed'] and day_data['weather'][0]['main'].

  Returns:
    WEATHER_TEMPLATE filled in with the weekday name, the rounded high, low
    and wind values, and the weather label.
  """
  # fromtimestamp() converts to the local time zone of the machine running
  # the script, so the weekday shown is the local one.
  date = datetime.datetime.fromtimestamp(day_data['dt'])
  high = round(day_data['main']['temp_max'])
  low = round(day_data['main']['temp_min'])
  wind = round(day_data['wind']['speed'])
  # The box interior is 11 characters wide: one leading space plus 10 for the
  # label.  Longer labels such as 'Thunderstorm' are cut to fit; ljust alone
  # would let them push the right border out of line.
  weather = day_data['weather'][0]['main'][:10]
  day_of_week = WEEKDAYS[date.weekday()]
  return WEATHER_TEMPLATE.format(
      day_of_week.center(11),
      str(high).ljust(4),
      str(low).ljust(5),
      str(wind).ljust(4),
      weather.ljust(10)
      )


def combine_columns(columns):
  """Join multi-line text blocks side by side.

  Line i of the result is line i of every block concatenated in order.  As
  with zip, the result has as many lines as the shortest block.
  """
  split_boxes = (box.split('\n') for box in columns)
  merged_lines = []
  for line_parts in zip(*split_boxes):
    merged_lines.append(''.join(line_parts))
  return '\n'.join(merged_lines)

if __name__ == '__main__':
  main()


Overwriting weather.py


In [None]:
!chmod +x weather.py

In [None]:
!./weather.py fargo

Fargo:
+-----------+-----------+-----------+-----------+-----------+-----------+
|   Friday  |  Saturday |   Sunday  |   Monday  |  Tuesday  | Wednesday |
| High: 45  | High: 46  | High: 42  | High: 33  | High: 43  | High: 44  |
| Low: 45   | Low: 46   | Low: 42   | Low: 33   | Low: 43   | Low: 44   |
| Wind: 6   | Wind: 9   | Wind: 13  | Wind: 6   | Wind: 20  | Wind: 15  |
| Clouds    | Clouds    | Clouds    | Clouds    | Clouds    | Rain      |
+-----------+-----------+-----------+-----------+-----------+-----------+


# Practice Project: Excel-to-CSV Converter

In [None]:
# -p keeps mkdir from failing when the folder already exists, and -o makes
# unzip overwrite earlier extractions instead of stopping to prompt, so the
# cell can be re-run safely.
!mkdir -p excel_spreadsheets
!unzip -q -o materials/excelSpreadsheets.zip -d excel_spreadsheets

In [None]:
%%writefile excel_to_csv.py
#!/usr/bin/env python
"""Convert all excel files in a directory to csv."""
import argparse
import csv
from pathlib import Path

import openpyxl


def main():
  """Convert every .xlsx file in the requested directory to csv.

  The csv files are written to a sibling folder named after the source
  directory with '_xlsx_to_csv_conversions' appended, and a one-line summary
  is printed at the end.
  """
  source_dir = Path(get_args().directory)
  output_dir = Path(source_dir.name + '_xlsx_to_csv_conversions/')
  output_dir.mkdir(exist_ok=True)
  count = 0  # stays 0 when the directory holds no .xlsx files
  for count, workbook_path in enumerate(source_dir.glob('*.xlsx'), start=1):
    convert_to_csv(workbook_path, output_dir)
  print(f'Converted {count} files to csv. Saved in {output_dir}')


def get_args():
  """Parse the command line.

  Returns:
    The argparse namespace; its ``directory`` attribute holds the single
    required positional argument.
  """
  positional = {'help': 'Directory containing the excel files'}
  cli = argparse.ArgumentParser(
      description='Convert all excel files in a directory to csv')
  cli.add_argument('directory', **positional)
  return cli.parse_args()


def convert_to_csv(filename, output_dir):
  """Convert the given .xlsx file to a csv file.

  Only the workbook's active sheet is exported.  Rows in which every cell is
  empty are skipped.

  Args:
    filename: path object of the .xlsx workbook to read.
    output_dir: path object of the directory that receives '<stem>.csv'.
  """
  # .stem removes only the final suffix, so 'report.v2.xlsx' becomes
  # 'report.v2.csv' instead of being cut at the first dot (which could make
  # differently named workbooks overwrite each other's output).
  output_name = output_dir / (filename.stem + '.csv')
  workbook = openpyxl.load_workbook(filename, read_only=True)
  try:
    sheet = workbook.active
    # newline='' leaves row terminators to the csv module, per its docs.
    with open(output_name, 'w', newline='') as output_file:
      csv_writer = csv.writer(output_file)
      for row in sheet:
        values = [cell.value for cell in row]
        # Test for emptiness explicitly: a plain truthiness check would also
        # drop rows whose only contents are 0 or False.
        if all(value is None or value == '' for value in values):
          continue
        csv_writer.writerow(values)
  finally:
    # A read-only workbook keeps its source file open until closed.
    workbook.close()


if __name__ == '__main__':
  main()


Overwriting excel_to_csv.py


In [None]:
!chmod +x excel_to_csv.py

In [None]:
!./excel_to_csv.py excel_spreadsheets

Converted 26 files to csv. Saved in excel_spreadsheets_xlsx_to_csv_conversions


In [None]:
!ls excel_spreadsheets_xlsx_to_csv_conversions

spreadsheet-A.csv  spreadsheet-H.csv  spreadsheet-O.csv  spreadsheet-V.csv
spreadsheet-B.csv  spreadsheet-I.csv  spreadsheet-P.csv  spreadsheet-W.csv
spreadsheet-C.csv  spreadsheet-J.csv  spreadsheet-Q.csv  spreadsheet-X.csv
spreadsheet-D.csv  spreadsheet-K.csv  spreadsheet-R.csv  spreadsheet-Y.csv
spreadsheet-E.csv  spreadsheet-L.csv  spreadsheet-S.csv  spreadsheet-Z.csv
spreadsheet-F.csv  spreadsheet-M.csv  spreadsheet-T.csv
spreadsheet-G.csv  spreadsheet-N.csv  spreadsheet-U.csv


In [None]:
!cat excel_spreadsheets_xlsx_to_csv_conversions/spreadsheet-A.csv | head -n 5

ANAGRAMS,ASSIGNEE,ANGLICANS,ASTROPHYSICAL,ALIASED,ADOPTING,ALLEVIATES,AFFLICTING,ADDED,ASPERSION
ASSISTANT,ASSAILING,ALSO,ALDRICH,ACHING,ADDUCT,ANGELA,ADMIXES,ALLAYED,ALBA
ALGOL,ALLEYWAYS,ADVISORS,ACTIVIST,AMENDMENT,AFFECTINGLY,AUTOGRAPHED,AWARE,ACCESSES,ASCENDANCY
ALUMNI,AUDIOLOGY,AMOROUS,ANNUNCIATORS,ANTISERUM,ACCUMULATE,AUDUBON,AFRICANIZES,ASSENTS,AGEE
ANGULAR,APPROPRIATELY,APPRAISE,ADMINISTERED,AWFULLY,ANGEL,ABOLISHERS,ACTA,ADVERSARY,ABSENCES
