[Reference](https://towardsdatascience.com/the-python-standard-library-modules-you-should-know-as-a-data-scientist-47e1117ca6c8)

# 1. Zipfile

In [1]:
import zipfile
import os

# open the zip file in reading mode
myzip = zipfile.ZipFile('AREAS_DEPORTIVAS_2019.zip', mode='r')

# extract all files from the zip file to the current working directory
myzip.extractall()

# check that the files have been correctly extracted
os.listdir()

# close the zip file
myzip.close()

In [2]:
import zipfile

# open the zip file in reading mode and extract all files to the current working directory.
with zipfile.ZipFile('AREAS_DEPORTIVAS_2019.zip', mode='r') as myzip:
    myzip.extractall()

In [3]:
import zipfile

# open the zip file in reading mode and extract all files to the current working directory.
with zipfile.ZipFile('AREAS_DEPORTIVAS_2019.zip', mode='r') as myzip:
    print(myzip.namelist())
    # ['20190924_AREAS_DEPORTIVAS.sbn', '20190924_AREAS_DEPORTIVAS.sbx', '20190924_AREAS_DEPORTIVAS.shp', 
    # '20190924_AREAS_DEPORTIVAS.shx', '20190924_AREAS_DEPORTIVAS.cpg', '20190924_AREAS_DEPORTIVAS.dbf', 
    # '20190924_AREAS_DEPORTIVAS.prj']

In [4]:
import zipfile

# create a new zip file.
with zipfile.ZipFile('new_file.zip', mode='w') as myzip:
    myzip.write('file_1.txt')
    myzip.write('file_2.txt')

# 2. Random

In [5]:
import random
import numpy as np

# random.randint (Python Standard Library) upper bound included
random_numbers = [random.randint(0,2) for _ in range(10)]
print(random_numbers)
# [0, 2, 1, 1, 0, 1, 0, 0, 2, 1]

# numpy.random.randint upper bound excluded
np.random.randint(0,2, size=10)

[2, 0, 1, 2, 2, 2, 1, 0, 2, 0]


array([0, 1, 0, 1, 0, 0, 0, 1, 1, 1])

In [6]:
import random

# list of numbers
numbers = [1, 2, 3, 4, 5]

# randomly select 10 elements from the list numbers with replacement
random.choices(numbers, k=10)
# [3, 5, 5, 2, 3, 5, 1, 2, 1, 2]

# randomly select 20 elements from the list numbers with weighted probabilities
random.choices(numbers, weights=[0.1, 0.1, 0.6, 0.1, 0.1], k=20)

[3, 1, 3, 5, 3, 1, 3, 2, 3, 3, 2, 3, 2, 3, 3, 1, 5, 3, 3, 4]

In [7]:
# list of numbers
numbers = [1, 2, 3, 4, 5]

# shuffled list
random.shuffle(numbers)
print(numbers)
# [4, 1, 3, 5, 2]

# tuple of numbers
numbers_2 = (1, 2, 3, 4, 5)

# the input of the random.shuffle function must be a mutable object 
random.shuffle(numbers_2)

[1, 3, 2, 4, 5]


TypeError: ignored

In [8]:
# tuple of numbers
numbers_2 = (1, 2, 3, 4, 5)

# new list with all the elements randomly shuffled
shuffled_list = random.sample(numbers_2, k=len(numbers_2))
print(shuffled_list)
# [4, 3, 5, 2, 1]

# convert shuffled_list to a tuple using the built-in function tuple()
shuffled_tuple = tuple(shuffled_list)
print(shuffled_tuple)

[1, 3, 5, 4, 2]
(1, 3, 5, 4, 2)


# 3. Os

In [9]:
import os

# get the path of the current working directory
path = os.getcwd()
print(path)

/content


In [11]:
# change the current working directory
os.chdir('C:\\Users\\amand')

# check the modification
os.getcwd()

In [12]:
# list of all files and directories in the specified path
entries = os.listdir('C:\\Users\\amand\\articulos\\Standard library')

print(entries)

In [13]:
# list of all files in the current working directory
entries = os.listdir()

# get only .txt files
txt_files = [entry for entry in entries if entry.endswith('.txt')]

print(txt_files)

In [14]:
# list of all files in the current working directory
entries = os.listdir()
print(entries)
# ['.ipynb_checkpoints', 'AREAS_DEPORTIVAS_2019.zip', 'file_1.txt', 'file_2.txt', 'images', 'new_file.zip', 'Python Standard Library.ipynb']

# rename file_1.txt
os.rename('file_1.txt', 'file_new_name.txt')

# check the modification
entries = os.listdir()
print(entries)

In [15]:
# list of all files in the current working directory
entries = os.listdir()
print(entries)
# ['.ipynb_checkpoints', 'AREAS_DEPORTIVAS_2019.zip', 'file_2.txt', 'file_new_name.txt', 'images', 'new_file.zip',
#  'Python Standard Library.ipynb']

# make a new directory called new_dir 
os.mkdir('new_dir')

# check the modification
entries = os.listdir()
print(entries)

In [16]:
# file_2.txt is located in the current working directory
# os.startfile() opens it with it's associated program - in my case Windows Editor
os.startfile('file_2.txt')

# 4. Time

In [18]:
# compute execution time of a block of code
import time

start_time = time.time()

total = 0
for i in range(50000):
    total = total + i

# elapsed time
elapsed_time = time.time() - start_time
print(elapsed_time)

0.013227224349975586


In [19]:
import time

import requests

items_to_scrape = ['https://en.wikipedia.org/wiki/Madrid', 'https://en.wikipedia.org/wiki/Munich', 'https://en.wikipedia.org/wiki/Valencia']
for item in items_to_scrape:
    response = requests.get(item)
    ### 
    ### your code
    ### 
    time.sleep(2)

# 5. Datetime

In [20]:
import datetime

# create a datetime object - George Orwell birth date
birth = datetime.datetime(1903, 6, 25)

print(type(birth))
# <class 'datetime.datetime'>

# access attribute - dot notation
birth.year
# 1903

# access attribute - getattr() function
getattr(birth, 'year')

<class 'datetime.datetime'>


1903

In [21]:
# get current date and time
now = datetime.datetime.now()

print(now)
# 2020-05-20 20:12:56.396202
print(type(now))

2021-05-06 14:55:57.143332
<class 'datetime.datetime'>


In [22]:
# create a datetime object - George Orwell birth date
birth = datetime.datetime(1903, 6, 25)

# create a datetime object - George Orwell death date
death = datetime.datetime(1950, 1, 21)

# calculate how long he lived by subtracting both dates
live = death - birth
print(live)
# 17012 days, 0:00:00
print(type(live))

17012 days, 0:00:00
<class 'datetime.timedelta'>


In [23]:
# George Orwell's birth date
birth = datetime.datetime(1903, 6, 25)

# get the date in the format 25/06/1903
date = birth.strftime('%d/%m/%Y')
print(date)
# 25/06/1903
print(type(date))
# <class 'str'>

# get the date in the format 25-June-1903
date = birth.strftime('%d-%B-%Y')
print(date)
# 25-June-1903

# get the date in the format June 25, 1903
date = birth.strftime('%B %d, %Y')
print(date)

25/06/1903
<class 'str'>
25-June-1903
June 25, 1903


In [24]:
# a string representing George Orwell's birth date
birth_date = '25/06/1903'
date = datetime.datetime.strptime(birth_date, '%d/%m/%Y')

print(date)
# 1903-06-25 00:00:00
print(type(date))

1903-06-25 00:00:00
<class 'datetime.datetime'>


# 6. Csv

In [25]:
import csv 

# open a csv file with the open function
with open('students_grades.csv', newline='') as csvfile:
    # create an iterator object with the csv.reader() function
    reader = csv.reader(csvfile)
    # iterate over the reader object
    for line in reader:
        print(line)

In [26]:
import csv 

# open a csv file with the open function
with open('students_grades.csv', newline='') as csvfile:
    # create an iterator object with the csv.reader() function
    reader = csv.reader(csvfile)
    # access next element from the iterator reader
    print(next(reader))
    # ['Student', ' Exam_1', ' Exam_2', ' Exam_3']
    print(next(reader))

In [27]:
import csv 

# open a csv file with the open function
with open('students_grades_2.csv', newline='') as csvfile:
    # create an iterator object with the csv.reader() function
    reader = csv.reader(csvfile, delimiter=';')
    # iterate over the reader object
    for line in reader:
        print(line)

In [28]:
import csv 

# open a csv file with the open function
with open('students_grades.csv', newline='') as csvfile:
    # create an iterator object with the csv.DictReader() function
    dict_reader = csv.DictReader(csvfile)
    # iterate over the dict_reader object
    for line in dict_reader:
        print(line)

In [29]:
import csv 

# nested list containing information about works of art 
works_of_art = [['Picture', 'Year', 'Author'],
                ['Las Meninas', 1656, 'Velazquez'],
                ['Guernica', 1937, 'Picasso'],
                ['Duelo a garrotazos', 1819, 'Goya']]

# open a csv file with the open function
with open('works_of_art.csv', mode='w', newline='') as csvfile:
    # create a writer object
    csvwriter = csv.writer(csvfile)
    # use the writerows method
    csvwriter.writerows(works_of_art)

In [30]:
import csv 

# nested list containing information about works of art 
header =  ['Picture', 'Year', 'Author']
works_of_art = [{'Picture': 'Las Meninas', 'Year': 1656, 'Author': 'Velazquez'},
                {'Picture': 'Guernica', 'Year': 1937, 'Author': 'Picasso'},
                {'Picture': 'Duelo a garrotazos', 'Year': 1819, 'Author': 'Goya'}]


# open a csv file with the open function
with open('works_of_art_2.csv', mode='w', newline='') as csvfile:
    # create a writer object
    DictWriter = csv.DictWriter(csvfile, fieldnames=header)
    # write header
    DictWriter.writeheader()
    # use the writerows method 
    DictWriter.writerows(works_of_art)

# 7. Glob

In [31]:
import glob

csv_files = glob.glob('*.csv')

print(csv_files)

['works_of_art.csv', 'works_of_art_2.csv']


In [32]:
import glob

students_files = glob.glob('students*')

print(students_files)

[]


In [33]:
import glob

text_files_234 = glob.glob('file_[234].txt')

print(text_files_234)

[]


# 8. Difflib

In [34]:
import requests
import pandas as pd

response = requests.get('https://www.worldometers.info/world-population/population-by-country/')
html = response.text

# read html table into a list of dataframes
web_tables = pd.read_html(html)

# select first dataframe - columns Country and Population
df_population = web_tables[0][['Country (or dependency)','Population (2020)']]

# rename columns 
new_names = {'Country (or dependency)':'Country', 'Population (2020)': 'Population'}
df_population = df_population.rename(columns=new_names)

# set the column country as index 
df_population.set_index('Country', inplace=True)

# set index with lowercase letters
df_population.index = df_population.index.str.lower()

# visualize first 5 rows 
df_population.head()

Unnamed: 0_level_0,Population
Country,Unnamed: 1_level_1
china,1439323776
india,1380004385
united states,331002651
indonesia,273523615
pakistan,220892340


In [35]:
import difflib

while True:
    country = input('Introduce a country name: ').lower()

    if country in df_population.index:
        print('The population of {} is {:,}\n'.format(country, df_population.loc[country, 'Population']))
    else: 
        match = difflib.get_close_matches(country, df_population.index, 1)
        if match: 
            is_match = input('Do you mean ' + match[0] + '? (Yes/No) ')
            if is_match == 'Yes':
                print('The population of {} is {:,}\n'.format(match[0], df_population.loc[match[0], 'Population']))
            else: 
                print('Country not found\n')
        else:
            print('Country not found\n')

Introduce a country name: china
The population of china is 1,439,323,776

Introduce a country name: chiina
Do you mean china? (Yes/No) Yes
The population of china is 1,439,323,776

Introduce a country name: spain
The population of spain is 46,754,778

Introduce a country name: ly
Country not found



KeyboardInterrupt: ignored