In [1]:
import pandas as pd
from zipfile import ZipFile

# Make sure souvenir.zip is in your current working directory

# Creates a ZipFile object; it is left open on purpose because later cells keep using it
zip_folder = ZipFile('souvenir.zip')

# To open a csv file in the zip file as a dataframe:
# This is really only helpful if the zip file only contains one file
# The `with` block closes the member's file handle as soon as pandas has read it
# (pandas does not close a file object that the caller opened)

with zip_folder.open('category.csv') as csv_file:
    dataframe = pd.read_csv(csv_file)
dataframe

Unnamed: 0,CategoryID,Name
0,1,Art
1,2,Artifact
2,3,Book
3,4,Clothing
4,5,Food
5,6,Geological
6,7,Kitchenware
7,8,Miscellaneous
8,9,Postcard
9,10,Technology


* Opening files one at a time like this doesn't scale well to zip folders with lots of files, so what we can do instead is create a dictionary that holds every file in the zip folder as a dataframe

* Each key will be a file name, e.g. 'category.csv', and each value will be the corresponding pandas dataframe

In [2]:
# Start with an empty dictionary
dataframes_dict = dict()

# A ZipFile object isn't iterable itself, but the list returned by its infolist() is
# infolist() holds one metadata entry (including the filename) per file in the zip folder

zip_folder.infolist()

[<ZipInfo filename='category.csv' compress_type=deflate external_attr=0x20 file_size=152 compress_size=130>,
 <ZipInfo filename='city.csv' compress_type=deflate external_attr=0x20 file_size=1451 compress_size=920>,
 <ZipInfo filename='country.csv' compress_type=deflate external_attr=0x20 file_size=519 compress_size=350>,
 <ZipInfo filename='location.csv' compress_type=deflate external_attr=0x20 file_size=934 compress_size=570>,
 <ZipInfo filename='owner.csv' compress_type=deflate external_attr=0x20 file_size=67 compress_size=48>,
 <ZipInfo filename='region.csv' compress_type=deflate external_attr=0x20 file_size=1471 compress_size=896>,
 <ZipInfo filename='souvenir.csv' compress_type=deflate external_attr=0x20 file_size=12651 compress_size=6153>]

In [3]:
# The filename is the only piece of each entry we need
# infolist() returns a plain list, so index 0 picks out the entry for the first file:

first_entry = zip_folder.infolist()[0]
first_entry.filename

'category.csv'

In [6]:
# Iterate over every file and store it as a dataframe in the dataframes dictionary
for file in zip_folder.infolist():
    if file.filename.endswith('.csv'):  # Not necessary for this zip file but you can include certain file types and exclude others
        csv_name = file.filename  # named csv_name so it doesn't reuse the name of the standard-library csv module
        # The `with` block closes the member's file handle once pandas has read it,
        # so the loop doesn't leave one open handle behind per file
        with zip_folder.open(csv_name) as csv_file:
            dataframe = pd.read_csv(csv_file)
        dataframes_dict[csv_name] = dataframe

# Type a file name to get a dataframe from the dictionary
dataframes_dict['owner.csv']

Unnamed: 0,OwnerID,Name
0,1,Group 1
1,2,Group 2
2,3,Group 3
3,4,Group 4
4,5,Group 5
