In [1]:
import numpy as np

In [2]:
# Parse the CSV with every field kept as a string, so the header row and the
# text columns sit in the same array as the numeric-looking ones.
lending_co = np.genfromtxt(
    'Lending-Company-Saving.csv',
    dtype=str,
    delimiter=',',
)
lending_co

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

## np.save()

#### np.save() method will create '.npy' in the same directory as your notebook
* .npy files are faster to work with
* more compact
* the entire dataset keeps its format, so we don't need to worry about specifying and reorganizing the values as we do when reading an external text file.
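* .npy is a binary file format, not a text file: it stores a small header describing the array's dtype and shape, followed by the raw array data
* When we load the file back into Python, the array comes back with the same dtype and shape it was saved with (here, `<U14` strings); values are not converted to numbers automatically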

### load-a-dataset != import-a-dataset
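* Loading (e.g. `np.load` on an .npy/.npz file): the array comes back exactly as it was saved, so we don't need to specify or change our data while working with our Python object.
* Importing (e.g. `np.genfromtxt` on a text file): the file doesn't keep track of the datatype of the original array, so we may need to specify the datatype of the values after bringing them into Python.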

In [3]:
# Write the array to disk in NumPy's own format. The file name is given
# without an extension; the next cell loads it back as
# 'lending-company-saving.npy'.
np.save(file='lending-company-saving', arr=lending_co)

In [4]:
# Read the array back from the .npy file and show what came out.
lending_data_save = np.load(file='lending-company-saving.npy')
print(lending_data_save)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [5]:
# Check that the save/load round trip gave back an array equal to the one
# parsed from the CSV.
np.array_equal(a1=lending_data_save, a2=lending_co)

True

## np.savez()
* It doesn't create an .npy file
* It creates **.npz** file
* **npz** is like an archive of NPYs that can store multiple arrays
* Instead of storing different datasets in separate **npy** files, we can store all of them in a single **npz**
* By default the **npz** file stores each dataset as a separate array with a generic name (`arr_0`, `arr_1`, ...); passing the arrays as keyword arguments gives them custom names instead

In [6]:
# Rebuild the two inputs for the np.savez() demo: the array parsed from the
# CSV and the copy read back from the .npy file.
lending_co = np.genfromtxt(
    'Lending-Company-Saving.csv',
    dtype=str,
    delimiter=',',
)
lending_data_save = np.load(file='lending-company-saving.npy')

In [7]:
# Bundle both arrays into a single archive. They are passed positionally, so
# they are stored under the generic keys 'arr_0' and 'arr_1'.
np.savez(
    "Lending-Company-Saving",
    lending_co,
    lending_data_save,
)

In [9]:
# Loading an .npz gives back an NpzFile archive object rather than an array,
# so printing it only shows the object's repr.
lending_data_savez = np.load(file='Lending-Company-Saving.npz')
print(lending_data_savez)

<numpy.lib.npyio.NpzFile object at 0x00000217DEC6F640>


To access the first array in the archive, index it by its generic name `arr_0`:

In [10]:
# 'arr_0' is the first array that was passed positionally to np.savez().
print(lending_data_savez["arr_0"])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [11]:
# 'arr_1' is the second array that was passed positionally to np.savez().
print(lending_data_savez['arr_1'])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [12]:
# Write the archive again under the same file name, this time passing the
# arrays as keyword arguments so each one is stored under a custom key.
np.savez(
    "Lending-Company-Saving",
    company=lending_co,
    data_save=lending_data_save,
)

In [13]:
# Re-open the archive so the handle reflects the file that was just rewritten.
lending_data_savez = np.load(file='Lending-Company-Saving.npz')

In [14]:
# List the names of the arrays stored in the archive; these match the keyword
# names passed to np.savez().
lending_data_savez.files

['company', 'data_save']

In [18]:
# 'company' is the key under which lending_co was saved.
print(lending_data_savez['company'])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [19]:
# 'data_save' is the key under which lending_data_save was saved.
print(lending_data_savez["data_save"])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [20]:
# Both archive members come from the same data, so they should compare equal.
np.array_equal(
    a1=lending_data_savez['company'],
    a2=lending_data_savez['data_save'],
)

True

## np.savetxt()
* Helps store NumPy datasets in plain-text files such as **.txt** or **.csv**

In [21]:
# Parse the CSV as strings again, then write the array out as comma-separated
# text. fmt='%s' writes each element as a plain string.
lending_co = np.genfromtxt(
    'Lending-Company-Saving.csv',
    dtype=str,
    delimiter=',',
)
np.savetxt(
    fname="Lending-Company-Saving.txt",
    X=lending_co,
    fmt='%s',
    delimiter=',',
)

In [22]:
# The .txt file is plain text, so it is imported with genfromtxt and the
# delimiter and dtype have to be specified again.
lending_data_savetxt = np.genfromtxt(
    'Lending-Company-Saving.txt',
    dtype=str,
    delimiter=',',
)
lending_data_savetxt

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region',
        'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2',
        '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4',
        '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6',
        '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6',
        '16600.0']], dtype='<U14')

In [23]:
# Compare the array re-imported from the .txt file with the one loaded from
# the .npy file.
np.array_equal(a1=lending_data_savetxt, a2=lending_data_save)

True