## Saving Files with NumPy

### 1. Run the following cells:

In [1]:
import numpy as np

In [2]:
# Print floats without scientific notation and allow wider rows before wrapping.
np.set_printoptions(linewidth=150, suppress=True)

In [3]:
# Read the savings CSV with every field as a string, so the header row is kept as data.
lending_co_saving = np.genfromtxt(
    fname="Lending-Company-Saving.csv",
    delimiter=",",
    dtype=np.str_,
)

print(lending_co_saving)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [4]:
import pandas as pd

# Preview the Total-Price CSV as a DataFrame, requesting string dtype for every column.
lending_co_total_price_csv = pd.read_csv(
    'C:\\Users\\ashif\\OneDrive\\Desktop\\Data-Analyst-Course-Resources\\Data-Analyst-Course\\S10-Working-with-TextFiles\\L16\\Lending-Company-Total-Price.csv',
    delimiter=',',
    dtype=np.str_,
)

lending_co_total_price_csv

Unnamed: 0,LoanID,StringID,Product,CustomerGender,Location,Region,TotalPrice
0,1,id_1,Product B,Male,Location 2,Region 2,16600
1,2,id_2,Product B,Male,Location 3,,16600
2,3,id_3,Product C,Female,Location 5,Region 5,15600
3,4,id_4,Product B,Male,Location 6,Region 1,16600
4,5,id_5,Product D,Female,Location 7,Region 2,20250
...,...,...,...,...,...,...,...
410,411,id_411,Product B,Male,Location 58,Region 6,16600
411,412,id_412,Product B,Male,Location 87,Region 6,16600
412,413,id_413,Product B,Male,Location 135,Region 1,16600
413,414,id_414,Product C,Female,Location 200,Region 6,15600


In [5]:
# Load the Total-Price CSV with NumPy, reading every field as a string.
# (numpy is already imported as `np` in the notebook's first cell, so it is not re-imported here.)
# NOTE(review): this is an absolute, machine-specific path; consider a path relative to the
# notebook so the cell can run on another machine.
lending_co_total_price = np.genfromtxt(
    'C:\\Users\\ashif\\OneDrive\\Desktop\\Data-Analyst-Course-Resources\\Data-Analyst-Course\\S10-Working-with-TextFiles\\L16\\Lending-Company-Total-Price.csv',
    delimiter=',',
    dtype=np.str_,
)
lending_co_total_price

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region', 'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2', '16600'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600'],
       ...,
       ['413', 'id_413', 'Product B', ..., 'Location 135', 'Region 1', '16600'],
       ['414', 'id_414', 'Product C', ..., 'Location 200', 'Region 6', '15600'],
       ['415', 'id_415', 'Product A', ..., 'Location 8', 'Region 2', '22250']], dtype='<U14')

### 2. Store the data from <i> lending_co_saving </i> and <i> lending_co_total_price </i> in separate .npy files.
    You can use the names "Saving-Exercise-1" and "Saving-Exercise-2".

In [6]:
# Persist the savings array in NumPy's binary .npy format (np.save appends the extension).
np.save(file="Saving-Exercise-1", arr=lending_co_saving)

In [7]:
# Persist the total-price array in NumPy's binary .npy format (np.save appends the extension).
np.save(file="Saving-Exercise-2", arr=lending_co_total_price)

### 3. Now load the two .npy files we just created and display their contents

In [12]:
 array_npy_1 = np.load('Saving-Exercise-1.npy')

In [13]:
# Reload the second saved array from its .npy file.
array_npy_2 = np.load(file='Saving-Exercise-2.npy')

In [16]:
# Show the array that was reloaded from Saving-Exercise-1.npy.
print(array_npy_1)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [15]:
# Show the array that was reloaded from Saving-Exercise-2.npy.
print(array_npy_2)

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600']
 ...
 ['413' 'id_413' 'Product B' ... 'Location 135' 'Region 1' '16600']
 ['414' 'id_414' 'Product C' ... 'Location 200' 'Region 6' '15600']
 ['415' 'id_415' 'Product A' ... 'Location 8' 'Region 2' '22250']]


### 4. These look identical to the arrays we stored with np.save(), so let's use the np.array_equal() function to compare them. 
    A) array_npy_1 and lending_co_saving
    B) array_npy_2 and lending_co_total_price

In [17]:
# Sanity check: the two reloaded arrays come from different datasets, so they should differ.
np.array_equal(a1=array_npy_1, a2=array_npy_2)

False

In [18]:
# A) The array reloaded from Saving-Exercise-1.npy versus the array that was saved.
np.array_equal(a1=array_npy_1, a2=lending_co_saving)

True

In [19]:
# B) The array reloaded from Saving-Exercise-2.npy versus the array that was saved.
np.array_equal(a1=array_npy_2, a2=lending_co_total_price)

True

### 5. Create an .npz file with both <i> lending_co_saving </i> and <i> lending_co_total_price </i>, and load it back into Python. 

In [20]:
# Bundle both arrays into one .npz archive; positional arrays get the default
# names arr_0, arr_1 in the order they are passed.
np.savez(
    "Saving-Exercise-3",
    lending_co_saving,
    lending_co_total_price,
)

In [24]:
# Open the archive; np.load returns an NpzFile whose members are looked up by name.
array_npz = np.load(file="Saving-Exercise-3.npz")
array_npz

NpzFile 'Saving-Exercise-3.npz' with keys: arr_0, arr_1

### 6. Use the <i> files </i> attribute to examine the different .npy files in the .npz, and then display them on the screen. 

In [25]:
# List the names of the arrays stored inside the loaded .npz archive.
array_npz.files

['arr_0', 'arr_1']

In [26]:
# First array passed positionally to np.savez (stored under the default name arr_0).
array_npz['arr_0']

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region', 'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2', '16600.0'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600.0'],
       ...,
       ['1041', 'id_1041', 'Product B', ..., 'Location 23', 'Region 4', '16600.0'],
       ['1042', 'id_1042', 'Product C', ..., 'Location 52', 'Region 6', '15600.0'],
       ['1043', 'id_1043', 'Product B', ..., 'Location 142', 'Region 6', '16600.0']], dtype='<U14')

In [27]:
# Second array passed positionally to np.savez (stored under the default name arr_1).
array_npz['arr_1']

array([['LoanID', 'StringID', 'Product', ..., 'Location', 'Region', 'TotalPrice'],
       ['1', 'id_1', 'Product B', ..., 'Location 2', 'Region 2', '16600'],
       ['2', 'id_2', 'Product B', ..., 'Location 3', '', '16600'],
       ...,
       ['413', 'id_413', 'Product B', ..., 'Location 135', 'Region 1', '16600'],
       ['414', 'id_414', 'Product C', ..., 'Location 200', 'Region 6', '15600'],
       ['415', 'id_415', 'Product A', ..., 'Location 8', 'Region 2', '22250']], dtype='<U14')

### 7. Assign more appropriate names for the .npy files in the .npz, load it and check the <i> files </i> attribute once again. 

In [28]:
# Write the archive again, this time using keyword arguments so each array is
# stored under a descriptive name instead of arr_0 / arr_1.
np.savez(
    "Saving-Exercise-3.npz",
    saving=lending_co_saving,
    total_price=lending_co_total_price,
)

In [30]:
# Reload the archive so array_npz reflects the newly written file.
array_npz = np.load(file="Saving-Exercise-3.npz")

In [32]:
# List the array names in the reloaded archive (the keyword names given to np.savez).
array_npz.files

['saving', 'total_price']

### 8. Display the two arrays from the .npz.

In [33]:
# Show the array stored under the name 'saving'.
print(array_npz['saving'])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600.0']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600.0']
 ...
 ['1041' 'id_1041' 'Product B' ... 'Location 23' 'Region 4' '16600.0']
 ['1042' 'id_1042' 'Product C' ... 'Location 52' 'Region 6' '15600.0']
 ['1043' 'id_1043' 'Product B' ... 'Location 142' 'Region 6' '16600.0']]


In [34]:
# Show the array stored under the name 'total_price'.
print(array_npz['total_price'])

[['LoanID' 'StringID' 'Product' ... 'Location' 'Region' 'TotalPrice']
 ['1' 'id_1' 'Product B' ... 'Location 2' 'Region 2' '16600']
 ['2' 'id_2' 'Product B' ... 'Location 3' '' '16600']
 ...
 ['413' 'id_413' 'Product B' ... 'Location 135' 'Region 1' '16600']
 ['414' 'id_414' 'Product C' ... 'Location 200' 'Region 6' '15600']
 ['415' 'id_415' 'Product A' ... 'Location 8' 'Region 2' '22250']]


### 9. Save the <i> lending_co_saving </i> array using the <i> np.savetxt() </i>. Specify the following:
    A) Set the file extension to .csv
    B) Set the format to strings ("%s")
    C) Set the delimiter to ','

In [36]:
# Export the savings array as comma-separated text; "%s" writes each element as a plain string.
np.savetxt(fname='Saving-Exercise-4.csv', X=lending_co_saving, fmt="%s", delimiter=",")

### 10. Re-import the dataset, display it on the screen and compare it to <i> lending_co_saving</i>.

In [39]:
# Re-import the CSV written by np.savetxt, reading every field as a byte string.
# np.bytes_ is the same type the removed alias np.string_ referred to (the alias is gone
# in NumPy 2.0), so the resulting array is unchanged.
# NOTE(review): lending_co_saving was loaded with dtype=np.str_, so this bytes array has a
# different dtype; consider dtype=np.str_ here if the two are to be compared directly.
array_csv = np.genfromtxt('Saving-Exercise-4.csv', delimiter=',', dtype=np.bytes_)
array_csv

array([[b'LoanID', b'StringID', b'Product', ..., b'Location', b'Region', b'TotalPrice'],
       [b'1', b'id_1', b'Product B', ..., b'Location 2', b'Region 2', b'16600.0'],
       [b'2', b'id_2', b'Product B', ..., b'Location 3', b'', b'16600.0'],
       ...,
       [b'1041', b'id_1041', b'Product B', ..., b'Location 23', b'Region 4', b'16600.0'],
       [b'1042', b'id_1042', b'Product C', ..., b'Location 52', b'Region 6', b'15600.0'],
       [b'1043', b'id_1043', b'Product B', ..., b'Location 142', b'Region 6', b'16600.0']], dtype='|S14')