# Reading & Writing Files to Local File-System

## Step 1: Generate some Random Data

In [1]:
# Define required libraries
import numpy as np
import pandas as pd

In [2]:
# Build the dataframe "random_data": 100 rows x 4 columns (A-D) of random integers in [0, 100)
column_names = list('ABCD')
random_values = np.random.randint(0, 100, size=(100, len(column_names)))
random_data = pd.DataFrame(random_values, columns=column_names)

# Preview the first five rows
random_data.head()

Unnamed: 0,A,B,C,D
0,45,61,97,70
1,69,96,65,67
2,54,10,84,31
3,98,27,64,22
4,55,72,29,1


## Step 2: Write the dataframe to the project file-system & Validate Existence

In [3]:
# List the files in the data_asset directory of the project file system (state before the new file is written)
!ls -l /project_data/data_asset/

total 124
-rw-r--r--. 1 wsuser watsonstudio 45818 Dec  9 13:19 Auto AI Training Data.csv
-rw-r-----. 1 wsuser watsonstudio  4813 Dec  9 18:39 Customers To Score.csv
-rw-r--r--. 1 wsuser watsonstudio  2847 Dec  9 11:37 Mortgage_Batch_Input_Data.csv
-rw-r--r--. 1 wsuser watsonstudio  1571 Dec  9 12:33 Mortgage_Batch_Input_Data_Reduced.csv
-rw-r--r--. 1 wsuser watsonstudio 37237 Dec  6 16:16 Mortgage_Customer.csv
-rw-r--r--. 1 wsuser watsonstudio  7327 Dec  6 16:16 Mortgage_Default.csv
-rw-r--r--. 1 wsuser watsonstudio 11343 Dec  6 16:16 Mortgage_Property.csv


In [4]:
# Persist the dataframe as a comma-separated .csv in the project file-system; the row index is not written
csv_path = '/project_data/data_asset/WrittenToFileSystem.csv'
random_data.to_csv(csv_path, sep=',', index=False)

In [5]:
# List the data_asset directory again to confirm that WrittenToFileSystem.csv now exists
!ls -l /project_data/data_asset/

total 128
-rw-r--r--. 1 wsuser watsonstudio 45818 Dec  9 13:19 Auto AI Training Data.csv
-rw-r-----. 1 wsuser watsonstudio  4813 Dec  9 18:39 Customers To Score.csv
-rw-r--r--. 1 wsuser watsonstudio  2847 Dec  9 11:37 Mortgage_Batch_Input_Data.csv
-rw-r--r--. 1 wsuser watsonstudio  1571 Dec  9 12:33 Mortgage_Batch_Input_Data_Reduced.csv
-rw-r--r--. 1 wsuser watsonstudio 37237 Dec  6 16:16 Mortgage_Customer.csv
-rw-r--r--. 1 wsuser watsonstudio  7327 Dec  6 16:16 Mortgage_Default.csv
-rw-r--r--. 1 wsuser watsonstudio 11343 Dec  6 16:16 Mortgage_Property.csv
-rw-r-----. 1 wsuser watsonstudio  1165 Dec 10 11:48 WrittenToFileSystem.csv


### <font color="blue">The data is stored in the file system but is not included in the project data list – see the Project Assets tab!</font>

## Step 3: Read the newly created file as a new dataframe in the notebook

In [6]:
# Load the CSV written in Step 2 back into a new dataframe
ReadFromFileSystem = pd.read_csv('/project_data/data_asset/WrittenToFileSystem.csv')

# NOTE: the messages below label this dataframe "WorkingFile"; the variable itself is ReadFromFileSystem
row_count, column_count = ReadFromFileSystem.shape
summary_statistics = ReadFromFileSystem.describe()

print('')
print('The dataframe "WorkingFile" has', column_count, 'columns and ', row_count, 'rows')
print('')
print('Summary statistics for the WorkingFile dataframe are:')
print(summary_statistics)



The dataframe "WorkingFile" has 4 columns and  100 rows

Summary statistics for the WorkingFile dataframe are:
                A           B           C           D
count  100.000000  100.000000  100.000000  100.000000
mean    52.150000   55.350000   47.410000   45.770000
std     27.703836   28.258323   26.933549   27.021075
min      0.000000    1.000000    0.000000    0.000000
25%     31.750000   30.000000   26.750000   23.750000
50%     51.500000   60.000000   50.000000   47.000000
75%     74.250000   79.250000   69.750000   64.750000
max     99.000000   99.000000   97.000000   97.000000


#### The data file "WrittenToFileSystem.csv" can be deleted either in the project assets or with a Linux shell command, as shown below

In [7]:
# Delete the CSV written in Step 2 (-f: no error if the file is already gone),
# then list the directory to confirm it has been removed
!rm -f /project_data/data_asset/WrittenToFileSystem.csv
!ls -l /project_data/data_asset/

total 124
-rw-r--r--. 1 wsuser watsonstudio 45818 Dec  9 13:19 Auto AI Training Data.csv
-rw-r-----. 1 wsuser watsonstudio  4813 Dec  9 18:39 Customers To Score.csv
-rw-r--r--. 1 wsuser watsonstudio  2847 Dec  9 11:37 Mortgage_Batch_Input_Data.csv
-rw-r--r--. 1 wsuser watsonstudio  1571 Dec  9 12:33 Mortgage_Batch_Input_Data_Reduced.csv
-rw-r--r--. 1 wsuser watsonstudio 37237 Dec  6 16:16 Mortgage_Customer.csv
-rw-r--r--. 1 wsuser watsonstudio  7327 Dec  6 16:16 Mortgage_Default.csv
-rw-r--r--. 1 wsuser watsonstudio 11343 Dec  6 16:16 Mortgage_Property.csv


## Step 4: Repeat, this time using the Project Library to read and write directly to and from the project

##### This is documented at: https://www.ibm.com/support/producthub/icpdata/docs/content/SSQNUZ_current/wsj/analyze-data/project-lib-python.html

#### Write the dataframe to the project

In [9]:
# Import the Project Library (see the documentation link above)
from project_lib import Project
# Obtain a handle on the project; the cells below use it to save and fetch project files
project = Project.access()

In [10]:
# Save the randomly created dataframe to the project as "WrittenToProject.csv".
# to_csv() with no path returns the CSV text; the row index is included as an unnamed
# first column, which is dropped again when the file is read back in Step 5.
# overwrite=True lets this cell be re-run when the file already exists in the project
# (save_data does not overwrite by default).
project.save_data("WrittenToProject.csv", random_data.to_csv(), overwrite=True)

{'file_name': 'WrittenToProject.csv',
 'message': 'File saved to project storage.',
 'asset_id': '0a27d607-921f-40db-9a7f-12c8a390bd1a'}

#### You should see the file in your list of Project Data Assets

## Step 5: Read the data from the project assets and drop the index column that `to_csv()` wrote automatically

In [11]:
# Retrieve the saved CSV from the project as a file-like buffer
my_file = project.get_file("WrittenToProject.csv")

# Rewind the buffer to its start before parsing
my_file.seek(0)

# Parse the CSV, then discard the saved row index, which pandas reads back as 'Unnamed: 0'
ReadFromProject = (
    pd.read_csv(my_file)
    .drop(columns=['Unnamed: 0'])
)
ReadFromProject.head()

Unnamed: 0,A,B,C,D
0,45,61,97,70
1,69,96,65,67
2,54,10,84,31
3,98,27,64,22
4,55,72,29,1


-------------------------------------------------------
<u>Author Information:</u><br>
<b>Stephen Groves</b><br>
<i>Data Science & AI Technical Sales, IBM Europe</i><br>
steve.groves@uk.ibm.com<br>
9th December 2019