# <span style="color:maroon">**Importing Data into Python - Part 1**</span>

## <span style="color:blue">**Introduction to Flat Files**</span>

In [None]:
# Basic process for opening and closing a file by hand
file = open("./data/stock_data_demo.csv", mode="r")     # mode="r" opens the file in read mode
print(type(file))                                       # observe the class of the file object
print(file.read())                                      # displays the full text content of the file
print(file.closed)                                      # False: the file is still open at this point
file.close()                                            # close the file
print(file.closed)                                      # True: confirms that the file connection is closed
print(type(file))                                       # closing the file does not remove the file object that was created

# Even though the file object persists, its content can no longer be read once
# it is closed: read() raises ValueError. The error is caught and printed so the
# demonstration does not abort a "Restart & Run All" of the notebook.
try:
    print(file.read())
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
# Same idea with a context manager: `with` closes the file for us on exit
with open("./data/stock_data.csv", mode="r") as file:
    # each readline() call returns the next line, so this prints the first three lines
    for line_no in range(3):
        print(file.readline())

# once the with-block has been left, the connection to the file is already closed
print(file.closed)

#### Flat Files
- Text files containing records in a tabular form (.csv, .txt)
- Record: row of values associated with fields / attributes
- Column: fields / features / attributes
- Can have a header
- Delimiters such as commas, spaces, tabs

#### Importing numerical data using numpy
numpy arrays are:
- the standard for storing numerical data
- efficient, fast, and clean
- essential data structure for other packages such as scikit-learn

In [None]:
import numpy as np
file = "./data/stock_data_demo.csv"

# np.loadtxt reads a delimited text file into a regular numpy array.
# Passing dtype=str would coerce every array element to a string instead.
data = np.loadtxt(
    file,
    delimiter=",",         # other delimiters include " ", "\t"
    skiprows=1,            # skip the first line (presumably the header row)
    usecols=[2, 3, 4],     # only read the columns with index 2, 3 and 4
)

print(data, "\n")
print(type(data))

In [None]:
import numpy as np
file = "./data/stock_data_demo.csv"

# names=True takes the field names from the first row;
# dtype=None lets numpy work out a type for each column individually
data = np.genfromtxt(
    file,
    delimiter=",",
    names=True,
    dtype=None,
)

# data is a structured array: one record per row, and each record prints like a tuple
print(data, "\n")
print(type(data))

In [None]:
# accessing data in a structured array (`data` comes from the genfromtxt cell)
for selection in (
    data[0],       # the row with index = 0
    data[0:3],     # three rows with index 0, 1, 2
    data[0][2],    # row index 0, column index 2
):
    print(selection, "\n")

In [None]:
import numpy as np
file = "./data/stock_data_demo.csv"

# np.recfromcsv was removed from the main numpy namespace in NumPy 2.0, so the
# call is spelled out with np.genfromtxt, which works on old and new versions.
# The keyword values below are the defaults recfromcsv used to apply
# (delimiter=",", names=True, dtype=None, lower-cased field names), and the
# result is viewed as a record array, which is what recfromcsv returned.
d = np.genfromtxt(
    file,
    delimiter=",",
    names=True,
    dtype=None,
    case_sensitive="lower",
).view(np.recarray)

# accessing data in a record array (indexing works the same way as for the
# structured array returned by a plain genfromtxt() call)
print(d[0], "\n")     # retrieve the row with index = 0
print(d[0:3], "\n")   # retrieve three rows with index 0, 1, 2
print(d[0][2], "\n")  # retrieve the row index 0 and column index 2

#### Importing data into a pandas dataframe

In [None]:
import pandas as pd
help(pd.read_csv)     # prints the full docstring; note how extensively pd.read_csv can be customized

# this is an example from DataCamp --> data = pd.read_csv(file, sep="\t", comment="#", na_values="Nothing")
# (sep="\t" reads a tab-delimited file, comment="#" ignores the rest of a line after "#",
#  and na_values="Nothing" treats the string "Nothing" as a missing value)

In [None]:
import pandas as pd
file = "./data/stock_data_demo.csv"

# read the whole csv into a dataframe, then show only the first rows
df = pd.read_csv(file)
preview = df.head()
print(preview)
# tip: a dataframe that is entirely numerical can be turned into a numpy array --> data_numpy_array = np.array(df)

## <span style="color:blue">**Importing Data from Other File Types**</span>

## <span style="color:blue">**Working with Relational Databases in Python**</span>

## <span style="color:blue">**Miscellaneous**</span>

**Zen of Python**

In [None]:
import this
%run "C:\ProgramData\Anaconda3\lib\this.py"

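**IPython has many magic commands; an overview of IPython can be found at --> https://ipython.readthedocs.io/en/stable/overview.html and the full list of built-in magics at --> https://ipython.readthedocs.io/en/stable/interactive/magics.html**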

In [None]:
# magic command - line magics are prefixed by % - displays the content of the working directory
# (not Python syntax: this line only works inside IPython / Jupyter)
%ls