## Creating DataFrame from scratch

#### Import Pandas and set some display options for output

In [1]:
# Reference Pandas and Numpy
import numpy as np
import pandas as pd
# Display settings: plain-text (non-HTML) reprs, truncated to at most
# 10 columns and 10 rows so printed frames stay compact.
for option_name, option_value in (
    ('display.notebook_repr_html', False),
    ('display.max_columns', 10),
    ('display.max_rows', 10),
):
    pd.set_option(option_name, option_value)

### Creating a DataFrame from scratch

#### Create DataFrame from 2-D ndarray

In [2]:
# Each inner list of the 2-D array becomes one row. No labels are supplied,
# so both the rows and the columns are numbered 0 and 1.
pd.DataFrame(np.array([[10,20],[21,22]]))

    0   1
0  10  20
1  21  22

#### Create a DataFrame by passing a list of Series objects

In [3]:
# Every Series in the list becomes one ROW of the frame; the ten positions
# inside each Series become the ten columns.
df1 = pd.DataFrame(
    [pd.Series(np.arange(start, start + 10)) for start in (10, 30)]
)
df1

    0   1   2   3   4   5   6   7   8   9
0  10  11  12  13  14  15  16  17  18  19
1  30  31  32  33  34  35  36  37  38  39

#### Dimensions of DataFrame objects

In [4]:
# .shape is a (rows, columns) tuple: the two Series gave 2 rows, and their
# ten elements gave 10 columns.
df1.shape

(2, 10)

### Specifying the column name

In [5]:
# Same construction as before, but with explicit column labels instead of
# the default 0, 1.
df2 = pd.DataFrame(
    data=np.array([[10, 20], [30, 40]]),
    columns=list("ab"),
)
df2

    a   b
0  10  20
1  30  40

#### Access the names of columns for the DataFrame

In [6]:
# The labels passed through columns= when df2 was built, returned as an Index.
df2.columns

Index(['a', 'b'], dtype='object')

#### Accessing the default column names

In [7]:
# df1 was built without column labels, so pandas numbered the columns 0..9;
# that default labelling is reported as a RangeIndex.
df1.columns

RangeIndex(start=0, stop=10, step=1)

### Retrieve just the names of DataFrame columns by position

In [8]:
# df2.columns supports positional indexing like a list.
f"{df2.columns[0]}, {df2.columns[1]}"

'a, b'

#### Column names can be changed by assigning to the `.columns` property

In [9]:
# Assigning to .columns relabels df2 in place: after this cell runs, the
# earlier cells that displayed df2.columns would show 'c' and 'd' if re-run.
df2.columns = list("cd")
f"{df2.columns[0]},{df2.columns[1]}"

'c,d'

#### Assignment of index labels of the DataFrame

In [10]:
# columns= labels the three columns, index= labels the two rows.
df3 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 5, 6]]),
    index=list("de"),
    columns=list("abc"),
)
df3

   a  b  c
d  1  2  3
e  4  5  6

#### Access the index of the DataFrame object

In [11]:
# The row labels supplied through index= when df3 was built.
df3.index

Index(['d', 'e'], dtype='object')

In [12]:
# Print the row labels and the column names of df3, picked out by position.
# The indentation inside the triple-quoted strings is part of the printed
# text, and print() puts a single space between its two arguments, which is
# why the second message starts one column in from the left.
print("""The name of index of Data Frame are:- 
         {0},{1}\n""".format(df3.index[0],df3.index[1]),
     """The name of columns of the Data Frame are:-
         {0},{1},{2}""".format(df3.columns[0],df3.columns[1],df3.columns[2]))

The name of index of Data Frame are:- 
         d,e
 The name of columns of the Data Frame are:-
         a,b,c


#### Create a DataFrame from a dictionary of Series objects

In [13]:
# Two Series of equal length that share the default 0..4 index.
s1 = pd.Series(np.arange(1, 6))
s2 = pd.Series(np.arange(7, 12))
# Each dictionary key becomes a column name; each Series becomes that
# column's values.
df4 = pd.DataFrame(dict(c1=s1, c2=s2))
df4

   c1  c2
0   1   7
1   2   8
2   3   9
3   4  10
4   5  11

#### Demonstration of automatic alignment in DataFrame

In [14]:
# s3 only has values at index labels 1 and 3. When it is combined with s1
# and s2, values are matched by label, and the labels s3 lacks become NaN.
s3 = pd.Series(data=np.arange(13, 15), index=[1, 3])
df5 = pd.DataFrame(dict(c1=s1, c2=s2, c3=s3))
df5

   c1  c2    c3
0   1   7   NaN
1   2   8  13.0
2   3   9   NaN
3   4  10  14.0
4   5  11   NaN

### Read Data from file

#### View the entire contents of the file on Windows

In [None]:
# The leading `!` hands the rest of the line to the system shell. `type` is
# the Windows command that prints a file's contents, so this cell only works
# on Windows (on macOS/Linux the equivalent would be `cat` with a
# forward-slash path).
!type data\constituents-financials_csv.csv

### Read the csv file and Examine the first 5 records using '.head()' method

In [21]:
# Load the S&P 500 constituents file, keeping only four of its columns
# (selected by position) and using the "Symbol" column as the row index.
#
# The path uses a forward slash instead of a backslash: "\c" is not a valid
# Python escape sequence (newer Python versions warn about it), and a
# backslash separator only resolves on Windows. "data/..." works on Windows,
# macOS and Linux alike.
sp505 = pd.read_csv(
    "data/constituents-financials_csv.csv",
    index_col="Symbol",
    usecols=[0, 2, 3, 12],
)
# head() with no argument shows the first 5 rows.
sp505.head()

                        Sector   Price  Price/Book
Symbol                                            
MMM                Industrials  222.89       11.34
AOS                Industrials   60.24        6.35
ABT                Health Care   56.27        3.19
ABBV               Health Care  108.48       26.14
ACN     Information Technology  150.51       10.62

### Examine the last 5 records using '.tail()' method

In [17]:
# tail() with no argument shows the last 5 rows.
sp505.tail()

                        Sector   Price  Price/Book
Symbol                                            
XYL                Industrials   70.24        5.31
YUM     Consumer Discretionary   76.30      212.08
ZBH                Health Care  115.53        2.39
ZION                Financials   50.71        1.42
ZTS                Health Care   71.51       18.09

### Verify the number of records/rows in the DataFrame

In [18]:
# len() of a DataFrame counts its rows (not its columns or cells).
len(sp505)

505

### Examine the index of DataFrame by .index attribute

In [19]:
# The index holds the ticker symbols and is named 'Symbol' because that
# column was passed as index_col when the file was read.
sp505.index

Index(['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ATVI', 'AYI', 'ADBE', 'AAP', 'AMD',
       ...
       'WYNN', 'XEL', 'XRX', 'XLNX', 'XL', 'XYL', 'YUM', 'ZBH', 'ZION', 'ZTS'],
      dtype='object', name='Symbol', length=505)

### Get the columns of the Data Frame

In [20]:
# 'Symbol' is not listed here: it was moved into the index, leaving only the
# three remaining selected columns.
sp505.columns

Index(['Sector', 'Price', 'Price/Book'], dtype='object')