# ------------    dataframe basics    ------------
    # in pandas we mainly work with "DataFrames"
    # DataFrames are built from Series objects

In [1]:
import numpy as np
import pandas as pd

In [2]:
# randn returns random numbers from the "standard NORMAL distribution" (centered around 0)
from numpy.random import randn

In [3]:
# Seed NumPy's global random number generator with np.random.seed() so the
# "random" values are reproducible: with the seed fixed at 101, every run of
# this cell produces the same sequence of numbers.
np.random.seed(seed=101)
rnd_20 = randn(5, 4)  # 5 x 4 = 20 draws, hence the name
rnd_20

array([[ 2.70684984,  0.62813271,  0.90796945,  0.50382575],
       [ 0.65111795, -0.31931804, -0.84807698,  0.60596535],
       [-2.01816824,  0.74012206,  0.52881349, -0.58900053],
       [ 0.18869531, -0.75887206, -0.93323722,  0.95505651],
       [ 0.19079432,  1.97875732,  2.60596728,  0.68350889]])

___

## --------    Building a Dataframe    --------
    # similar to Series(), DataFrame() also takes "data" and "index" as arguments
    # there is also a "columns" argument for the column labels
    # note that "index" holds the row labels


In [4]:
# randn(5, 4) produced a 5x4 matrix of 20 random numbers from the "NORMAL distribution",
# so the frame gets 5 row labels (the "index") and 4 column labels
fd = pd.DataFrame(
    data=rnd_20,
    index=["r1", "r2", "r3", "r4", "r5"],
    columns=["c1", "c2", "c3", "c4"],
)
fd

Unnamed: 0,c1,c2,c3,c4
r1,2.70685,0.628133,0.907969,0.503826
r2,0.651118,-0.319318,-0.848077,0.605965
r3,-2.018168,0.740122,0.528813,-0.589001
r4,0.188695,-0.758872,-0.933237,0.955057
r5,0.190794,1.978757,2.605967,0.683509


In [5]:
# Each column is a "Pandas-Series", so, "c1" is a Series, as well as "c2", "c3", "c4"
# and they all share a "Common-index"

# Basically, a DataFrame is a bunch of Series that share the same index
# we can select these Series objects individually

r1    0.907969
r2   -0.848077
r3    0.528813
r4   -0.933237
r5    2.605967
Name: c3, dtype: float64

### --------    accessing Series from DataFrame    --------
#### method 1: specify the column name


In [None]:
# square brackets with a column label select that column (here "c3") from the DataFrame
fd['c3']

In [6]:
# the selected column is a pandas Series; checking its type confirms it
type(fd['c3'])

pandas.core.series.Series

In [7]:
# whereas 'fd' itself is a DataFrame
type(fd)

pandas.core.frame.DataFrame

#### method 2: SQL-style dot notation, using "."