In [1]:
import numpy as np

In [2]:
import pandas as pd

## <u>Object Creation</u>

In [3]:
# Build a Series from a plain list; pandas supplies a default integer index.
# The np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [4]:
# Display the Series: a bare last expression is rendered as the cell's output.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

## Create a DataFrame by passing a NumPy array, with a datetime index and labeled columns

In [5]:
# Six consecutive calendar days starting 2013-01-01; used below as a row index.
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [6]:
# Display the DatetimeIndex built in the previous cell.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# Build a 6x4 frame of standard-normal samples, indexed by `dates`, with
# columns labeled 'A'..'D'.
# The samples come from a seeded generator so that Restart & Run All
# reproduces the same values (and the same outputs below) on every run.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(rng.standard_normal((6, 4)), index=dates, columns=list('ABCD'))

In [8]:
# Display the whole frame (only 6 rows, so no need for head()).
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.214607,-1.296427,1.065995,0.129996
2013-01-02,-0.321355,-0.219069,-0.710173,1.367937
2013-01-03,1.971057,-0.461386,1.002142,-0.172001
2013-01-04,0.109596,-0.196429,0.199416,-2.858934
2013-01-05,0.5067,-0.482987,0.044358,0.394469
2013-01-06,-0.052377,1.111328,-0.834003,0.546172


## Create a DataFrame from a dict of objects

In [12]:
# Build a frame from a dict: each key becomes a column.
# The scalar values ('A', 'B', 'F') are repeated on every row, while the
# array-like values ('C', 'D', 'E') each supply four entries.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

In [13]:
# Display the dict-built frame.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [14]:
# Each column keeps its own dtype; `dtypes` lists them per column.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

## Viewing Data

In [15]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,A,B,C,D
2013-01-01,-1.214607,-1.296427,1.065995,0.129996
2013-01-02,-0.321355,-0.219069,-0.710173,1.367937
2013-01-03,1.971057,-0.461386,1.002142,-0.172001
2013-01-04,0.109596,-0.196429,0.199416,-2.858934
2013-01-05,0.5067,-0.482987,0.044358,0.394469


In [16]:
# Row labels of the frame (the DatetimeIndex passed at construction).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
# Convert the DataFrame's values to a NumPy array; the index and column
# labels are not part of the result.
df.to_numpy()

array([[-1.21460696, -1.29642743,  1.06599532,  0.12999645],
       [-0.32135455, -0.21906944, -0.7101732 ,  1.36793691],
       [ 1.97105702, -0.46138627,  1.00214224, -0.17200051],
       [ 0.10959648, -0.19642945,  0.19941607, -2.85893368],
       [ 0.50669958, -0.48298726,  0.04435751,  0.39446934],
       [-0.05237663,  1.11132845, -0.83400339,  0.54617242]])

In [19]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.166502,-0.257495,0.127956,-0.098727
std,1.055345,0.781112,0.810437,1.448217
min,-1.214607,-1.296427,-0.834003,-2.858934
25%,-0.25411,-0.477587,-0.521541,-0.096501
50%,0.02861,-0.340228,0.121887,0.262233
75%,0.407424,-0.202089,0.801461,0.508247
max,1.971057,1.111328,1.065995,1.367937


In [20]:
# Transpose the frame: row labels become column labels and vice versa.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-1.214607,-0.321355,1.971057,0.109596,0.5067,-0.052377
B,-1.296427,-0.219069,-0.461386,-0.196429,-0.482987,1.111328
C,1.065995,-0.710173,1.002142,0.199416,0.044358,-0.834003
D,0.129996,1.367937,-0.172001,-2.858934,0.394469,0.546172


In [21]:
# Reorder the columns by their labels, in descending order (D, C, B, A).
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.129996,1.065995,-1.296427,-1.214607
2013-01-02,1.367937,-0.710173,-0.219069,-0.321355
2013-01-03,-0.172001,1.002142,-0.461386,1.971057
2013-01-04,-2.858934,0.199416,-0.196429,0.109596
2013-01-05,0.394469,0.044358,-0.482987,0.5067
2013-01-06,0.546172,-0.834003,1.111328,-0.052377


In [22]:
# Order the rows by the values in column 'B', smallest first.
df.sort_values('B', ascending=True)

Unnamed: 0,A,B,C,D
2013-01-01,-1.214607,-1.296427,1.065995,0.129996
2013-01-05,0.5067,-0.482987,0.044358,0.394469
2013-01-03,1.971057,-0.461386,1.002142,-0.172001
2013-01-02,-0.321355,-0.219069,-0.710173,1.367937
2013-01-04,0.109596,-0.196429,0.199416,-2.858934
2013-01-06,-0.052377,1.111328,-0.834003,0.546172
