source: https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Switch on seaborn's default styling for plots drawn later in the notebook.
sns.set()

# 10 minutes to pandas

### Creating a Series by passing a list of values

In [3]:
# np.nan marks a missing value; pandas infers the dtype and builds a default integer index.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [4]:
# Show the Series: the np.nan entry is kept as NaN and the dtype is float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

### Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns

In [7]:
# Twelve consecutive calendar days beginning 2013-01-01; used as a row index later on.
dates = pd.date_range(start="20130101", periods=12, freq="D")

In [8]:
# Show the DatetimeIndex: 12 daily timestamps from 2013-01-01 to 2013-01-12.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10', '2013-01-11', '2013-01-12'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Draw the values from a locally seeded generator so that Restart & Run All
# rebuilds exactly the same frame; the unseeded global RNG gave new numbers on
# every run. The row count follows `dates`, so the shape cannot drift out of
# sync with the index.
rng = np.random.default_rng(seed=0)
df = pd.DataFrame(
    rng.standard_normal((len(dates), 4)),  # standard-normal samples, one row per date
    index=dates,
    columns=list("ABCD"),
)

In [11]:
# Show the whole frame: it is small (one row per date, columns A-D).
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.709481,1.949193,2.203106,-0.343606
2013-01-02,-0.855094,-0.592326,0.655856,1.636701
2013-01-03,0.325173,1.948598,0.66336,-1.282217
2013-01-04,1.578679,-0.356117,-0.615471,-0.088159
2013-01-05,0.608486,-1.113114,0.139077,-0.726145
2013-01-06,1.098233,0.416327,1.605768,0.364724
2013-01-07,-0.586936,-1.631725,-0.605183,0.398375
2013-01-08,-0.804077,-0.863647,-1.053357,0.668249
2013-01-09,0.490359,-0.954327,-0.774417,-0.927255
2013-01-10,0.001338,-2.185092,0.811966,0.168846


### Creating a DataFrame by passing a dictionary of objects that can be converted to a series-like structure

In [19]:
# One dict entry per column. "A", "B" and "F" are scalars; "C", "D" and "E"
# each supply four values, so the resulting frame has four rows.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

The columns of the resulting DataFrame have different dtypes:

In [22]:
# One dtype per column, reported as a Series indexed by column name.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Viewing top and bottom rows

In [23]:
# With no argument, head() returns the first five rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.709481,1.949193,2.203106,-0.343606
2013-01-02,-0.855094,-0.592326,0.655856,1.636701
2013-01-03,0.325173,1.948598,0.66336,-1.282217
2013-01-04,1.578679,-0.356117,-0.615471,-0.088159
2013-01-05,0.608486,-1.113114,0.139077,-0.726145


In [24]:
# With no argument, tail() returns the last five rows.
df.tail()

Unnamed: 0,A,B,C,D
2013-01-08,-0.804077,-0.863647,-1.053357,0.668249
2013-01-09,0.490359,-0.954327,-0.774417,-0.927255
2013-01-10,0.001338,-2.185092,0.811966,0.168846
2013-01-11,0.643317,-0.160682,0.038613,1.196601
2013-01-12,-0.468789,-0.97417,-0.775552,-0.042716


You can also view a defined number of rows

In [30]:
# n sets how many rows are taken from the top of the frame.
df.head(n=3)

Unnamed: 0,A,B,C,D
2013-01-01,-0.709481,1.949193,2.203106,-0.343606
2013-01-02,-0.855094,-0.592326,0.655856,1.636701
2013-01-03,0.325173,1.948598,0.66336,-1.282217


In [26]:
# n sets how many rows are taken from the bottom of the frame.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2013-01-10,0.001338,-2.185092,0.811966,0.168846
2013-01-11,0.643317,-0.160682,0.038613,1.196601
2013-01-12,-0.468789,-0.97417,-0.775552,-0.042716


Display the index (row labels)

In [31]:
# The row labels: a DatetimeIndex with daily frequency.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
               '2013-01-09', '2013-01-10', '2013-01-11', '2013-01-12'],
              dtype='datetime64[ns]', freq='D')

Display the columns

In [32]:
# The column labels, held in an Index of dtype object.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')