# Pandas
`pip install pandas`

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Creating a Dataframe


In [2]:
df = pd.DataFrame(np.random.rand(10, 4), columns=['A', 'B', 'C', 'D'])  #### 10x4
print(df)

          A         B         C         D
0  0.642673  0.232544  0.364436  0.853916
1  0.306394  0.441284  0.985583  0.315221
2  0.194978  0.579345  0.649756  0.534179
3  0.512704  0.937062  0.069745  0.863659
4  0.283660  0.030515  0.071280  0.548678
5  0.389815  0.811168  0.407324  0.863048
6  0.426538  0.842496  0.245545  0.823362
7  0.845586  0.186496  0.124647  0.678668
8  0.566842  0.450344  0.650385  0.532239
9  0.775233  0.948415  0.809952  0.850432


#### Creating a Dataframe with dictionary


In [3]:
df2 = pd.DataFrame({'A':[10,20],'B':[20,100],'C':[30,40],'D':[40,50]})
print(df2)

    A    B   C   D
0  10   20  30  40
1  20  100  40  50


#### looking at the variable types


In [4]:
df2.dtypes

A    int64
B    int64
C    int64
D    int64
dtype: object

#### looking at the top few values, default 5


In [5]:
df.head()

Unnamed: 0,A,B,C,D
0,0.642673,0.232544,0.364436,0.853916
1,0.306394,0.441284,0.985583,0.315221
2,0.194978,0.579345,0.649756,0.534179
3,0.512704,0.937062,0.069745,0.863659
4,0.28366,0.030515,0.07128,0.548678


#### looking at the last few values


In [6]:
df.tail()

Unnamed: 0,A,B,C,D
5,0.389815,0.811168,0.407324,0.863048
6,0.426538,0.842496,0.245545,0.823362
7,0.845586,0.186496,0.124647,0.678668
8,0.566842,0.450344,0.650385,0.532239
9,0.775233,0.948415,0.809952,0.850432


#### To get the indexes


In [7]:
df.index

RangeIndex(start=0, stop=10, step=1)

#### To get the column names


In [8]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

#### To get the values


In [9]:
df.values

array([[0.64267298, 0.23254385, 0.36443583, 0.85391641],
       [0.30639356, 0.44128362, 0.9855828 , 0.31522118],
       [0.19497813, 0.57934498, 0.64975633, 0.53417913],
       [0.51270405, 0.93706248, 0.06974528, 0.86365858],
       [0.28365973, 0.03051514, 0.0712805 , 0.54867775],
       [0.38981461, 0.81116771, 0.40732375, 0.86304784],
       [0.42653756, 0.84249581, 0.24554507, 0.82336152],
       [0.84558617, 0.1864959 , 0.12464677, 0.67866837],
       [0.56684174, 0.4503438 , 0.6503852 , 0.5322395 ],
       [0.77523346, 0.94841488, 0.80995245, 0.85043161]])

#### To get a statistical summary of the DataFrame


In [10]:
df.describe

<bound method NDFrame.describe of           A         B         C         D
0  0.642673  0.232544  0.364436  0.853916
1  0.306394  0.441284  0.985583  0.315221
2  0.194978  0.579345  0.649756  0.534179
3  0.512704  0.937062  0.069745  0.863659
4  0.283660  0.030515  0.071280  0.548678
5  0.389815  0.811168  0.407324  0.863048
6  0.426538  0.842496  0.245545  0.823362
7  0.845586  0.186496  0.124647  0.678668
8  0.566842  0.450344  0.650385  0.532239
9  0.775233  0.948415  0.809952  0.850432>

#### For sorting the index


In [11]:
df.sort_index(axis=0,ascending=False)

Unnamed: 0,A,B,C,D
9,0.775233,0.948415,0.809952,0.850432
8,0.566842,0.450344,0.650385,0.532239
7,0.845586,0.186496,0.124647,0.678668
6,0.426538,0.842496,0.245545,0.823362
5,0.389815,0.811168,0.407324,0.863048
4,0.28366,0.030515,0.07128,0.548678
3,0.512704,0.937062,0.069745,0.863659
2,0.194978,0.579345,0.649756,0.534179
1,0.306394,0.441284,0.985583,0.315221
0,0.642673,0.232544,0.364436,0.853916


#### For sorting by a Column


In [12]:
df.sort_values(by='A')

Unnamed: 0,A,B,C,D
2,0.194978,0.579345,0.649756,0.534179
4,0.28366,0.030515,0.07128,0.548678
1,0.306394,0.441284,0.985583,0.315221
5,0.389815,0.811168,0.407324,0.863048
6,0.426538,0.842496,0.245545,0.823362
3,0.512704,0.937062,0.069745,0.863659
8,0.566842,0.450344,0.650385,0.532239
0,0.642673,0.232544,0.364436,0.853916
9,0.775233,0.948415,0.809952,0.850432
7,0.845586,0.186496,0.124647,0.678668
