# Pandas Basics

Reference: the *Data Wrangling with pandas* cheat sheet \
Website: https://pandas.pydata.org

In [1]:
import pandas as pd

## 1.1 Creating DataFrames

In [2]:
# Column-wise construction: every keyword names a column and its list holds
# that column's values from the first row to the last.
# NOTE(review): column "c" starts with 11, 11 here, whereas the row-wise and
# MultiIndex examples in this section use 10, 11 - confirm this is intended.
df = pd.DataFrame(
    data=dict(
        a=[4, 5, 6],
        b=[7, 8, 9],
        c=[11, 11, 12],
    ),
    index=[1, 2, 3],
)
df

Unnamed: 0,a,b,c
1,4,7,11
2,5,8,11
3,6,9,12


In [3]:
# Row-wise construction: each inner list is one row; the column labels and
# the row labels are supplied separately.
df = pd.DataFrame(
    data=[
        [4, 7, 10],
        [5, 8, 11],
        [6, 9, 12],
    ],
    columns=list("abc"),
    index=[1, 2, 3],
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12


In [4]:
# Two-level row index: level "N" holds the letters, level "v" the numbers.
# The index is built from one array per level instead of one tuple per row.
df = pd.DataFrame(
    data=dict(a=[4, 5, 6], b=[7, 8, 9], c=[10, 11, 12]),
    index=pd.MultiIndex.from_arrays(
        [["D", "D", "e"], [1, 2, 2]],
        names=["N", "v"],
    ),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
N,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
D,1,4,7,10
D,2,5,8,11
e,2,6,9,12


## 1.2 Method Chaining

In [5]:
# Method chaining: every step returns a new DataFrame, so the pipeline reads
# top to bottom without intermediate variables.
# The result gets its own name instead of being assigned back to `df`, so the
# input frame is left intact and re-running this cell gives the same result.
df_long = (
    pd.melt(df)                 # stack columns a, b, c into variable/value pairs
    .rename(columns={
        'variable': 'var',
        'value': 'val',
    })
    .query('val>=200')          # keep only rows whose value is at least 200
)
# Every value in the input frame is below 200, so this displays an empty
# frame showing only the `var` and `val` column headers.
df_long

Unnamed: 0,var,val


## 1.3 Reshaping Data
- Change layout, sorting, reindexing, renaming

In [6]:
# Small wide-format frame (two rows, four columns) used by the reshaping
# examples in this section.
df = pd.DataFrame(
    data=dict(a=[1, 2], b=[3, 4], c=[5, 6], d=[7, 8]),
    index=[1, 2],
)

In [7]:
# Display the wide-format frame before it is reshaped.
df

Unnamed: 0,a,b,c,d
1,1,3,5,7
2,2,4,6,8


In [8]:
# Wide -> long: each column is unpivoted into (variable, value) rows, and the
# result gets a fresh 0..n-1 index.
df.melt()

Unnamed: 0,variable,value
0,a,1
1,a,2
2,b,3
3,b,4
4,c,5
5,c,6
6,d,7
7,d,8


In [9]:
# Pivot example, currently disabled.
# NOTE(review): `values=2` does not name a column of `df` (its columns are
# "a", "b", "c", "d"); `values` presumably needs a column label such as "b"
# before this line can be re-enabled - confirm against the DataFrame.pivot docs.
#df.pivot(columns="a", values=2)

In [10]:
# Stack the frames vertically (rows of the second follow rows of the first).
# The original index labels are kept, so labels 1 and 2 each appear twice.
pd.concat([df, df], axis="index")

Unnamed: 0,a,b,c,d
1,1,3,5,7
2,2,4,6,8
1,1,3,5,7
2,2,4,6,8


In [11]:
# Place the frames side by side: the columns of the second frame follow the
# columns of the first, with rows matched on the index labels.
pd.concat([df, df], axis="columns")

Unnamed: 0,a,b,c,d,a.1,b.1,c.1,d.1
1,1,3,5,7,1,3,5,7
2,2,4,6,8,2,4,6,8


In [28]:
# Example frame for the sorting / renaming cells: column "a" and the row
# labels both run 1..7, and "mpg" holds unsorted values.
mpg = [100, 22, 13, 54, 25, 66, 77]
df1 = pd.DataFrame(
    data={"a": list(range(1, 8)), "mpg": mpg},
    index=list(range(1, 8)),
)
df1

Unnamed: 0,a,mpg
1,1,100
2,2,22
3,3,13
4,4,54
5,5,25
6,6,66
7,7,77


In [29]:
# Order rows by "mpg", lowest first; returns a new frame, df1 is unchanged.
df1.sort_values(by="mpg", ascending=True)

Unnamed: 0,a,mpg
3,3,13
2,2,22
5,5,25
4,4,54
6,6,66
7,7,77
1,1,100


In [30]:
# Order rows by "mpg", highest first; returns a new frame, df1 is unchanged.
df1.sort_values(
    by="mpg",
    ascending=False,
)

Unnamed: 0,a,mpg
1,1,100
7,7,77
6,6,66
4,4,54
5,5,25
2,2,22
3,3,13


In [31]:
# Relabel the "mpg" column as "MPG"; the result is a new frame and df1 keeps
# its original column name.
df1.rename({"mpg": "MPG"}, axis="columns")

Unnamed: 0,a,MPG
1,1,100
2,2,22
3,3,13
4,4,54
5,5,25
6,6,66
7,7,77


In [32]:
# Order rows by their index labels. df1's labels are already 1..7 in order,
# so the displayed frame matches df1 itself.
df1.sort_index(axis=0, ascending=True)

Unnamed: 0,a,mpg
1,1,100
2,2,22
3,3,13
4,4,54
5,5,25
6,6,66
7,7,77


In [37]:
# Replace the row labels with a fresh 0..n-1 index; the old labels are kept
# as a regular column named "index".
df2 = df1.reset_index(drop=False)
df2

Unnamed: 0,index,a,mpg
0,1,1,100
1,2,2,22
2,3,3,13
3,4,4,54
4,5,5,25
5,6,6,66
6,7,7,77


In [38]:
# Remove the "index" column that reset_index() added; returns a new frame and
# leaves df2 untouched.
df2.drop("index", axis="columns")

Unnamed: 0,a,mpg
0,1,100
1,2,22
2,3,13
3,4,54
4,5,25
5,6,66
6,7,77


## 1.5 Subset Observations - rows

## 1.6 Subset Variables - columns

## 1.7 Subsets - rows and columns

## 1.8 Using query

## 2.1 Summarize Data

## 2.2 Handling Missing Data

## 2.3 Make New Columns

## 2.4 Group Data

## 2.5 Windows

## 2.6 Plotting

## 2.7 Combine Data Sets

# NumPy Basics

In [12]:
import numpy as np
import math

In [13]:
# 2x2 symmetric matrix, written out row by row.
A = np.array([
    [1, -1],
    [-1, 1],
])

In [14]:
# Display the 2x2 matrix.
A

array([[ 1, -1],
       [-1,  1]])

In [15]:
# Eigen-decomposition of A. Despite the singular names, both results are
# arrays: `eigenvalue` holds all eigenvalues, and each COLUMN of `eigenvector`
# is the eigenvector paired with the eigenvalue at the same position.
eigenvalue, eigenvector = np.linalg.eig(A)

In [16]:
# Display the eigenvalues of A, in the order returned by np.linalg.eig.
eigenvalue

array([2., 0.])

In [17]:
# Display the eigenvectors of A; read them column by column (column i pairs
# with eigenvalue[i]).
eigenvector

array([[ 0.70710678,  0.70710678],
       [-0.70710678,  0.70710678]])