In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd

# use seaborn plotting defaults
import seaborn as sns; sns.set()

In [2]:
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}

obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [3]:
obj3.to_dict()

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

In [4]:
obj3.isna()

Ohio      False
Texas     False
Oregon    False
Utah      False
dtype: bool

In [5]:
data = {"state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
        "year": [2000, 2001, 2002, 2001, 2002, 2003],
        "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)

In [6]:
frame.state.name = 'stato'
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [7]:
pd.DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [8]:
frame['debt'] = np.arange(6.0)
frame

Unnamed: 0,state,year,pop,debt
0,Ohio,2000,1.5,0.0
1,Ohio,2001,1.7,1.0
2,Ohio,2002,3.6,2.0
3,Nevada,2001,2.4,3.0
4,Nevada,2002,2.9,4.0
5,Nevada,2003,3.2,5.0


In [9]:
val = pd.Series([-1.2, -1.5, -1.7], index=[2, 4, 5])
frame.iloc[val.index.to_list(),3] = val
frame

Unnamed: 0,state,year,pop,debt
0,Ohio,2000,1.5,0.0
1,Ohio,2001,1.7,1.0
2,Ohio,2002,3.6,-1.2
3,Nevada,2001,2.4,3.0
4,Nevada,2002,2.9,-1.5
5,Nevada,2003,3.2,-1.7


In [10]:
frame.index.name = 'peppa'
frame.columns.name = 'pig'
frame

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000,1.5,0.0
1,Ohio,2001,1.7,1.0
2,Ohio,2002,3.6,-1.2
3,Nevada,2001,2.4,3.0
4,Nevada,2002,2.9,-1.5
5,Nevada,2003,3.2,-1.7


In [11]:
frame2 = frame
frame2

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000,1.5,0.0
1,Ohio,2001,1.7,1.0
2,Ohio,2002,3.6,-1.2
3,Nevada,2001,2.4,3.0
4,Nevada,2002,2.9,-1.5
5,Nevada,2003,3.2,-1.7


In [12]:
frame2 = frame2.reindex(list(range(0,10, 2)))
frame2

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000.0,1.5,0.0
2,Ohio,2002.0,3.6,-1.2
4,Nevada,2002.0,2.9,-1.5
6,,,,
8,,,,


In [13]:
frame2.reindex(np.arange(9), method='ffill')

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000.0,1.5,0.0
1,Ohio,2000.0,1.5,0.0
2,Ohio,2002.0,3.6,-1.2
3,Ohio,2002.0,3.6,-1.2
4,Nevada,2002.0,2.9,-1.5
5,Nevada,2002.0,2.9,-1.5
6,,,,
7,,,,
8,,,,


In [14]:
frame2.drop('debt', axis=1)

pig,state,year,pop
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Ohio,2000.0,1.5
2,Ohio,2002.0,3.6
4,Nevada,2002.0,2.9
6,,,
8,,,


In [15]:
arr = np.random.randint(0,7, size=(4,7))
arr

array([[2, 4, 4, 6, 2, 6, 1],
       [0, 1, 6, 2, 3, 3, 1],
       [3, 1, 1, 6, 6, 1, 4],
       [1, 4, 4, 3, 6, 3, 0]])

In [18]:
frame2.add(frame, fill_value='0')

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,OhioOhio,4000.0,3.0,0.0
1,0Ohio,2001.0,1.7,1.0
2,OhioOhio,4004.0,7.2,-2.4
3,0Nevada,2001.0,2.4,3.0
4,NevadaNevada,4004.0,5.8,-3.0
5,0Nevada,2003.0,3.2,-1.7
6,,,,
8,,,,


In [22]:
frame2

pig,state,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000.0,1.5,0.0
2,Ohio,2002.0,3.6,-1.2
4,Nevada,2002.0,2.9,-1.5
6,,,,
8,,,,


In [34]:
def f1(x):
    return round(x,-1)

frame2[['year', 'pop', 'debt']]

pig,year,pop,debt
peppa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2000.0,1.5,0.0
2,2002.0,3.6,-1.2
4,2002.0,2.9,-1.5
6,,,
8,,,


In [55]:
frame2[['year','pop','debt']].mean(axis=0, skipna=False)

pig
year   NaN
pop    NaN
debt   NaN
dtype: float64

In [65]:
frame2.describe(include=np.number)

pig,year,pop,debt
count,3.0,3.0,3.0
mean,2001.333333,2.666667,-0.9
std,1.154701,1.069268,0.793725
min,2000.0,1.5,-1.5
25%,2001.0,2.2,-1.35
50%,2002.0,2.9,-1.2
75%,2002.0,3.25,-0.6
max,2002.0,3.6,0.0


In [63]:
frame2[['year']].cumsum()

pig,year
peppa,Unnamed: 1_level_1
0,2000.0
2,4002.0
4,6004.0
6,
8,


In [46]:
arr.shape

(4, 7)

In [39]:
arr

array([[2, 4, 4, 6, 2, 6, 1],
       [0, 1, 6, 2, 3, 3, 1],
       [3, 1, 1, 6, 6, 1, 4],
       [1, 4, 4, 3, 6, 3, 0]])

In [48]:
arr.sum(axis=1)

array([25, 16, 22, 21])