In [37]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

In [38]:
# There're several forms of creating a DataFrame, and one of them, is through a dictionary of lists
# Another form, is using dictionaries of dictionaries

data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Neavada', 'Nevada', 'Nevada'],
       'year': [2000, 2001, 2002, 2001, 2002, 2003],
       'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Neavada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [39]:
# We may use the method head to take only the first five lines in a large DataFrame

frame.head()

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Neavada,2001,2.4
4,Nevada,2002,2.9


In [40]:
# We can also organize the columns

pd.DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Neavada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [41]:
# Changing the indexes and creating an empty column
# If you create an empty column this way, the values attributed to it will be shown as NaN type

frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'], 
                      index=['one', 'two', 'three', 'four', 'five', 'six'])

frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Neavada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [42]:
# It's possible to obtain a column or a line in a simple way
# In order to obtain a line, we oughta to use 'loc' method

second_column = frame2['state'] # or second_column = fram2.state
third_line = frame2.loc['three'] # in here we can not use the form we used above
print(second_column)
print('\n')
print(third_line)

one         Ohio
two         Ohio
three       Ohio
four     Neavada
five      Nevada
six       Nevada
Name: state, dtype: object


year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object


In [43]:
# To add values we make a simple attribution or use a numpy array
# Plus, it's possible to add according to index

# frame2['debt'] = 16.5
# frame2

# frame2['debt'] = np.arange(6.)

val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Neavada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [44]:
# To delete a column, use the 'del' method

frame2['eastern'] = frame2.state == 'Ohio'
del frame2['eastern']
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Neavada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [45]:
# Using reindex for fixing indexes

obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e']) # as the 'e' isn't listed, it's gonna have NaN values type
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [46]:
# it's possible to drop an object or values with the 'drop' method,
# which returns a new object woth the values deleted

obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [47]:
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [48]:
obj.drop(['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [49]:
# a more sophisticated example

data = pd.DataFrame(np.arange(16).reshape(4, 4),
                   index=['Ohio', 'Colorado', 'Utah', 'New York'],
                   columns=['one', 'two', 'three', 'four'])

data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [50]:
data.drop(['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [51]:
data.drop('two', axis=1) # deleting a column instead of a line

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [52]:
data.drop(['two', 'four'], axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [65]:
# applying maping functions

frame = pd.DataFrame(np.random.randn(4, 3), 
                     columns=list('bde'),
                    index=['Utah', 'Ohio', 'Texas', 'Oregon'])

frame

Unnamed: 0,b,d,e
Utah,-0.538757,-0.564518,1.499806
Ohio,0.038117,-0.391317,0.197554
Texas,-0.243554,1.306123,0.569564
Oregon,-0.282926,0.855771,0.513691


In [66]:
np.abs(frame) # calculates the absolute value of every each element of the DataFrame

Unnamed: 0,b,d,e
Utah,0.538757,0.564518,1.499806
Ohio,0.038117,0.391317,0.197554
Texas,0.243554,1.306123,0.569564
Oregon,0.282926,0.855771,0.513691


In [67]:
# calculates the total amplitude of the DataFrame
# we ougtha use the method apply, in order to aplly the function to the DataFrame

f = lambda x: x.max() - x.min() 
frame.apply(f)

b    0.576874
d    1.870640
e    1.302253
dtype: float64

In [70]:
frame.apply(f, axis='columns') # for fixing the columns

Utah      2.064324
Ohio      0.588870
Texas     1.549677
Oregon    1.138697
dtype: float64

In [71]:
# we use the method 'sort_index' to ordenate a DataFrame and a Series

obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [72]:
# another example

frame = pd.DataFrame(np.arange(8).reshape(2, 4),
                    index=['three', 'one'],
                    columns=['d', 'a', 'b', 'c'])

frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [73]:
#  sorting through the columns

frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [74]:
# it's also possible to sort by an descending order

frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [84]:
# now, sorting by the values, but the method requires an argument and only works for columns

frame.sort_values(by='a', ascending=False)

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3
