# Pandas
Pandas works with `DataFrame` objects, which closely resemble database tables: they have unique keys and can hold timestamps and mixed data types.

Two basic structures:
* Series
* Dataframes


In [1]:
import pandas as pd

## Series

In [2]:
"""
We create a Serie and pass a first array with the index
if we don't put an index, it automatically creates a unique and incremental key
We can mix strings with integers and floats and it's alright.
They are really similar to python's dictionaries.
"""
serie = pd.Series( data= [100, "None", 300, "Text", 5.3], index= ["pablo", "juan", "pedro", "daniel", "enrique"])
serie

pablo       100
juan       None
pedro       300
daniel     Text
enrique     5.3
dtype: object

In [3]:
print(
    # The index (labels) of the Series
    serie.index,
    # Accessing the value stored under a given label
    serie["juan"],
    # .loc performs the same label-based lookup as the square brackets above
    serie.loc["juan"],
    # ...and .loc also accepts a list of labels
    serie.loc[["juan", "pablo"]],
    sep="\n",
)

Index(['pablo', 'juan', 'pedro', 'daniel', 'enrique'], dtype='object')
None
None
juan     None
pablo     100
dtype: object


In [4]:
# Selecting by position must go through .iloc. Passing integers to plain
# square brackets (serie[[0, 1, 2]]) on a label-indexed Series relies on a
# positional fallback that pandas 2.x deprecates, and .loc only accepts labels.
print(serie.iloc[[0, 1, 2]])
print()
# A positional slice is a shorter way to take the same first three entries
print(serie.iloc[:3])

pablo     100
juan     None
pedro     300
dtype: object

pablo     100
juan     None
pedro     300
dtype: object


In [5]:
# The `in` operator tests whether a label is part of the Series' index
print("pablo" in serie, "jose" in serie, sep="\n")

True
False


In [6]:
# Arithmetic is applied element by element, provided every value supports the
# operator (multiplying a string by an integer repeats the string)
print(f"Multiplication\n {serie * 3}")

Multiplication
 pablo               300
juan       NoneNoneNone
pedro               900
daniel     TextTextText
enrique            15.9
dtype: object


In [7]:
# Raising the whole Series to a power fails because strings do not support `**`.
# The error is the point of this cell, so it is caught and displayed instead of
# being left uncaught, which would abort a "Run All" of the notebook.
try:
    serie ** 3
except TypeError as error:
    print(f"TypeError: {error}")

TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int'

In [None]:
# Restricting the operation to the labels that hold numbers (integers or
# floats) avoids the TypeError raised by the string entries
serie.loc[["pablo", "pedro", "enrique"]].pow(2)

# Dataframe
Series are the building blocks of DataFrames, and now we are going to put them to use. We can create a DataFrame from a Python dictionary.

In [9]:
"""
Winter and autumn are columns (keys of the dictionary), and the Series are the values, inside the series
there are also values and indexes.
Variable "d" is a dictionary
"""
d = {"winter": pd.Series([100., 200., 300.], index=["apple", "peach", "orange"]),
    "autumn" : pd.Series([111., 222., 333., 4444.], index=["apple", "peach", "strawberry", "grape"])}
print(type(d))

<class 'dict'>


In [11]:
# The Series inside the dictionary do not need the same number of entries or
# the same labels: the DataFrame lines them up by label and leaves the gaps empty.
df = pd.DataFrame(data=d)
df

Unnamed: 0,winter,autumn
apple,100.0,111.0
grape,,4444.0
orange,300.0,
peach,200.0,222.0
strawberry,,333.0


In [12]:
# Row labels of the DataFrame
df.index

Index(['apple', 'grape', 'orange', 'peach', 'strawberry'], dtype='object')

In [13]:
# Column labels of the DataFrame
df.columns

Index(['winter', 'autumn'], dtype='object')

In [14]:
"""
We can create a dataframe specifying the indexes that we want to use, this time we have to pass 
the previously initialized dictionary as the 1st parameter, then the indexes we want to use inside that
dictionary that we passed to the method.
"""
df2 = pd.DataFrame(d, index = ["apple", "peach", "strawberry"])
print(df2)

            winter  autumn
apple        100.0   111.0
peach        200.0   222.0
strawberry     NaN   333.0


In [15]:
# `columns` chooses which columns to build; a name that is not a key of the
# dictionary ("summer") is still added as a new column.
df2 = pd.DataFrame(
    data=d,
    index=["apple", "peach", "strawberry"],
    columns=["winter", "autumn", "summer"],
)
print(df2)

            winter  autumn summer
apple        100.0   111.0    NaN
peach        200.0   222.0    NaN
strawberry     NaN   333.0    NaN


# We can create a DataFrame from a Python list of dictionaries

In [16]:
# Each dictionary becomes one row; its keys become column names
data = [
    dict(paulus=1, jon=2),
    dict(peter=5, julia=10, maria=20),
]

In [17]:
# Without an explicit index, the rows are numbered automatically starting at 0
pd.DataFrame(data)

Unnamed: 0,paulus,jon,peter,julia,maria
0,1.0,2.0,,,
1,,,5.0,10.0,20.0


In [19]:
# The automatic integer index can be replaced with custom row labels
pd.DataFrame(data, index=["green", "red"])

Unnamed: 0,paulus,jon,peter,julia,maria
green,1.0,2.0,,,
red,,,5.0,10.0,20.0


In [28]:
# Only the requested columns are kept; the second frame additionally swaps the
# automatic integer index for custom row labels
print(
    pd.DataFrame(data, columns=["jon", "peter"]),
    pd.DataFrame(data, columns=["jon", "peter"], index=["green", "red"]),
    sep="\n",
)

   jon  peter
0  2.0    NaN
1  NaN    5.0
       jon  peter
green  2.0    NaN
red    NaN    5.0


# Basic operations with DataFrames