# Series objects

In [None]:
# Creating a Series: a one-dimensional labelled array built here from a list.

import pandas as pd

values = [2, -1, 3, 5]
s = pd.Series(values)
print(s)

In [None]:
# A Series can be passed straight to a NumPy function; here every item is squared.

import numpy as np

squared = np.square(s)
squared

In [None]:
# Arithmetic on a Series is element-wise: each list item is added to the
# Series item at the same position.

increments = [1000, 2000, 3000, 4000]
s + increments

In [None]:
# Broadcasting: a single scalar is applied to every item of the Series.

offset = 1000
s + offset

In [None]:
# Comparisons are element-wise as well and produce a boolean Series.

is_negative = s < 0
is_negative

In [None]:
# Index labels

# Every item in a Series carries an identifier called its index label.
# When no index is given, the label is simply the item's position, counted from 0.

default_labelled_values = [68, 83, 112, 68]
s2 = pd.Series(default_labelled_values)
print(s2)

In [None]:
# Index labels can also be supplied explicitly, one per value.

names = ["alice", "bob", "charles", "darwin"]
s2 = pd.Series(data=[68, 83, 112, 68], index=names)
print(s2)

In [None]:
# Access the items in a Series
# Items can be read either by integer position or by index label.
# Plain `s2[1]` on a string-labelled Series relied on a positional fallback
# that pandas has deprecated (integer keys are to be treated as labels), so
# the positional lookup is spelled out with `.iloc`.

print(s2.iloc[1])
print(s2["bob"])

In [None]:
# It is recommended to always use the loc attribute when accessing by label,
# and the iloc attribute when accessing by integer location.
# Both results are printed: a notebook cell only displays its last bare
# expression, so an unprinted first lookup would be silently discarded.

print(s2.loc["bob"])
print(s2.iloc[1])

In [None]:
# Init from dict
# A Series can be built from a Python dict: keys become the index labels
# and values become the items.

weights = dict(alice=68, bob=83, colin=86, darwin=68)
s3 = pd.Series(weights)
print(s3)

In [None]:
# Passing an explicit index alongside the dict selects which entries end up
# in the Series and fixes the order in which they appear.

wanted_labels = ["colin", "alice"]
s4 = pd.Series(weights, index=wanted_labels)
print(s4)

In [None]:
# Automatic alignment
# When an operation involves several Series, pandas lines the items up by
# index label before computing.
# Note the NaN entries: they appear for labels present in only one operand.

for operand in (s2, s3):
    print(operand)
    print("\n")
print(s2 + s3)


In [None]:
# Remember to set the right index labels, otherwise the result may be surprising:
# s5 gets the default integer labels, none of which match the name labels of s2.

s5 = pd.Series([1000] * 4)
for operand in (s2, s5):
    print(operand)
    print("\n")

print(s2 + s5)

In [None]:
# Init with a scalar: the single value is repeated for every index label.

topics = ["life", "universe", "everything"]
meaning = pd.Series(42, index=topics)
print(meaning)

In [None]:
# Series name
# A Series can carry a name; this one is called "weights".

s6 = pd.Series(
    data=[83, 68],
    index=["bob", "alice"],
    name="weights",
)
print(s6)

In [None]:
# Plotting a series

%matplotlib inline
import matplotlib.pyplot as plt
temperatures = [4.4,5.1,6.1,6.2,6.1,6.1,5.7,5.2,4.7,4.1,3.9,3.5]
s7 = pd.Series(temperatures, name="Temperature")
s7.plot()
plt.show()

# DataFrame objects

In [None]:
# A DataFrame can be created from a dictionary of Series objects.
# Each key becomes a column name (the `name="year"` set on the birthyear Series
# is not used for the column label), and rows are aligned on the index labels
# of the Series, so entries missing from a Series show up as NaN.

people_dict = dict(
    weight=pd.Series([68, 83, 112], index=["alice", "bob", "charles"]),
    birthyear=pd.Series([1984, 1985, 1992], index=["bob", "alice", "charles"], name="year"),
    children=pd.Series([0, 3], index=["charles", "bob"]),
    hobby=pd.Series(["Biking", "Dancing"], index=["alice", "bob"]),
)
people = pd.DataFrame(people_dict)
people

In [None]:
# Access a column: indexing with a single column name returns that column as a Series.

birthyear_column = people["birthyear"]
birthyear_column

In [None]:
# Access multiple columns at once by indexing with a list of column names.

selected_columns = ["birthyear", "hobby"]
people[selected_columns]

In [None]:
# If you pass a list of columns and/or row index labels to the DataFrame
# constructor, exactly those columns and rows will exist, in that order.
# Labels with no data in the dictionary ("height", "eugene") are filled with NaN.

d2 = pd.DataFrame(
    people_dict,
    columns=["birthyear", "weight", "height"],
    index=["bob", "alice", "eugene"],
)
# Shown as the cell's last expression, like the other DataFrames in this
# notebook, so it renders as a table rather than plain printed text.
d2

In [None]:
# Accessing rows
# loc selects a row by its index label.

charles_row = people.loc["charles"]
charles_row

In [None]:
# Accessing rows
# iloc selects a row by its integer position (0-based).

third_row = people.iloc[2]
third_row

In [None]:
# Get a slice of rows: positions 1 and 2 (the stop position 3 is excluded).

row_slice = people.iloc[1:3]
row_slice

In [None]:
# Pass a boolean array to keep only the rows whose flag is True.

row_mask = np.array([True, False, True])
people[row_mask]

In [None]:
# Pass a boolean expression: the comparison yields a boolean Series that is
# then used to filter the rows.

born_before_1990 = people["birthyear"] < 1990
people[born_before_1990]

In [None]:
# Adding and removing columns
# This cell modifies `people` in place. It reads the "birthyear" and "children"
# columns and then removes them, so running it a second time on the same
# DataFrame fails because those columns are no longer there.

people["age"] = 2016 - people["birthyear"]  # adds a new column "age" computed from "birthyear"
people["over 30"] = people["age"] > 30      # adds a boolean column "over 30" derived from "age"
# pop removes the column from the DataFrame and returns it as a Series
birthyears = people.pop("birthyear")
# del removes the column without returning it
del people["children"]

people


In [None]:
# A Series assigned as a new column is aligned on the DataFrame's index labels.
# Rows with no matching label are filled with NaN, and labels that do not exist
# in the DataFrame (here "eugene") are ignored.

pets_by_owner = pd.Series({"bob": 0, "charles": 5, "eugene": 1})
people["pets"] = pets_by_owner

people

In [None]:
# Add a new column using the insert method.
# insert places the column at the given position (1 = second column) and
# modifies the DataFrame in place; the list values are assigned row by row.

people.insert(loc=1, column="height", value=[172, 181, 185])
people

In [None]:
# Add new columns using the assign method.
# assign returns a new DataFrame, so the result is bound back to `people`.
# The second step is a separate call because it reads the column created by
# the first one.
# NOTE(review): height / 100 presumably converts centimetres to metres - confirm units.

people = people.assign(
    body_mass_index=lambda df: df["weight"] / (df["height"] / 100) ** 2
)
people = people.assign(
    overweight=lambda df: df["body_mass_index"] > 25
)

In [None]:
# Querying a DataFrame
# query takes the filter as a string expression written in terms of column names.

older_without_pets = people.query("age > 30 and pets == 0")
older_without_pets

In [None]:
# Sorting a DataFrame
# sort_index orders the rows by index label, ascending by default.
# Here the order is reversed; a new DataFrame is returned and `people` is unchanged.
rows_descending = people.sort_index(ascending=False)
rows_descending

In [None]:
# Inplace sorting
# With inplace=True, sort_index reorders `people` itself instead of returning
# a sorted copy, so the result is shown by evaluating `people` afterwards.

people.sort_index(inplace=True)
people

In [None]:
# Sort by value
# Rows are reordered by the "age" column (ascending by default). inplace=True
# modifies `people` directly instead of returning a new DataFrame.

people.sort_values(by="age", inplace=True)
people

In [None]:
# Plotting a DataFrame
# Line plot with body mass index on the x axis and one line each for
# height and weight.

plot_options = dict(
    kind="line",
    x="body_mass_index",
    y=["height", "weight"],
)
people.plot(**plot_options)

plt.show()

In [None]:
# Saving and Loading
# Build a small DataFrame, my_df, that the following cells save in several formats.
# Each inner list is one row; alice's "children" value is deliberately missing (NaN).

records = [
    ["Biking", 68.5, 1985, np.nan],
    ["Dancing", 83.1, 1984, 3],
]
my_df = pd.DataFrame(
    records,
    columns=["hobby", "weight", "birthyear", "children"],
    index=["alice", "bob"],
)
my_df


In [None]:
# Save to CSV, HTML and JSON
# Each target file is paired with the DataFrame method that writes that format.

exporters = {
    "my_df.csv": my_df.to_csv,
    "my_df.html": my_df.to_html,
    "my_df.json": my_df.to_json,
}
for path, export in exporters.items():
    export(path)

In [None]:
# Let's take a peek at what was saved
# Print each file's name followed by its full contents and a blank line.

for filename in ("my_df.csv", "my_df.html", "my_df.json"):
    print("#", filename)
    # pandas writes these files as UTF-8 by default; read them back with the
    # same encoding instead of relying on the platform's locale default.
    with open(filename, "rt", encoding="utf-8") as f:
        print(f.read())
        print()

In [None]:
# Load CSV file
# index_col=0 makes the first CSV column the row index again instead of a
# regular data column.

csv_path = "my_df.csv"
my_df_loaded = pd.read_csv(csv_path, index_col=0)
my_df_loaded