# Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# A Series built from a plain list gets a default 0..n-1 integer index.
x = pd.Series(data=[10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
# Row labels; no index was supplied, so this is the default RangeIndex.
x.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
# Element type of the values held by the Series.
x.dtype

dtype('int64')

In [5]:
# The underlying data as a NumPy array (the index labels are not included).
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [11]:
# Per-employee sales figures, labelled by employee name instead of position.
data = [450, 650, 870]
index = ['Don', 'Mike', 'Edwin']
sales = pd.Series(
    data,
    index=pd.Index(index, name="Employees"),  # index name is shown above the labels
    name="Total sales by Employee",           # Series name is shown in the repr footer
)
sales

Employees
Don      450
Mike     650
Edwin    870
Name: Total sales by Employee, dtype: int64

In [10]:
# The index holds the employee labels together with the name assigned to it.
sales.index

Index(['Don', 'Mike', 'Edwin'], dtype='object', name='Employees')

In [12]:
# First element by position. Plain `sales[0]` on a string-labelled Series
# relies on a positional fallback for integer keys that pandas has deprecated
# (square brackets are meant to look up *labels*), so use .iloc explicitly.
sales.iloc[0]

450

In [13]:
# Label-based lookup with plain square brackets.
sales["Don"]

450

In [14]:
sales.loc["Don"]  # .loc selects by index label (index values)

450

In [15]:
sales.iloc[0]  # .iloc selects by integer location / order

450

In [16]:
# The comparison is applied element-wise and yields a boolean Series.
sales > 500

Employees
Don      False
Mike      True
Edwin     True
Name: Total sales by Employee, dtype: bool

In [17]:
# A boolean list acts as a mask: only the positions marked True are kept.
sales[[False, True, True]]

Employees
Mike     650
Edwin    870
Name: Total sales by Employee, dtype: int64

In [20]:
# Same filter as above, with the mask produced by the comparison itself.
sales[sales > 500]

Employees
Mike     650
Edwin    870
Name: Total sales by Employee, dtype: int64

In [21]:
# Labels of the entries that passed the filter.
sales[sales > 500].index

Index(['Mike', 'Edwin'], dtype='object', name='Employees')

In [22]:
# Values of the entries that passed the filter, as a NumPy array.
sales[sales > 500].values

array([650, 870], dtype=int64)

In [23]:
# Show the full Series again for reference in the membership checks that follow.
sales

Employees
Don      450
Mike     650
Edwin    870
Name: Total sales by Employee, dtype: int64

In [24]:
# `in` tests the index labels, not the values, so a sales figure is not found.
650 in sales

False

In [25]:
# "Don" is an index label, so the membership test succeeds.
"Don" in sales

True

In [26]:
# To test for a value rather than a label, check against the values array.
650 in sales.values

True

In [27]:
# Convert to a plain dict of {label: value}.
sales.to_dict()

{'Don': 450, 'Mike': 650, 'Edwin': 870}

In [28]:
# A dict maps directly onto a Series: keys become labels, values become data.
sales_dict = dict(Don=534, Mike=453, Edwin=412)
sales_ser = pd.Series(data=sales_dict)
sales_ser

Don      534
Mike     453
Edwin    412
dtype: int64

In [29]:
# Build from the dict against an explicit label list: labels that are absent
# from the dict ('Sally', 'Lucy') come out as NaN, and the dtype is float64.
new_sales = pd.Series(
    data=sales_dict,
    index=['Don', 'Mike', 'Sally', 'Edwin', 'Lucy'],
)
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [30]:
# NumPy's NaN test applied element-wise; the result is a boolean Series.
np.isnan(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [35]:
# pandas' own missing-value check; it produces the same mask here.
pd.isnull(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [31]:
# The missing entry is stored as NaN, not None, so an identity test against None is False.
new_sales.loc['Sally'] is None

False

In [33]:
# Keep a reference to NumPy's NaN value so it can be inspected directly.
n = np.nan
n

nan

In [34]:
# NaN is an ordinary Python float.
type(n)

float

In [36]:
# Current state: the entries for Sally and Lucy are still missing.
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [37]:
# Assigning through .loc fills in a single label; this modifies new_sales in place.
new_sales.loc["Sally"] = 548
new_sales

Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
dtype: float64

In [38]:
# Name the index; the name is shown above the labels when the Series is displayed.
new_sales.index.name = "Sales Person"

In [40]:
# Name the Series itself; the name is shown in the footer of the displayed Series.
new_sales.name = "Total TV Sales"

In [41]:
# Both the index name and the Series name now appear in the output.
new_sales

Sales Person
Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
Name: Total TV Sales, dtype: float64

# DataFrames
- Two-dimensional
- Size-mutable
- Heterogeneous
- Rows and columns (records and series)

In [43]:
# Wrap the named Series in a one-column DataFrame.
sales_df = pd.DataFrame(new_sales)

In [44]:
# The Series name is used as the column header and the index name is kept.
sales_df

Unnamed: 0_level_0,Total TV Sales
Sales Person,Unnamed: 1_level_1
Don,534.0
Mike,453.0
Sally,548.0
Edwin,412.0
Lucy,


In [48]:
# Build a frame from row-wise records; the column labels are supplied separately.
data = [
    ["Adrian", 20],
    ["Bethany", 23],
    ["Chloe", 41],
]
df = pd.DataFrame(data=data, columns=["Name", "Age"])
df

Unnamed: 0,Name,Age
0,Adrian,20
1,Bethany,23
2,Chloe,41


In [49]:
# Build a frame column-wise: each dict key is a column, each list holds its values.
new_dict = dict(
    Name=["Tom", "Jane", "Steve", "Lucy"],
    Sales=[250, 300, 350, 420],
)
df_dict = pd.DataFrame(data=new_dict)
df_dict

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,420


In [56]:
# One dict per row. The keys of all records together form the columns, so the
# single record that has "Performance" leaves that column empty for the others.
list_dicts = [
    dict(Name="Tom", Sales=300),
    dict(Name="Greg", Sales=345),
    dict(Name="Simone", Sales=745),
    dict(Name="Paula", Sales=542, Performance="Outstanding"),
    dict(Name="Yuri", Sales=453),
]

df_lists_dicts = pd.DataFrame(data=list_dicts)
df_lists_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [57]:
df_lists_dicts["Sales"]  # Selecting a single column returns it as a Series

0    300
1    345
2    745
3    542
4    453
Name: Sales, dtype: int64

In [58]:
df_lists_dicts["Name"]  # A DataFrame is multiple Series held together

0       Tom
1      Greg
2    Simone
3     Paula
4      Yuri
Name: Name, dtype: object

In [59]:
# .loc with a row label returns that row as a Series keyed by column name.
df_lists_dicts.loc[3]

Name                 Paula
Sales                  542
Performance    Outstanding
Name: 3, dtype: object

In [61]:
# The frame as it stands before a new column is added.
df_lists_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [62]:
# Assigning a list to a new key adds a column in place (one value per row, in row order).
df_lists_dicts["Grade"] = ["A", "A", "B", "A", "C"]
df_lists_dicts

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [63]:
# Drop a column by name (equivalent to drop("Performance", axis=1)).
# The result is a new frame; df_lists_dicts itself is not modified.
df_lists_dicts.drop(columns="Performance")

Unnamed: 0,Name,Sales,Grade
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C


In [64]:
# Drop the row labelled 1 (equivalent to drop(1), since rows are the default axis).
df_lists_dicts.drop(index=1)

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [65]:
# The original frame is untouched: the drop() calls above returned new frames.
df_lists_dicts

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [66]:
# A list of column names selects a sub-frame containing just those columns.
df_lists_dicts[["Name", "Sales", "Grade"]]

Unnamed: 0,Name,Sales,Grade
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C
