## Series

In [1]:
import pandas as pd
import numpy as np

In [2]:
x = pd.Series([10, 20, 30, 40, 50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
x.index

RangeIndex(start=0, stop=5, step=1)

In [4]:
x.dtype

dtype('int64')

In [5]:
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [20]:
data = [450, 650, 870]
#index = ['Don', 'Mike', 'Edwin']
index = [2010, 2011, 2012]
sales = pd.Series(data, index=index)
sales.index.name = "Year"
sales.name = "Total Sales per year"
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [19]:
sales.index

Int64Index([2010, 2011, 2012], dtype='int64', name='Year')

In [22]:
sales[2012]
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [24]:
sales.iloc[0]

450

In [85]:
sales[2010]

450

In [87]:
sales.loc[2011] # index values

650

In [88]:
sales.iloc[0] # index locations / order

450

In [84]:
sales

Year
2010    450
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [28]:
sales > 500

Year
2010    False
2011     True
2012     True
Name: Total Sales per year, dtype: bool

In [30]:
sales[[False, True, True]]

Year
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [32]:
sales[sales>500]

Year
2011    650
2012    870
Name: Total Sales per year, dtype: int64

In [35]:
list(sales [sales > 500].values)

[650, 870]

In [89]:
650 in sales

False

In [90]:
2011 in sales

True

In [40]:
650 in sales.values

True

In [42]:
sales.to_dict()

{2010: 450, 2011: 650, 2012: 870}

In [46]:
sales_dict = {
    "Don" : 534,
    "Mike" : 453,
    "Edwin" : 412
    
}
sales_ser = pd.Series(sales_dict)
sales_ser

Don      534
Mike     453
Edwin    412
dtype: int64

In [92]:
new_sales = pd.Series(sales_dict, index=["Don", "Mike", "Sally", "Edwin", "Lucy"])
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [95]:
np.isnan(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [97]:
pd.isnull(new_sales)

Don      False
Mike     False
Sally     True
Edwin    False
Lucy      True
dtype: bool

In [94]:
new_sales.loc["Sally"] is None

False

In [55]:
n = np.nan
n

nan

In [57]:
type(n)

float

In [59]:
new_sales

Don      534.0
Mike     453.0
Sally      NaN
Edwin    412.0
Lucy       NaN
dtype: float64

In [98]:
new_sales.loc["Sally"] = 548
new_sales

Don      534.0
Mike     453.0
Sally    548.0
Edwin    412.0
Lucy       NaN
dtype: float64

In [65]:
new_sales.index.name = "Sales Person"

In [68]:
new_sales.name = "Total TV Sales"

In [70]:
new_sales

Sales Person
Don      534.0
Mike     453.0
Sally    540.0
Edwin    412.0
Lucy       NaN
Name: Total TV Sales, dtype: float64

## DataFrames
- Two-dimensional
- Size mutable
- Heterogenous
- Rows and columns(records and series)

In [71]:
sales_df = pd.DataFrame(new_sales)

In [74]:
sales_df

Unnamed: 0_level_0,Total TV Sales
Sales Person,Unnamed: 1_level_1
Don,534.0
Mike,453.0
Sally,540.0
Edwin,412.0
Lucy,


In [99]:
data = [['Adrian', 20], ['Bethany', 23], ['Chloe', 41]]
df = pd.DataFrame(data, columns=["Name", "Age"], dtype=int)
df

  df = pd.DataFrame(data, columns=["Name", "Age"], dtype=int)


Unnamed: 0,Name,Age
0,Adrian,20
1,Bethany,23
2,Chloe,41


In [102]:
new_dict = {
    "Name": ["Tom", "Jane", "Steve", "Lucy"],
    "Sales": [250, 300, 350, 420]
}

df_dict = pd.DataFrame(new_dict)
df_dict

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350
3,Lucy,420


In [110]:
list_dicts = [
    {"Name": "Tom", "Sales": 300},
    {"Name": "Greg", "Sales": 345},
    {"Name": "Simone", "Sales": 745},
    {"Name": "Paula", "Sales": 542, "Performance": "Outstanding"},
    {"Name": "Yuri", "Sales": 453},
]

df_list_dicts = pd.DataFrame(list_dicts)
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [111]:
df_list_dicts["Sales"]

0    300
1    345
2    745
3    542
4    453
Name: Sales, dtype: int64

In [112]:
df_list_dicts["Name"]

0       Tom
1      Greg
2    Simone
3     Paula
4      Yuri
Name: Name, dtype: object

In [113]:
df_list_dicts.loc[3]

Name                 Paula
Sales                  542
Performance    Outstanding
Name: 3, dtype: object

In [114]:
df_list_dicts

Unnamed: 0,Name,Sales,Performance
0,Tom,300,
1,Greg,345,
2,Simone,745,
3,Paula,542,Outstanding
4,Yuri,453,


In [116]:
df_list_dicts["Grades"] = ["A", "A", "B", "A", "C"]
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [117]:
df_list_dicts.drop("Performance", axis=1)

Unnamed: 0,Name,Sales,Grades
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C


In [118]:
df_list_dicts.drop(1)

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [119]:
df_list_dicts

Unnamed: 0,Name,Sales,Performance,Grades
0,Tom,300,,A
1,Greg,345,,A
2,Simone,745,,B
3,Paula,542,Outstanding,A
4,Yuri,453,,C


In [121]:
df_list_dicts[["Name", "Sales", "Grades"]]

Unnamed: 0,Name,Sales,Grades
0,Tom,300,A
1,Greg,345,A
2,Simone,745,B
3,Paula,542,A
4,Yuri,453,C
