In [3]:
import pandas as pd
import numpy as np

In [4]:
pd.__version__

'1.5.3'

# SERIES

a single-column data

In [5]:
products = ['A','B','C','D']
type(products)

list

In [6]:
product_categories = pd.Series(products)
type(product_categories)

pandas.core.series.Series

In [7]:
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [8]:
daily_rates_dollars = pd.Series([40,45,50,60])
daily_rates_dollars

0    40
1    45
2    50
3    60
dtype: int64

In [9]:
array_a = np.array([10,20,30,40,50])
print(array_a)
type(array_a)

[10 20 30 40 50]


numpy.ndarray

In [10]:
series_a = pd.Series(array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int32

In [11]:
series_a.dtype

dtype('int32')

In [12]:
series_a.size

5

In [13]:
product_categories.dtype

dtype('O')

In [14]:
product_categories.name = "Product Categories"
product_categories

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

In [15]:
prices_per_category = {'Product A': 22250, 'Product B':16600, 'Product C': 15600}
prices_per_category

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [16]:
prices_per_category = pd.Series(prices_per_category)
prices_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [17]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [18]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

In [19]:
prices_per_category[0]

22250

In [20]:
series_b = pd.Series([10,20,30,40,50], index = ["1", "10", "100", "1000", "10000"])

In [21]:
series_b

1        10
10       20
100      30
1000     40
10000    50
dtype: int64

In [22]:
# Value methods

series_b.sum()
series_b.max()
series_b.min()
series_b.mean()

30.0

In [23]:
# Index max/min

series_b.idxmax()
series_b.idxmin()

'1'

In [24]:
# First 5 rows
product_categories.head()

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

In [25]:
# Last 5 rows
product_categories.tail()

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

# DATA FRAMES

Series can be thought as single column of DF. DF is a collection of series. Tabular structure

![image.png](attachment:image.png)

In [28]:
# DF from dict of lists
data = {'ProductName':['Product A', 'Product B', 'Product C'], 'ProductPrice':[22250,16600,12500]}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [29]:
# Explicit index
data = {'ProductName':['Product A', 'Product B', 'Product C'], 'ProductPrice':[22250,16600,12500]}
product_IDs = ['A','B','C']
df = pd.DataFrame(data, index = product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [30]:
# DF from list of dicts
data = [{'ProductName': 'Product A', 'ProductPrice': 22250},
        {'ProductName': 'Product B', 'ProductPrice': 16600},
        {'ProductName': 'Product C', 'ProductPrice': 12500}]
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [31]:
s_products = pd.Series(['Product A', 'Product B', 'Product C'])
s_prices = pd.Series([22250, 16600, 12500])

In [32]:
# DF from pandas series
data = {'ProductName':s_products, 'ProductPrice':s_prices}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [34]:
# Professional way
df = pd.DataFrame(data = [['Product A', 22250], ['Product B', 16600], ['Product C', 12500]],
                  columns = ['ProductName', 'ProductPrice'],
                  index = ['A', 'B', 'C'])
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [35]:
df.shape

(3, 2)