<a href="https://colab.research.google.com/github/Kavi-Sarna/365-python/blob/main/4%20-%20Data%20Cleaning%20and%20Preprocessing%20with%20pandas/1%20-%20Basics/4%20-%20pandas%20DataFrames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

<u>**#1: Construct a DataFrame from a dictionary of lists**</u>

In [2]:
# Dictionary of lists: each key is a future column label and each list holds
# that column's values, one entry per product.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
data

{'ProductName': ['Product A', 'Product B', 'Product C'],
 'ProductPrice': [22250, 16600, 12500]}

In [3]:
# Build a DataFrame from the `data` dictionary assigned in an earlier cell:
# each key becomes a column label and each list becomes that column's values.
# No index is given, so the rows are labelled 0, 1, 2.
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [5]:
# Counter-example: 'ProductName' has four entries but 'ProductPrice' only three.
# pandas rejects a dictionary of lists whose lists differ in length and raises
# a ValueError.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C', 'Product D'],
    'ProductPrice': [22250, 16600, 12500],
}
try:
    df = pd.DataFrame(data)
except ValueError as error:
    # The failure is the point of this cell, so report it instead of letting
    # it abort a "Restart & Run All" of the notebook.
    print(f'ValueError: {error}')

ValueError: ignored

<u>**#2: Construct a DataFrame from a dictionary of lists + specify an index**</u>

In [6]:
# Same dictionary of lists as before, but this time the row labels are
# supplied explicitly through the `index` argument.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(
    data=data,
    index=['A', 'B', 'C'],
)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [7]:
# Keep the row labels in their own variable and hand it to the constructor;
# the resulting frame is the same as when the list is written inline.
product_IDs = ['A', 'B', 'C']
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#3: Construct a DataFrame from a list of dictionaries**</u>

In [8]:
# List of dictionaries: each dictionary is one row, and its keys name the
# columns that the row's values belong to.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': 12500},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [9]:
# The last record carries a list as its price. pandas does not expand it into
# extra rows or columns; the whole list is stored in that single cell.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': [12500, 10000]},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,"[12500, 10000]"


In [10]:
# The fourth record has no 'ProductPrice' key. The row is still created; its
# price is left missing (NaN) and the price column is displayed as floats.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': 12500},
    {'ProductName': 'Product D'},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250.0
1,Product B,16600.0
2,Product C,12500.0
3,Product D,


<u>**#4: Construct a DataFrame from a dictionary of pandas Series**</u>

In [11]:
# Two Series with no explicit index: one holding the product names and one
# holding the matching prices.
ser_products = pd.Series(data=['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series(data=[22250, 16600, 12500])

In [12]:
# Dictionary of Series: the keys become the column labels and each Series
# (created in an earlier cell) supplies that column's values.
data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [13]:
# Both Series carry the same labels in the same order, and the resulting
# DataFrame uses those labels for its rows.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['A', 'B', 'C'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [14]:
# The price Series lists its labels in reverse order ('C', 'B', 'A'). Values
# are matched by label rather than by position, so row 'A' receives 12500 and
# row 'C' receives 22250.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['C', 'B', 'A'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,12500
B,Product B,16600
C,Product C,22250


<u>**#5: Construct a DataFrame from a list of lists**</u>

In [15]:
# List of lists: each inner list is one row. No column names are given, so
# the columns are labelled 0 and 1.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [16]:
# The third row has one extra element. A third column (label 2) appears, and
# the two shorter rows are left with a missing value in it.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500, 5000],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,Product A,22250,
1,Product B,16600,
2,Product C,12500,5000.0


In [17]:
# Rebuild the plain three-row frame (columns labelled 0 and 1) so that the
# following cells can rename its columns and rows.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [18]:
# Replace the default column labels (0 and 1) with descriptive names by
# assigning a list to the `columns` attribute of the existing DataFrame.
df.columns = ['ProductName', 'ProductPrice']
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [19]:
# Replace the default row labels (0, 1, 2) with product IDs by assigning a
# list to the `index` attribute of the existing DataFrame.
df.index = ['A', 'B', 'C']
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#6: Construct a DataFrame in a Professional Way**</u>

In [20]:
# Everything in one constructor call: the rows, the column labels and the row
# labels are all passed as keyword arguments, so no renaming is needed later.
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    columns=['ProductName', 'ProductPrice'],
    index=['A', 'B', 'C'],
)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [21]:
# `shape` is a (rows, columns) tuple; for this frame it is (3, 2):
# three products, each with a name and a price.
df.shape

(3, 2)