# Introduction to pandas DataFrames

In [1]:
import pandas as pd

# Creating a DataFrame from scratch

## <u>#1: Construct a DataFrame from a dictionary of lists</u>

In [4]:
# Each dictionary key becomes a column label and each list supplies that
# column's values, one entry per row.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 15600],
}
df = pd.DataFrame(data=data)
df  # no index was given, so rows are labelled 0, 1, 2

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,15600


In [5]:
# Deliberately broken input: four product names but only three prices.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C', 'Product D'],
    'ProductPrice': [22250, 16600, 15600],
}

# The array lengths are different, so the constructor raises a ValueError.
# The error is the point of this cell, so catch it and show the message
# instead of letting it stop a top-to-bottom run of the notebook.
try:
    df = pd.DataFrame(data)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: All arrays must be of the same length

## <u>#2: Construct a DataFrame from a dictionary of lists + specify an index</u>

In [6]:
# A dictionary of lists again, but this time the row labels are passed
# explicitly through `index` instead of relying on the default 0, 1, 2.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 15600],
}
df = pd.DataFrame(data=data, index=['A', 'B', 'C'])
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,15600


In [7]:
# The row labels can live in their own variable and be handed to `index`.
product_IDs = ['A', 'B', 'C']
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 15600],
}
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,15600


## <u>#3: Construct a DataFrame from a list of dictionaries</u>

In [10]:
# Here every dictionary describes one row; its keys are the column labels.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16500},
    {'ProductName': 'Product C', 'ProductPrice': 15500},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16500
2,Product C,15500


In [11]:
# One row per dictionary. The last row's price is itself a list: pandas
# does not expand it, it stores the whole list as that single cell's value.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16500},
    {'ProductName': 'Product C', 'ProductPrice': [15500, 60000]},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16500
2,Product C,"[15500, 60000]"


In [12]:
# 'Product D' has no 'ProductPrice' key. The row is still created; its
# price is filled with NaN, and the price column is shown as floats.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16500},
    {'ProductName': 'Product C', 'ProductPrice': 15500},
    {'ProductName': 'Product D'},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250.0
1,Product B,16500.0
2,Product C,15500.0
3,Product D,


## <u>#4: Construct a DataFrame from a dictionary of pandas Series</u>

In [13]:
# Two standalone Series (default integer index) that will serve as columns.
ser_products = pd.Series(data=['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series(data=[22250, 16600, 15600])

In [15]:
# Dictionary keys become the column labels; each Series supplies one column.
data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,15600


In [16]:
# Both Series carry the same custom labels, so the resulting DataFrame is
# indexed by 'A', 'B', 'C' rather than by 0, 1, 2.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 15600],
    index=['A', 'B', 'C'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,15600


In [19]:
# The two Series use the same labels but in a different order. This does not
# raise an error: the values are matched by index label, not by position, so
# each price ends up on the row whose label it carries
# (row 'A' gets 15600 and row 'C' gets 22250).
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 15600],
    index=['C', 'B', 'A'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,15600
B,Product B,16600
C,Product C,22250


## <u>#5: Constructing a DataFrame from a list of lists</u>

In [22]:
# Each inner list is one row. No column labels are supplied, so the
# columns are labelled with the default integers 0 and 1.
data = [
    ['Product A', 2250],
    ['Product B', 16600],
    ['Product C', 15600],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,2250
1,Product B,16600
2,Product C,15600


In [21]:
# The last row has one more value than the others. pandas adds a third
# column to hold it and leaves that column empty (NaN) for the shorter rows.
data = [
    ['Product A', 2250],
    ['Product B', 16600],
    ['Product C', 15600, 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,Product A,2250,
1,Product B,16600,
2,Product C,15600,12500.0


In [27]:
# The frame built in the cell directly above has three columns, so assigning
# two column labels to it would raise a length-mismatch ValueError when the
# notebook is run top to bottom. Rebuild the two-column list-of-lists frame
# here so this cell does not depend on the order the cells were executed in.
data = [['Product A', 2250], ['Product B', 16600], ['Product C', 15600]]
df = pd.DataFrame(data)

# Replace the default integer column and row labels with meaningful ones.
df.columns = ['ProductName', 'ProductPrice']
df.index = ['A', 'B', 'C']
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,2250
B,Product B,16600
C,Product C,15600


## <u>#6: Constructing a DataFrame the professional way: data, columns and index in one call</u>

In [29]:
# Row values, column labels and row labels are all passed to the
# constructor at once, so no relabelling is needed afterwards.
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 15600],
    ],
    index=['A', 'B', 'C'],
    columns=['ProductName', 'ProductPrice'],
)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,15600


In [30]:
# `shape` is a (number of rows, number of columns) tuple.
df.shape

(3, 2)