### Pandas DataFrame

A DataFrame can be built from a dictionary of lists, a dictionary of dictionaries, a dictionary of Series, or a list of dictionaries.

The dictionary keys become the column labels of the DataFrame.

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [6]:
# DF from a dict of lists: each key is a column label, each list holds that column's values
d = dict(
    Price=[19, 22, 13, 54, 65],
    Sales=[600, 756, 587, 859, 990],
)
frame = DataFrame(data=d)
print(frame)

   Price  Sales
0     19    600
1     22    756
2     13    587
3     54    859
4     65    990


In [8]:
# Nested dicts: the outer keys become the column labels,
# the inner keys become the row (index) labels
d1 = {
    'Price': {'r1': 19, 'r2': 22},
    'Sales': {'r1': 600, 'r2': 756},
}
frame1 = DataFrame(data=d1)
print(frame1)

    Price  Sales
r1     19    600
r2     22    756


In [10]:
# DF from a dict of Series: every Series supplies the values of one column
d2 = {
    'Price': Series([19, 22, 13, 54, 65]),
    'Sales': Series([600, 756, 587, 859, 990]),
}
frame2 = DataFrame(data=d2)
print(frame2)

   Price  Sales
0     19    600
1     22    756
2     13    587
3     54    859
4     65    990


In [11]:
# DF from a list of dicts: each dict is one row, its keys supply the column labels
l1 = [
    dict(Price=19, Sales=600, Qty=30),
    dict(Price=22, Sales=756, Qty=65),
]
frame3 = DataFrame(data=l1)
print(frame3)

   Price  Sales  Qty
0     19    600   30
1     22    756   65


Accessing rows and cols of a DF

In [14]:
# Selecting a single column by its label gives back a Series;
# print the column and then its type, one per line
print(frame2['Sales'], type(frame2['Sales']), sep='\n')

0    600
1    756
2    587
3    859
4    990
Name: Sales, dtype: int64
<class 'pandas.core.series.Series'>


In [16]:
# Selecting with a list of labels gives back a DataFrame;
# print the selection and then its type, one per line
print(frame2[['Price', 'Sales']], type(frame2[['Price', 'Sales']]), sep='\n')

   Price  Sales
0     19    600
1     22    756
2     13    587
3     54    859
4     65    990
<class 'pandas.core.frame.DataFrame'>


In [17]:
# Show the current contents of frame (the DataFrame built from the dict of lists)
frame

Unnamed: 0,Price,Sales
0,19,600
1,22,756
2,13,587
3,54,859
4,65,990


In [18]:
# Renaming cols - rename() takes an {old_label: new_label} mapping.
# It returns a new DataFrame, so the result is bound back to `frame` instead of
# using inplace=True; re-running this cell is harmless because labels that are
# no longer present in the mapping's keys are simply left as they are.
frame = frame.rename(columns={'Price': 'Base_Price', 'Sales': 'Unit Sales'})
frame

Unnamed: 0,Base_Price,Unit Sales
0,19,600
1,22,756
2,13,587
3,54,859
4,65,990


In [20]:
# Setting col labels directly: assign a full list of labels to frame.columns.
# The labels given here are the same ones rename() already produced above,
# so the displayed frame is unchanged.
frame.columns = ['Base_Price', 'Unit Sales']
frame

Unnamed: 0,Base_Price,Unit Sales
0,19,600
1,22,756
2,13,587
3,54,859
4,65,990


**DataFrame methods**

In [23]:
frame.columns # Index object holding all column labels

Index(['Base_Price', 'Unit Sales'], dtype='object')

In [24]:
frame.head() # First 5 rows (the whole frame here, since it has exactly 5 rows)

Unnamed: 0,Base_Price,Unit Sales
0,19,600
1,22,756
2,13,587
3,54,859
4,65,990


In [26]:
frame.describe() # Summary statistics per column: count, mean, std, min, quartiles, max

Unnamed: 0,Base_Price,Unit Sales
count,5.0,5.0
mean,34.6,758.4
std,23.287336,171.925274
min,13.0,587.0
25%,19.0,600.0
50%,22.0,756.0
75%,54.0,859.0
max,65.0,990.0


In [27]:
# Print a concise summary: index range, non-null count and dtype per column, memory usage
frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Base_Price  5 non-null      int64
 1   Unit Sales  5 non-null      int64
dtypes: int64(2)
memory usage: 208.0 bytes


In [28]:
# Data type of every column
frame.dtypes

Base_Price    int64
Unit Sales    int64
dtype: object

Changing dtype of a col

In [32]:
# Cast the 'Unit Sales' column to float and store the result back under the same label
frame['Unit Sales'] = frame['Unit Sales'].astype('float')
# Alternative: cast the underlying values and wrap them in a new Series.
# The new Series gets a default 0..n-1 index, which matches frame's RangeIndex here:
# frame['Unit Sales'] = Series(frame['Unit Sales'].values.astype(float))

In [33]:
# Check the cast: 'Unit Sales' should now be float64, Base_Price still int64
frame.dtypes

Base_Price      int64
Unit Sales    float64
dtype: object