### Imports

In [1]:
import pprint

import numpy as np
import pandas as pd

### Creating a Series

In [2]:
# Build a Series from a plain list; with no index supplied, pandas labels
# the entries 0..n-1. The mixed int/str values are stored as dtype object.
s1 = pd.Series(data=[1, 'tom', 32, 'qualified'])
print(s1)

0            1
1          tom
2           32
3    qualified
dtype: object


In [3]:
# Same values as before, but each entry now gets an explicit label
# through the `index` argument
s2 = pd.Series(
    data=[1, 'tom', 32, 'qualified'],
    index=['number', 'name', 'age', 'status'],
)
print(s2)

number            1
name            tom
age              32
status    qualified
dtype: object


In [4]:
# An all-integer list with explicit labels; because every value is an int,
# the Series gets an integer dtype instead of object
s3 = pd.Series(
    data=[1, 345, 14, 24, 12],
    index=['first', 'second', 'third', 'fourth', 'fifth'],
)
print(s3)

first       1
second    345
third      14
fourth     24
fifth      12
dtype: int64


In [5]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data
s4 = pd.Series(dict(number=1, name='tom', age=32, status='qualified'))
print(s4)

number            1
name            tom
age              32
status    qualified
dtype: object


### Creating a DataFrame

In [6]:
# Fix the global NumPy seed so the "random" values below are the same on every run
np.random.seed(42)

# Draw a 7 x 5 array of random samples and scale them by a factor of 10
np_array = np.random.rand(7, 5) * 10

# One label per column: col1 .. col5
cols = [f'col{i}' for i in range(1, 6)]

# Wrap the array in a DataFrame; the row index defaults to 0..6
ndf = pd.DataFrame(np_array, columns=cols)

# Display the DataFrame
print(ndf)

       col1      col2      col3      col4      col5
0  3.745401  9.507143  7.319939  5.986585  1.560186
1  1.559945  0.580836  8.661761  6.011150  7.080726
2  0.205845  9.699099  8.324426  2.123391  1.818250
3  1.834045  3.042422  5.247564  4.319450  2.912291
4  6.118529  1.394939  2.921446  3.663618  4.560700
5  7.851760  1.996738  5.142344  5.924146  0.464504
6  6.075449  1.705241  0.650516  9.488855  9.656320


In [7]:
# Make three Series of equal length (note: this rebinds s1, s2 and s3)
s1 = pd.Series(data=[10, 20, 30, 40, 50])
s2 = pd.Series(data=['a', 'b', 'c', 'd', 'e'])
s3 = pd.Series(data=['one', 'two', 'three', 'four', 'five'])

# Collect them in a dict keyed by the column label each Series should get
data_dict = dict(col1=s1, col2=s2, col3=s3)

In [8]:
# Build the DataFrame from the dict of Series: each key becomes a column label
df = pd.DataFrame(data_dict)

# Display the DataFrame
print(df)

   col1 col2   col3
0    10    a    one
1    20    b    two
2    30    c  three
3    40    d   four
4    50    e   five


In [9]:
# Reuses data_dict from the earlier cell.

# Column labels to keep; keys of data_dict that are not listed here are left out
cols = ['col1', 'col2']

# Build the DataFrame restricted to those columns (this rebinds df)
df = pd.DataFrame(data_dict, columns=cols)

# Display the DataFrame
print(df)

   col1 col2
0    10    a
1    20    b
2    30    c
3    40    d
4    50    e


In [10]:
# Column names, one per position in each row list
col = ['col1', 'col2', 'col3', 'col4', 'col5']

# Each inner list holds the values of one row
a1 = ['one', 1, 'up', 'top', 'beauty']
a2 = ['zero', 0, 'down', 'bottom', 'charm']

# Stack the rows into a single list of lists
l = [a1, a2]

# Build the DataFrame row by row from the nested list
df2 = pd.DataFrame(l, columns=col)

# Display the DataFrame
print(df2)

   col1  col2  col3    col4    col5
0   one     1    up     top  beauty
1  zero     0  down  bottom   charm


### Creating or adding rows

In [11]:
# Starting point for this section: the two-row frame "df2" built in the previous section
print(df2)

   col1  col2  col3    col4    col5
0   one     1    up     top  beauty
1  zero     0  down  bottom   charm


In [12]:
# Describe the new row as a dict keyed by column label, so that every value
# lands in the matching column of "df2"
row = {'col1':'two', 'col2':2, 'col3':'blue', 'col4':'green', 'col5':'red'}

# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0.
# Wrap the dict in a one-row DataFrame and concatenate it instead;
# ignore_index=True renumbers the rows of the result 0..n-1.
# "df2" itself is left unchanged.
new_df = pd.concat([df2, pd.DataFrame([row])], ignore_index=True)

# display the new DataFrame
print(new_df)

   col1  col2  col3    col4    col5
0   one     1    up     top  beauty
1  zero     0  down  bottom   charm
2   two     2  blue   green     red


In [13]:
# Create a Series whose index is the column labels of "df2", so each value
# is matched to its column by label
row = pd.Series(['three',3,'black','white','grey'],
                index=df2.columns)

# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0.
# Turn the Series into a one-row DataFrame (which also re-infers a dtype per
# column) and concatenate it; ignore_index=True renumbers the rows 0..n-1.
# "df2" itself is left unchanged.
new_df = pd.concat([df2, pd.DataFrame([row])], ignore_index=True)

# display the new DataFrame
print(new_df)

    col1  col2   col3    col4    col5
0    one     1     up     top  beauty
1   zero     0   down  bottom   charm
2  three     3  black   white    grey


In [14]:
# Create two Series whose index is the column labels of "df2", one per new row
row1 = pd.Series(['four',4,'left','right','center'],
              index=df2.columns)
row2 = pd.Series(['five',5,'Winterfell','Eyrie','Sunspear'],
              index=df2.columns)

# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0.
# Build a two-row DataFrame from the Series (which also re-infers a dtype per
# column) and concatenate it in a single call; ignore_index=True renumbers
# the rows 0..n-1. "df2" itself is left unchanged.
new_df = pd.concat([df2, pd.DataFrame([row1, row2])], ignore_index=True)

# display the new DataFrame
print(new_df)

   col1  col2        col3    col4      col5
0   one     1          up     top    beauty
1  zero     0        down  bottom     charm
2  four     4        left   right    center
3  five     5  Winterfell   Eyrie  Sunspear


### Converting a DataFrame to other formats

In [15]:
# Show the DataFrame itself, followed by one empty line
print('The DataFrame', ndf, '', sep='\n')

# The `values` attribute exposes the same data as a NumPy ndarray
print('Using values attribute', ndf.values, '', sep='\n')

The DataFrame
       col1      col2      col3      col4      col5
0  3.745401  9.507143  7.319939  5.986585  1.560186
1  1.559945  0.580836  8.661761  6.011150  7.080726
2  0.205845  9.699099  8.324426  2.123391  1.818250
3  1.834045  3.042422  5.247564  4.319450  2.912291
4  6.118529  1.394939  2.921446  3.663618  4.560700
5  7.851760  1.996738  5.142344  5.924146  0.464504
6  6.075449  1.705241  0.650516  9.488855  9.656320

Using values attribute
[[3.74540119 9.50714306 7.31993942 5.98658484 1.5601864 ]
 [1.5599452  0.58083612 8.66176146 6.01115012 7.08072578]
 [0.20584494 9.69909852 8.32442641 2.12339111 1.81824967]
 [1.8340451  3.04242243 5.24756432 4.31945019 2.9122914 ]
 [6.11852895 1.39493861 2.92144649 3.66361843 4.56069984]
 [7.85175961 1.99673782 5.14234438 5.92414569 0.46450413]
 [6.07544852 1.70524124 0.65051593 9.48885537 9.65632033]]



In [16]:
# to_numpy() returns the frame's data as an ndarray
print('Using to_numpy() method', ndf.to_numpy(), '', sep='\n')

# Passing dtype casts the result; with "int" the fractional part of each
# float is dropped (e.g. 3.745401 becomes 3)
print('Using to_numpy() method with dtype="int"', ndf.to_numpy(dtype='int'), '', sep='\n')


Using to_numpy() method
[[3.74540119 9.50714306 7.31993942 5.98658484 1.5601864 ]
 [1.5599452  0.58083612 8.66176146 6.01115012 7.08072578]
 [0.20584494 9.69909852 8.32442641 2.12339111 1.81824967]
 [1.8340451  3.04242243 5.24756432 4.31945019 2.9122914 ]
 [6.11852895 1.39493861 2.92144649 3.66361843 4.56069984]
 [7.85175961 1.99673782 5.14234438 5.92414569 0.46450413]
 [6.07544852 1.70524124 0.65051593 9.48885537 9.65632033]]

Using to_numpy() method with dtype="int"
[[3 9 7 5 1]
 [1 0 8 6 7]
 [0 9 8 2 1]
 [1 3 5 4 2]
 [6 1 2 3 4]
 [7 1 5 5 0]
 [6 1 0 9 9]]



### Converting to a dict or a list

In [17]:
# print the actual dataframe
print('The dataframe')
print(new_df)
print('')

# Convert the same frame with each `orient` option so the resulting
# structures can be compared side by side:
#   "dict"    -> {column: {index: value}}
#   "list"    -> {column: [values]}
#   "series"  -> {column: Series}
#   "records" -> one dict per row, collected in a list
# pprint keeps the nested containers readable.
for orient in ('dict', 'list', 'series', 'records'):
    print(f'to_dict() with orient="{orient}"')
    pprint.pprint(new_df.to_dict(orient=orient))
    print('')

The dataframe
   col1  col2        col3    col4      col5
0   one     1          up     top    beauty
1  zero     0        down  bottom     charm
2  four     4        left   right    center
3  five     5  Winterfell   Eyrie  Sunspear

to_dict() with orient="dict"
{'col1': {0: 'one', 1: 'zero', 2: 'four', 3: 'five'},
 'col2': {0: 1, 1: 0, 2: 4, 3: 5},
 'col3': {0: 'up', 1: 'down', 2: 'left', 3: 'Winterfell'},
 'col4': {0: 'top', 1: 'bottom', 2: 'right', 3: 'Eyrie'},
 'col5': {0: 'beauty', 1: 'charm', 2: 'center', 3: 'Sunspear'}}

to_dict() with orient="list"
{'col1': ['one', 'zero', 'four', 'five'],
 'col2': [1, 0, 4, 5],
 'col3': ['up', 'down', 'left', 'Winterfell'],
 'col4': ['top', 'bottom', 'right', 'Eyrie'],
 'col5': ['beauty', 'charm', 'center', 'Sunspear']}

to_dict() with orient="series"
{'col1': 0     one
1    zero
2    four
3    five
Name: col1, dtype: object,
 'col2': 0    1
1    0
2    4
3    5
Name: col2, dtype: int64,
 'col3': 0            up
1          down
2          lef