# Introducing the pandas DataFrame

In [1]:
import pandas as pd

In [2]:
# Sample records stored column by column: each list holds one value per
# person, in the same order across all four columns.
data = {
    'name': ['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'],
    'city': ['Mexico City', 'Toronto', 'Prague', 'Shanghai', 'Manchester', 'Cairo', 'Osaka'],
    'age': [41, 28, 33, 34, 38, 31, 37],
    'py-score': [88.0, 79.0, 81.0, 80.0, 68.0, 61.0, 84.0],
}

# Row labels start at 101, one label per entry (101..107 for the seven people).
index = range(101, 101 + len(data['name']))

In [4]:
# Build the frame: the dict keys become the columns and `index` supplies
# the row labels.
df = pd.DataFrame(data, index=index)

In [5]:
# Confirm that the constructor returned a DataFrame object.
type(df)

pandas.core.frame.DataFrame

In [6]:
# Row labels: the range passed as `index=` is kept as a RangeIndex (101..107).
df.index

RangeIndex(start=101, stop=108, step=1)

In [7]:
# Column labels, in the same order as the keys of the source dict.
df.columns

Index(['name', 'city', 'age', 'py-score'], dtype='object')

In [8]:
# The column labels are themselves stored in a pandas Index object.
type(df.columns)

pandas.core.indexes.base.Index

In [9]:
# Only the last expression of a cell is displayed, so a df.head() placed
# before this line would be evaluated and silently discarded.
# Show just the last five rows here.
df.tail()

Unnamed: 0,name,city,age,py-score
103,Jana,Prague,33,81.0
104,Yi,Shanghai,34,80.0
105,Robin,Manchester,38,68.0
106,Amal,Cairo,31,61.0
107,Nori,Osaka,37,84.0


In [10]:
# First five rows (head() returns five rows when called without an argument).
df.head()

Unnamed: 0,name,city,age,py-score
101,Xavier,Mexico City,41,88.0
102,Ann,Toronto,28,79.0
103,Jana,Prague,33,81.0
104,Yi,Shanghai,34,80.0
105,Robin,Manchester,38,68.0


In [11]:
# Display the whole frame; it is small enough (seven rows) to show in full.
df

Unnamed: 0,name,city,age,py-score
101,Xavier,Mexico City,41,88.0
102,Ann,Toronto,28,79.0
103,Jana,Prague,33,81.0
104,Yi,Shanghai,34,80.0
105,Robin,Manchester,38,68.0
106,Amal,Cairo,31,61.0
107,Nori,Osaka,37,84.0


In [12]:
# Demonstrates a common mistake: without quotes, `city` is looked up as a
# Python variable, which is not defined here, so this raises NameError.
df[city]

NameError: name 'city' is not defined

In [13]:
# Column labels are strings, so the label must be quoted inside the brackets.
df['city']

101    Mexico City
102        Toronto
103         Prague
104       Shanghai
105     Manchester
106          Cairo
107          Osaka
Name: city, dtype: object

In [14]:
# A single column selected with [] is a pandas Series.
type(df['city'])

pandas.core.series.Series

In [15]:
# Keep a reference to the 'city' column for the following cells.
cities = df['city']

In [16]:
# Show the extracted column; it keeps the frame's row labels and the name 'city'.
cities

101    Mexico City
102        Toronto
103         Prague
104       Shanghai
105     Manchester
106          Cairo
107          Osaka
Name: city, dtype: object

In [17]:
# The Series carries the same row index as the DataFrame it came from.
cities.index

RangeIndex(start=101, stop=108, step=1)

In [18]:
# With an integer index, an integer key is matched against the labels
# (101..107), not against positions, so 2 is not found and KeyError is raised.
# Use cities.iloc[2] for position-based access.
cities[2]

KeyError: 2

In [19]:
# 102 is an existing row label, so the label lookup succeeds.
cities[102]

'Toronto'

In [20]:
# Attribute-style access to a column. This only works when the label is a
# valid Python identifier, so it cannot be used for 'py-score'.
df.age

101    41
102    28
103    33
104    34
105    38
106    31
107    37
Name: age, dtype: int64

In [21]:
# Select the column first, then the value at row label 105 within that Series.
df.age[105]

38

In [22]:
# .loc selects by label: row 105 comes back as a Series keyed by column name.
df.loc[105]

name             Robin
city        Manchester
age                 38
py-score          68.0
Name: 105, dtype: object

In [23]:
# Fetch one cell with a single label-based lookup (row label, column label)
# rather than chaining two lookups through an intermediate row Series.
df.loc[105, 'city']

'Manchester'

## Creating a pandas DataFrame

In [24]:
# Calling the constructor with no arguments creates an empty DataFrame.
pd.DataFrame()

In [25]:
# Without parentheses nothing is constructed; the expression evaluates to the
# DataFrame class itself.
pd.DataFrame

pandas.core.frame.DataFrame

In [26]:
# With parentheses the class is called, giving an empty DataFrame
# (same call as in cell In [24]).
pd.DataFrame()

In [27]:
import numpy as np

In [28]:
# Column values of three different kinds: a list, a NumPy array and a scalar.
d = {
    'x': [1, 2, 3],
    'y': np.array([2, 4, 8]),
    'z': 100,
}

In [29]:
# Show the dict: 'x' is a list, 'y' a NumPy array and 'z' a single scalar.
d

{'x': [1, 2, 3], 'y': array([2, 4, 8]), 'z': 100}

In [32]:
# Build a frame from the dict: the scalar 'z' is repeated on every row,
# `index` supplies the row labels and `columns` fixes the column order.
pd.DataFrame(
    d,
    index=[100, 200, 300],
    columns=['z', 'y', 'x'],
)

Unnamed: 0,z,y,x
100,100,2,1
200,100,4,2
300,100,8,3


In [35]:
# One dict per row; every dict uses the same keys 'x', 'y' and 'z'.
lista = [
    {'x': 1, 'y': 2, 'z': 100},
    {'x': 2, 'y': 4, 'z': 100},
    {'x': 3, 'y': 8, 'z': 100},
]

In [36]:
# Show the list of row dicts.
lista

[{'x': 1, 'y': 2, 'z': 100},
 {'x': 2, 'y': 4, 'z': 100},
 {'x': 3, 'y': 8, 'z': 100}]

In [40]:
# Build a frame from the list of row dicts and label the rows r1..r3.
second_pd = pd.DataFrame(
    lista,
    index=['r1', 'r2', 'r3'],
)

In [41]:
# Display the frame: the dict keys became the columns, r1..r3 are the row labels.
second_pd

Unnamed: 0,x,y,z
r1,1,2,100
r2,2,4,100
r3,3,8,100


In [42]:
# Rows as plain nested lists. `l` stays bound to the very same list object as
# `list2`; nothing in the visible cells reads `l`.
l = [[1, 2, 100], [2, 4, 100], [3, 8, 100]]
list2 = l

In [43]:
# Show the nested list (one inner list per row).
list2

[[1, 2, 100], [2, 4, 100], [3, 8, 100]]

In [44]:
  third_pd = pd.DataFrame(list2, columns = ['XX', 'YY', 'ZZ'])

In [45]:
# Display the frame; no index was passed, so the rows are labelled 0..2.
third_pd

Unnamed: 0,XX,YY,ZZ
0,1,2,100
1,2,4,100
2,3,8,100


In [46]:
# 3x3 integer array, written one row per line.
arr = np.array([
    [1, 2, 100],
    [2, 4, 100],
    [3, 8, 100],
])

In [47]:
# Show the 3x3 array.
arr

array([[  1,   2, 100],
       [  2,   4, 100],
       [  3,   8, 100]])

In [48]:
# Wrap the 2-D array in a frame: `columns` labels the array's three columns;
# no index is passed.
df_4 = pd.DataFrame(
    arr,
    columns=['a', 'b', 'c'],
)

In [49]:
# Display the frame built from the array; its rows are labelled 0..2.
df_4

Unnamed: 0,a,b,c
0,1,2,100
1,2,4,100
2,3,8,100
