# What is pandas?
pandas is a Python library for data manipulation and analysis.

In [31]:
import pandas as pd

In [32]:
# Load the friend list from a comma-separated file into a DataFrame.
data_frame = pd.read_csv('data/friend_list.csv')

In [33]:
data_frame

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


# What is DataFrame?
A DataFrame is a two-dimensional, labeled data structure organized into columns.

In [34]:
data_frame.head(2)

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer


In [35]:
data_frame.tail(2)

Unnamed: 0,name,age,job
4,Brian,45,manager
5,Chris,25,intern


# What is Series?
Every single column of a DataFrame is a Series.

In [36]:
# Look the column up by its label; a single column comes back as a Series.
type(data_frame['name'])

pandas.core.series.Series

In [37]:
# Look the column up by its label; a single column comes back as a Series.
type(data_frame['age'])

pandas.core.series.Series

In [38]:
# Look the column up by its label; a single column comes back as a Series.
type(data_frame['job'])

pandas.core.series.Series

In [39]:
# A plain Python list, for comparison with the Series built from the same values in a later cell.
list_tmp = [1, 2, 3]

In [40]:
list_tmp

[1, 2, 3]

In [41]:
# Build a Series of integers through the public constructor, pd.Series
# (pd.core.series is an internal module path and not part of the public API).
s1 = pd.Series([1, 2, 3])

In [42]:
# Build a Series of strings through the public constructor, pd.Series
# (pd.core.series is an internal module path and not part of the public API).
s2 = pd.Series(['one', 'two', 'three'])

In [45]:
# Combine the two Series into one frame; the dict keys become the column labels.
pd.DataFrame({'num': s1, 'word': s2})

Unnamed: 0,num,word
0,1,one
1,2,two
2,3,three


In [46]:
# Load the same CSV again, this time under the shorter name df used by the remaining cells.
df = pd.read_csv('data/friend_list.csv')

In [47]:
df

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


In [48]:
df.head(2)

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer


In [49]:
# The recorded output below matches the .csv load, so the .txt extension
# makes no difference to read_csv here.
df = pd.read_csv('data/friend_list.txt')

In [50]:
df

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


In [51]:
# No separator is given, so read_csv uses its default comma separator; the recorded
# output shows each tab-separated line landing in a single column.
df = pd.read_csv('data/friend_list_tab.txt')

In [52]:
df

Unnamed: 0,name	age	job
0,John\t20\tstudent
1,Jenny\t30\tdeveloper
2,Nate\t30\tteacher
3,Julia\t40\tdentist
4,Brian\t45\tmanager
5,Chris\t25\tintern


In [53]:
# Passing delimiter='\t' makes read_csv split on tabs, which yields the three expected columns.
df = pd.read_csv('data/friend_list_tab.txt', delimiter='\t')

In [54]:
df

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


In [55]:
# header is not specified, so the first line of the file is taken as the column names;
# the recorded output shows John's row consumed as the header.
df = pd.read_csv('data/friend_list_no_head.csv')

In [56]:
df

Unnamed: 0,John,20,student
0,Jenny,30,developer
1,Nate,30,teacher
2,Julia,40,dentist
3,Brian,45,manager
4,Chris,25,intern


In [57]:
# header=None tells read_csv the file has no header row: every line is data and,
# as the recorded output shows, the columns get integer labels (0, 1, 2).
df = pd.read_csv('data/friend_list_no_head.csv', header=None)

In [58]:
df

Unnamed: 0,0,1,2
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


In [59]:
# Assign meaningful column labels; this sets them in place on the existing df.
df.columns = ['name', 'age', 'job']

In [60]:
df

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


In [61]:
# Same file, but the column labels are supplied at read time through names=;
# header=None still marks the first line of the file as data (John stays in row 0).
df = pd.read_csv('data/friend_list_no_head.csv', header=None, names=['name', 'age', 'job'])

In [62]:
df

Unnamed: 0,name,age,job
0,John,20,student
1,Jenny,30,developer
2,Nate,30,teacher
3,Julia,40,dentist
4,Brian,45,manager
5,Chris,25,intern


# Create DataFrame
Use these approaches when you want to create a DataFrame directly from your Python code.

## From a list of dictionaries

In [63]:
# Row-oriented input: one dict per friend, with the keys acting as column labels.
friend_dict_list = [
    dict(name='Jone', age=25, job='student'),
    dict(name='Nate', age=30, job='teacher'),
]

In [64]:
# Build a DataFrame from the list of dicts: one row per dict, dict keys become columns.
# NOTE(review): the recorded output lists the columns alphabetically (age, job, name);
# the resulting column order may depend on the pandas/Python version — confirm locally.
df = pd.DataFrame(friend_dict_list)

In [65]:
df

Unnamed: 0,age,job,name
0,25,student,Jone
1,30,teacher,Nate


In [66]:
# Select the columns by label to put them in the desired order: name, age, job.
df = df[['name', 'age', 'job']]

In [67]:
df

Unnamed: 0,name,age,job
0,Jone,25,student
1,Nate,30,teacher


## From an OrderedDict

In [68]:
from collections import OrderedDict

In [70]:
# Column-oriented input: each key is a column label mapped to that column's values.
# Keys are inserted in the order the columns should appear.
friend_ordered_dict = OrderedDict()
friend_ordered_dict['name'] = ['Jone', 'Nate']
friend_ordered_dict['age'] = [25, 30]
friend_ordered_dict['job'] = ['student', 'teacher']

In [71]:
# Build the DataFrame from the OrderedDict; the recorded output keeps the keys'
# insertion order as the column order (name, age, job).
df = pd.DataFrame.from_dict(friend_ordered_dict)

In [72]:
df

Unnamed: 0,name,age,job
0,Jone,25,student
1,Nate,30,teacher


## From a list

In [73]:
# Row-oriented input: each inner list holds one friend's values with no labels attached.
friend_list = [['Jone', 25, 'studnet'], ['Nate', 30, 'teacher']]

In [74]:
# Column labels to pair with the unlabeled row lists.
column_name = ['name', 'age', 'job']

In [75]:
# Each inner list of friend_list becomes one row; columns= supplies the column labels.
df = pd.DataFrame.from_records(friend_list, columns=column_name)

In [76]:
df

Unnamed: 0,name,age,job
0,Jone,25,studnet
1,Nate,30,teacher


In [77]:
# Column-oriented input as [label, values] pairs, listed in the desired column order.
friend_list = [
    ['name', ['Jone', 'Nate']],
    ['age', [25, 30]],
    ['job', ['student', 'teacher']],
]

In [78]:
# DataFrame.from_items was deprecated in pandas 0.23 and removed in pandas 1.0.
# Converting the (label, values) pairs to an OrderedDict keeps the column order
# and works on both old and current pandas.
df = pd.DataFrame.from_dict(OrderedDict(friend_list))

In [79]:
df

Unnamed: 0,name,age,job
0,Jone,25,student
1,Nate,30,teacher


# Write DataFrame to File
Here is an example DataFrame with a header (column names).

In [84]:
# Sample rows for the export example; Jenny's job is None so the frame contains
# a missing value to write out.
friends = [
    {'name': 'Jone', 'age': 20, 'job': 'studenet'},
    {'name': 'Jenny', 'age': 30, 'job': None},
    {'name': 'Nate', 'age': 30, 'job': 'teacher'}
]
# A list of row dicts goes to the DataFrame constructor (from_dict is meant for a
# dict of columns); columns= fixes the column order in the same step.
df = pd.DataFrame(friends, columns=['name', 'age', 'job'])

In [85]:
df

Unnamed: 0,name,age,job
0,Jone,20,studenet
1,Jenny,30,
2,Nate,30,teacher


In [87]:
# Write df to friends.csv:
#   index=False  -> leave out the row-index column
#   header=False -> leave out the column-name row
#   na_rep='-'   -> write missing values (here, Jenny's job) as '-'
df.to_csv('friends.csv', index=False, header=False, na_rep='-')

In [88]:
df

Unnamed: 0,name,age,job
0,Jone,20,studenet
1,Jenny,30,
2,Nate,30,teacher
