In [1]:
import pandas as pd

In [5]:
# read a tab-separated file; read_table splits on tabs unless `sep` is given
orders = pd.read_table("http://bit.ly/chiporders")

In [6]:
# preview the first rows to confirm the fields landed in separate columns
orders.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [7]:
# read a pipe-separated file WITHOUT specifying the separator (deliberate):
# the default tab separator does not match "|", so each line is kept as one field
users = pd.read_table("http://bit.ly/movieusers")

In [8]:
# every record shows up as a single unsplit string such as "2|53|F|other|94043"
users.head()

Unnamed: 0,1|24|M|technician|85711
0,2|53|F|other|94043
1,3|23|M|writer|32067
2,4|24|M|technician|43537
3,5|33|F|other|15213
4,6|42|M|executive|98101


In [9]:
# pass sep="|" so each line is split on the pipe character
users = pd.read_table("http://bit.ly/movieusers", sep="|")

In [10]:
# the fields are split now, but the first record (1, 24, M, ...) was used as the header row
users.head()

Unnamed: 0,1,24,M,technician,85711
0,2,53,F,other,94043
1,3,23,M,writer,32067
2,4,24,M,technician,43537
3,5,33,F,other,15213
4,6,42,M,executive,98101


In [11]:
# the file has no header row; without header=None the first record is treated as the column names
users = pd.read_table("http://bit.ly/movieusers", sep="|", header=None)

In [12]:
# the first record is data again; the columns are labelled with default integers 0-4
users.head()

Unnamed: 0,0,1,2,3,4
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [13]:
# the file carries no header row, so supply the column labels explicitly via `names`
names = [
    "user_id",
    "age",
    "gender",
    "occupation",
    "zip_code",
]
users = pd.read_table(
    "http://bit.ly/movieusers",
    names=names,
    sep="|",
)

In [14]:
# the columns now carry the labels passed through `names`
users.head()

Unnamed: 0,user_id,age,gender,occupation,zip_code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


<h5>Objects of Pandas</h5>
<p>pandas has two main object types:</p>
<ul>
<li>DataFrame - a two-dimensional table made up of rows and columns</li>
<li>Series - a single one-dimensional column; each column of a DataFrame is a Series</li>
</ul>

In [20]:
# a CSV (comma-separated) file can be read with either read_table or read_csv
# 1st approach: read_table, whose default separator is tab, so sep="," must be passed
ufo = pd.read_table("http://bit.ly/uforeports", sep=",")
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [21]:
# 2nd approach: read_csv, whose default separator is already a comma
# (the preview below matches the read_table(..., sep=",") preview above)
ufo = pd.read_csv("http://bit.ly/uforeports")
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [22]:
# check the type of the whole table returned by read_csv: it is a DataFrame
type(ufo)

pandas.core.frame.DataFrame

In [24]:
# selecting a single column with brackets gives a Series, not a DataFrame
type(ufo["City"])

pandas.core.series.Series

<h5>There are two ways to select a single column from a DataFrame</h5>
<ul>
<li>using dot notation, e.g. <code>ufo.City</code></li>
<li>using bracket notation, e.g. <code>ufo["City"]</code></li>
</ul>

In [25]:
# dot notation: the City column is reachable as an attribute of the DataFrame
ufo.City

0                      Ithaca
1                 Willingboro
2                     Holyoke
3                     Abilene
4        New York Worlds Fair
5                 Valley City
6                 Crater Lake
7                        Alma
8                     Eklutna
9                     Hubbard
10                    Fontana
11                   Waterloo
12                     Belton
13                     Keokuk
14                  Ludington
15                Forest Home
16                Los Angeles
17                  Hapeville
18                     Oneida
19                 Bering Sea
20                   Nebraska
21                        NaN
22                        NaN
23                  Owensboro
24                 Wilderness
25                  San Diego
26                 Wilderness
27                     Clovis
28                 Los Alamos
29               Ft. Duschene
                 ...         
18211                 Holyoke
18212                  Carson
18213     

In [26]:

# bracket notation: selects the same City column as ufo.City
ufo["City"]

0                      Ithaca
1                 Willingboro
2                     Holyoke
3                     Abilene
4        New York Worlds Fair
5                 Valley City
6                 Crater Lake
7                        Alma
8                     Eklutna
9                     Hubbard
10                    Fontana
11                   Waterloo
12                     Belton
13                     Keokuk
14                  Ludington
15                Forest Home
16                Los Angeles
17                  Hapeville
18                     Oneida
19                 Bering Sea
20                   Nebraska
21                        NaN
22                        NaN
23                  Owensboro
24                 Wilderness
25                  San Diego
26                 Wilderness
27                     Clovis
28                 Los Alamos
29               Ft. Duschene
                 ...         
18211                 Holyoke
18212                  Carson
18213     

<p><b>Dot notation does not work in the following cases:</b></p>
<ul>
<li>the column name contains a space</li>
<li>the column name is the same as a built-in DataFrame attribute or method (for example <code>shape</code>)</li>
</ul>

In [29]:
# 1st case where dot notation fails: the column name contains a space.
# `ufo.Colors Reported` is not valid Python, and a cell containing it raises
# SyntaxError before anything runs, which would abort "Restart & Run All".
# Compiling the expression from a string shows the same error while letting
# the rest of the notebook keep executing.
try:
    compile("ufo.Colors Reported", "<dot-notation-demo>", "eval")
except SyntaxError as exc:
    dot_notation_error = f"SyntaxError: {exc.msg}"
    print(dot_notation_error)

SyntaxError: invalid syntax (<ipython-input-29-e1913cf8c17d>, line 2)

In [30]:
# 2nd case where dot notation fails: the name clashes with a built-in DataFrame attribute.
# `ufo.shape` resolves to the DataFrame's own attribute and returns its dimensions
# as (rows, columns); it would do so even if a column were called "shape".
ufo.shape

(18241, 5)

In [31]:
# bracket notation avoids both problems: any column name works, including ones with spaces
ufo["Colors Reported"]

0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
5           NaN
6           NaN
7           NaN
8           NaN
9           NaN
10          NaN
11          NaN
12          RED
13          NaN
14          NaN
15          NaN
16          NaN
17          NaN
18          NaN
19          RED
20          NaN
21          NaN
22          NaN
23          NaN
24          NaN
25          NaN
26          NaN
27          NaN
28          NaN
29          NaN
          ...  
18211       NaN
18212       NaN
18213     GREEN
18214       NaN
18215       NaN
18216    ORANGE
18217       NaN
18218       NaN
18219       NaN
18220      BLUE
18221       NaN
18222       NaN
18223       NaN
18224       NaN
18225       NaN
18226       NaN
18227       NaN
18228       NaN
18229       NaN
18230       NaN
18231       NaN
18232       NaN
18233       RED
18234       NaN
18235       NaN
18236       NaN
18237       NaN
18238       NaN
18239       RED
18240       NaN
Name: Colors Reported, d

<h6>To create a new column in a pandas DataFrame, use bracket notation with the new column name and assign the values to it</h6>
<font color="red">Do not use dot notation to create a new column: it sets an attribute on the DataFrame object instead of adding a column</font>

In [32]:
# build the new column by joining City and State with ", ";
# rows where either value is missing come out as NaN
ufo["location"] = ufo["City"].str.cat(ufo["State"], sep=", ")