### Reading data and initial exploration

In [1]:
import pandas as pd

In [2]:
url = 'http://bit.ly/movieusers'
ufo = pd.read_table(url, sep=",")

In [16]:
ufo.shape

In [None]:
ufo.head()

In [None]:
ufo.dtypes

In [21]:
ufo.columns

Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')

#### Changing the initial structure 

In [33]:
users = pd.read_table(url2, sep="|", header=None, names=["age", "gender", "occupation", "zip"])

In [39]:
ufo.rename(columns = {'Colors Reported': 'colors_reported', 'Shape Reported': 'shape_reported'}, inplace=True)

In [40]:
ufo.columns

Index(['City', 'colors_reported', 'shape_reported', 'State', 'Time',
       'Location'],
      dtype='object')

In [42]:
ufo.columns = ['City', 'colors_reported', 'shape_reported', 'State', 'Time',
       'Location']

In [None]:
ufo.drop('colors_reported', axis=1)

### Working with series

#### Reading a series

In [None]:
ufo.City

In [None]:
ufo['City']

#### Creating a new series

In [26]:
ufo['Location'] = ufo.City + ', ' + ufo.State

In [None]:
ufo.loc[:5]

### String methods and changing types

In [81]:
url = 'http://bit.ly/chiporders'
orders = pd.read_table(url)

In [82]:
orders.item_price = orders.item_price.str.replace('$', '')
orders.item_price = orders.item_price.astype('float')

## SQL Methods in Pandas

### Selecting Columns

In [93]:
orders.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [None]:
orders[['item_price', 'quantity']]

In [104]:
orders.loc[0:10]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
6,3,1,Side of Chips,,1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",9.25


In [164]:
orders.loc[7]

order_id                                                              4
quantity                                                              1
item_name                                                 Steak Burrito
choice_description    [Tomatillo Red Chili Salsa, [Fajita Vegetables...
item_price                                                        11.75
Name: 7, dtype: object

In [139]:
second_orders = orders.loc[orders.quantity == 2]

In [177]:
orders.loc[orders.quantity == 3].loc[3480].choice_description

'[Roasted Chili Corn Salsa, [Fajita Vegetables, Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]]'

In [146]:
second_orders.choice_description = '[' + second_orders.choice_description 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [181]:
second_orders.choice_description.tolist()

['[[Tomatillo-Red Chili Salsa (Hot), [Black Beans, Rice, Cheese, Sour Cream]]',
 '[[Sprite]',
 '[[Mountain Dew]',
 '[[Tomatillo Green Chili Salsa, [Sour Cream, Cheese, Guacamole]]',
 '[[Tomatillo-Red Chili Salsa (Hot), [Rice, Cheese, Sour Cream, Lettuce]]',
 '[[Tomatillo Red Chili Salsa, [Rice, Black Beans, Sour Cream, Cheese, Lettuce]]',
 '[[Fresh Tomato Salsa, [Fajita Vegetables, Rice, Lettuce]]',
 '[[Diet Coke]',
 '[[Diet Dr. Pepper]',
 '[[Fresh Tomato Salsa, [Fajita Vegetables, Pinto Beans, Guacamole]]',
 '[[Tomatillo Red Chili Salsa, [Cheese, Sour Cream, Lettuce]]',
 nan,
 nan,
 '[[Tomatillo Red Chili Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]]',
 '[[Tomatillo Red Chili Salsa, [Black Beans, Cheese, Guacamole]]',
 '[[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream]]',
 '[[Fresh Tomato Salsa, Fajita Vegetables]',
 '[[Diet Coke]',
 '[[Coca Cola]',
 '[[Fresh Tomato (Mild), [Lettuce, Guacamole, Rice, Cheese]]',
 '[[Fresh Tomato Salsa, Sour Cream]',
 '[[Tomatillo

### Sorting a series

In [None]:
orders.item_price.sort_values()

In [87]:
orders.item_price.sort_values(ascending=False)[0:10]

3598    44.25
3480    35.25
1254    35.00
3602    35.00
3601    33.75
409     32.94
1255    27.75
3603    27.75
3636    26.25
3634    26.25
Name: item_price, dtype: float64

### Sorting a dataframe

In [None]:
orders.sort_values('item_price')

### String methods in pandas

In [187]:
orders.values

array([[1, 1, 'Chips and Fresh Tomato Salsa', nan, 2.39],
       [1, 1, 'Izze', '[Clementine]', 3.39],
       [1, 1, 'Nantucket Nectar', '[Apple]', 3.39],
       ...,
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Pinto Beans, Guacamole, Lettuce]]',
        11.25],
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Lettuce]]', 8.75],
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Pinto Beans, Lettuce]]',
        8.75]], dtype=object)