# Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
%%bash
# List the files in the current working directory; the CSV read in the next cell is loaded from here by relative path.
ls

Introduction to Data Science in Python
NumPy Fundamentals.ipynb
Pandas Data Structures.ipynb
Pandas Fundamentals.ipynb
README.md
Working with Missing Data.ipynb
fandango_score_comparison.csv
food_info.csv
titanic_survival.csv
world_alcohol.csv


In [3]:
# Read the Fandango score-comparison CSV (relative to the working directory) into a DataFrame.
fandango_df = pd.read_csv(filepath_or_buffer="fandango_score_comparison.csv")
# Preview the first five rows.
fandango_df.head(n=5)

Unnamed: 0,FILM,RottenTomatoes,RottenTomatoes_User,Metacritic,Metacritic_User,IMDB,Fandango_Stars,Fandango_Ratingvalue,RT_norm,RT_user_norm,...,IMDB_norm,RT_norm_round,RT_user_norm_round,Metacritic_norm_round,Metacritic_user_norm_round,IMDB_norm_round,Metacritic_user_vote_count,IMDB_user_vote_count,Fandango_votes,Fandango_Difference
0,Avengers: Age of Ultron (2015),74,86,66,7.1,7.8,5.0,4.5,3.7,4.3,...,3.9,3.5,4.5,3.5,3.5,4.0,1330,271107,14846,0.5
1,Cinderella (2015),85,80,67,7.5,7.1,5.0,4.5,4.25,4.0,...,3.55,4.5,4.0,3.5,4.0,3.5,249,65709,12640,0.5
2,Ant-Man (2015),80,90,64,8.1,7.8,5.0,4.5,4.0,4.5,...,3.9,4.0,4.5,3.0,4.0,4.0,627,103660,12055,0.5
3,Do You Believe? (2015),18,84,22,4.7,5.4,5.0,4.5,0.9,4.2,...,2.7,1.0,4.0,1.0,2.5,2.5,31,3136,1793,0.5
4,Hot Tub Time Machine 2 (2015),14,28,29,3.4,5.1,3.5,3.0,0.7,1.4,...,2.55,0.5,1.5,1.5,1.5,2.5,88,19560,1021,0.5


DataFrames use Series objects to represent columns. When we select a single column from a DataFrame, pandas returns the Series object representing that column. By default, pandas gives each Series in a DataFrame an integer index: every value has a unique integer index, or position. Like most Python data structures, the Series object uses 0-indexing, so the index ranges from `0` to `n-1`, where `n` is the number of rows. We can use an integer index to select an individual value in a Series if we know its position.

In [4]:
# Selecting a single column with bracket notation returns a pandas Series.
series_film = fandango_df["FILM"]
# sep="\n" puts the Series on its own line; with the default separator a stray
# space is printed in front of the first row, which misaligns it.
print(type(series_film), series_film.head(), sep="\n")

series_rt = fandango_df["RottenTomatoes"]
print(type(series_rt), series_rt.head(), sep="\n")

<class 'pandas.core.series.Series'> 
 0    Avengers: Age of Ultron (2015)
1                 Cinderella (2015)
2                    Ant-Man (2015)
3            Do You Believe? (2015)
4     Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object
<class 'pandas.core.series.Series'> 
 0    74
1    85
2    80
3    18
4    14
Name: RottenTomatoes, dtype: int64


It is possible to create `Series` objects with customized indexes:

http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html#pandas.Series

In [5]:
# Pull the underlying arrays out of the two column Series ...
film_names = series_film.values  # film titles, used as the new index labels
rt_scores = series_rt.values     # RottenTomatoes scores, used as the data

# ... and combine them into a Series that is labelled by film name
# instead of by the default integer positions.
series_custom = pd.Series(data=rt_scores, index=film_names)
series_custom.head()

Avengers: Age of Ultron (2015)    74
Cinderella (2015)                 85
Ant-Man (2015)                    80
Do You Believe? (2015)            18
Hot Tub Time Machine 2 (2015)     14
dtype: int64

It is possible to access values both by label and by integer position:

In [6]:
# .loc selects by index label; passing a list of labels returns a Series with those entries.
print(series_custom.loc[['Minions (2015)', 'Leviathan (2014)']])
# .iloc selects by integer position: positions 5 through 10 (the slice end is exclusive).
print(series_custom.iloc[5:11])

Minions (2015)      54
Leviathan (2014)    99
dtype: int64
The Water Diviner (2015)             63
Irrational Man (2015)                42
Top Five (2014)                      86
Shaun the Sheep Movie (2015)         99
Love & Mercy (2015)                  89
Far From The Madding Crowd (2015)    84
dtype: int64


### Reindexing

http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.Series.reindex.html

It is possible to use `reindex()` for sorting values:

In [7]:
# The index of series_custom in its current order.
original_index = series_custom.index

# sorted() returns a plain list of the labels in ascending order. It gets its own
# name so that `original_index` is not rebound to something that is no longer
# the original index.
sorted_index = sorted(original_index)
# reindex() returns a new Series whose entries follow the order of the given labels.
sorted_by_index = series_custom.reindex(sorted_index)

sorted_by_index.head()

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
dtype: int64

But there are more convenient ways to sort the data in a `Series` object:

In [8]:
# sort_index() orders the entries by label, sort_values() by the stored score.
sc2 = series_custom.sort_index()
sc3 = series_custom.sort_values()

# Show the first ten entries of each ordering, separated by a blank line.
first_by_label = sc2.head(10)
first_by_score = sc3.head(10)
print(first_by_label, "\n")
print(first_by_score)

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
Aloha (2015)                  19
American Sniper (2015)        72
American Ultra (2015)         46
Amy (2015)                    97
Annie (2014)                  27
dtype: int64 

Paul Blart: Mall Cop 2 (2015)     5
Hitman: Agent 47 (2015)           7
Hot Pursuit (2015)                8
Fantastic Four (2015)             9
Taken 3 (2015)                    9
The Boy Next Door (2015)         10
The Loft (2015)                  11
Unfinished Business (2015)       11
Mortdecai (2015)                 12
Seventh Son (2015)               12
dtype: int64


### Filtering

In [9]:
# Element-wise comparison (method form of `series_custom > 90`): the result is a
# boolean Series with the same index, True where the score is above 90.
series_custom.gt(90)

Avengers: Age of Ultron (2015)                    False
Cinderella (2015)                                 False
Ant-Man (2015)                                    False
Do You Believe? (2015)                            False
Hot Tub Time Machine 2 (2015)                     False
The Water Diviner (2015)                          False
Irrational Man (2015)                             False
Top Five (2014)                                   False
Shaun the Sheep Movie (2015)                       True
Love & Mercy (2015)                               False
Far From The Madding Crowd (2015)                 False
Black Sea (2015)                                  False
Leviathan (2014)                                   True
Unbroken (2014)                                   False
The Imitation Game (2014)                         False
Taken 3 (2015)                                    False
Ted 2 (2015)                                      False
Southpaw (2015)                                 

In [10]:
# Build the boolean mask first, then use it to keep only the entries where it is True.
is_above_90 = series_custom > 90
series_greater_than_90 = series_custom[is_above_90]
series_greater_than_90

Shaun the Sheep Movie (2015)                    99
Leviathan (2014)                                99
Selma (2014)                                    99
Ex Machina (2015)                               92
Wild Tales (2014)                               96
The End of the Tour (2015)                      92
Red Army (2015)                                 96
The Hunting Ground (2015)                       92
I'll See You In My Dreams (2015)                94
Timbuktu (2015)                                 99
About Elly (2015)                               97
The Diary of a Teenage Girl (2015)              95
Birdman (2014)                                  92
The Gift (2015)                                 93
Monkey Kingdom (2015)                           94
Mr. Turner (2014)                               98
Seymour: An Introduction (2015)                100
The Wrecking Crew (2015)                        93
Mad Max: Fury Road (2015)                       97
Spy (2015)                     

In [11]:
# Two boolean masks: score strictly above 50, and score strictly below 60.
criteria_one = series_custom.gt(50)
criteria_two = series_custom.lt(60)

# `&` combines the masks element-wise, so only entries satisfying both conditions remain.
meets_both = criteria_one & criteria_two
both_criteria = series_custom[meets_both]
both_criteria

Unbroken (2014)                  51
Southpaw (2015)                  59
Insidious: Chapter 3 (2015)      59
5 Flights Up (2015)              52
Saint Laurent (2015)             51
The Age of Adaline (2015)        54
Paper Towns (2015)               55
Maggie (2015)                    54
Focus (2015)                     57
Escobar: Paradise Lost (2015)    52
Woman in Gold (2015)             52
Minions (2015)                   54
Spare Parts (2015)               52
dtype: int64

In [12]:
# Two boolean masks: score strictly below 10, and score strictly above 90.
# NOTE: these names are rebound from the previous cell and now hold different masks.
criteria_one = series_custom.lt(10)
criteria_two = series_custom.gt(90)

# `|` combines the masks element-wise, so entries satisfying either condition remain.
meets_either = criteria_one | criteria_two
both_criteria = series_custom[meets_either]
both_criteria

Shaun the Sheep Movie (2015)                    99
Leviathan (2014)                                99
Taken 3 (2015)                                   9
Selma (2014)                                    99
Ex Machina (2015)                               92
Wild Tales (2014)                               96
The End of the Tour (2015)                      92
Red Army (2015)                                 96
The Hunting Ground (2015)                       92
I'll See You In My Dreams (2015)                94
Timbuktu (2015)                                 99
About Elly (2015)                               97
The Diary of a Teenage Girl (2015)              95
Fantastic Four (2015)                            9
Hot Pursuit (2015)                               8
Birdman (2014)                                  92
The Gift (2015)                                 93
Monkey Kingdom (2015)                           94
Mr. Turner (2014)                               98
Seymour: An Introduction (2015)

### All together

In [13]:
# Series of the RottenTomatoes and RottenTomatoes_User scores, each labelled by film name.
rt_critics = pd.Series(fandango_df['RottenTomatoes'].values, index=fandango_df['FILM'])
rt_users = pd.Series(fandango_df['RottenTomatoes_User'].values, index=fandango_df['FILM'])

# Mean of the two scores per film. Vectorized arithmetic on two Series that share
# an index already returns a Series on that index, so the result is used directly;
# re-wrapping it in pd.Series(..., index=...) would only trigger a redundant reindex.
rt_mean = (rt_critics + rt_users) / 2
rt_mean

FILM
Avengers: Age of Ultron (2015)                    80.0
Cinderella (2015)                                 82.5
Ant-Man (2015)                                    85.0
Do You Believe? (2015)                            51.0
Hot Tub Time Machine 2 (2015)                     21.0
The Water Diviner (2015)                          62.5
Irrational Man (2015)                             47.5
Top Five (2014)                                   75.0
Shaun the Sheep Movie (2015)                      90.5
Love & Mercy (2015)                               88.0
Far From The Madding Crowd (2015)                 80.5
Black Sea (2015)                                  71.0
Leviathan (2014)                                  89.0
Unbroken (2014)                                   60.5
The Imitation Game (2014)                         91.0
Taken 3 (2015)                                    27.5
Ted 2 (2015)                                      52.0
Southpaw (2015)                                   69.5
Night

# DataFrames

In [16]:
# Use the film title as the row index; drop=False keeps FILM available as a regular column as well.
fandango_films = fandango_df.set_index("FILM", drop=False)

In [18]:
# `dtypes` returns the data type of every column as a Series indexed by column name
types = fandango_films.dtypes
# keep only the float64 entries; the index of what remains holds the matching column names
float_columns = types[types.values == 'float64'].index
# bracket notation with a list-like of names selects just those columns
float_df = fandango_films[float_columns]

# apply() passes each column to the function as a Series (`x`);
# np.std uses ddof=0 by default, i.e. the population standard deviation
deviations = float_df.apply(lambda x: np.std(x))

print(deviations)

Metacritic_User               1.505529
IMDB                          0.955447
Fandango_Stars                0.538532
Fandango_Ratingvalue          0.501106
RT_norm                       1.503265
RT_user_norm                  0.997787
Metacritic_norm               0.972522
Metacritic_user_nom           0.752765
IMDB_norm                     0.477723
RT_norm_round                 1.509404
RT_user_norm_round            1.003559
Metacritic_norm_round         0.987561
Metacritic_user_norm_round    0.785412
IMDB_norm_round               0.501043
Fandango_Difference           0.152141
dtype: float64


The `apply()` method can also be applied to rows by passing `axis=1`:

In [19]:
# Restrict the float columns to the two user-score columns being compared.
user_score_columns = ['RT_user_norm', 'Metacritic_user_nom']
rt_mt_user = float_df[user_score_columns]

# axis=1 makes apply() pass each row to the function as a Series.
rt_mt_deviations = rt_mt_user.apply(lambda row: np.std(row), axis=1)
print(rt_mt_deviations.iloc[0:5])

rt_mt_means = rt_mt_user.apply(lambda row: np.mean(row), axis=1)
print(rt_mt_means.head())

FILM
Avengers: Age of Ultron (2015)    0.375
Cinderella (2015)                 0.125
Ant-Man (2015)                    0.225
Do You Believe? (2015)            0.925
Hot Tub Time Machine 2 (2015)     0.150
dtype: float64
FILM
Avengers: Age of Ultron (2015)    3.925
Cinderella (2015)                 3.875
Ant-Man (2015)                    4.275
Do You Believe? (2015)            3.275
Hot Tub Time Machine 2 (2015)     1.550
dtype: float64
