# Welcome to Jupyter!

# Importing the pandas library

In [1]:
import pandas as pd
# pd.Series?

### Creating a series

In [2]:
# Build a Series from a list of strings; pandas stores them with dtype `object`.
# The stray leading space that used to be in ' Bear' is removed so the value
# is the plain label 'Bear'.
animals = ['Tiger', 'Bear', 'Cow']
pd.Series(animals)

0    Tiger
1     Bear
2      Cow
dtype: object

In [3]:
# A list of whole numbers: the resulting Series reports an integer dtype.
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

# Handling Missing data in Pandas

In [4]:
# A missing entry in a list of strings is kept as None, and the Series
# is still reported with dtype `object`.
animals = [
    'Tiger',
    'Bear',
    None,
]
pd.Series(data=animals)

0    Tiger
1     Bear
2     None
dtype: object

In [5]:
# The same experiment with numbers: this time the missing entry is displayed
# as NaN and the Series is reported as float64.
numbers = [
    1,
    2,
    None,
]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

### Pandas replaces the null values with NaN, and automatically sets the dtype of the Series to float

#### NaN is not None. NaN also does not compare equal to itself, so `==` cannot be used to test for it

In [6]:
import numpy as np
np.nan == None

False

In [7]:
np.nan == np.nan

False

# Testing the presence of nan

In [8]:
np.isnan(np.nan)

True

## Playing around with series

In [9]:
# Build a Series from a dict: the keys end up as the index labels and the
# values as the data.
sports = dict(Archery='Bhutan', Hockey='India', Sumo='Japan')
s = pd.Series(data=sports)
s

Archery    Bhutan
Hockey      India
Sumo        Japan
dtype: object

## Accessing all the index objects

In [10]:
s.index

Index(['Archery', 'Hockey', 'Sumo'], dtype='object')

## Describing index explicitly via lists (overloading)

In [11]:
# Pass the values and the index labels as two separate lists of equal length.
s = pd.Series(
    data=['Bhutan', 'India', 'Japan'],
    index=['Archery', 'Hockey', 'Sumo'],
)
s

Archery    Bhutan
Hockey      India
Sumo        Japan
dtype: object

## Index Position and Index Label

In [12]:
# Four sport -> country pairs; the dict keys become the index labels of `s`.
sports = dict([
    ('Archery', 'Bhutan'),
    ('Hockey', 'India'),
    ('Sumo', 'Japan'),
    ('Taekwondo', 'South Korea'),
])
s = pd.Series(data=sports)
s

Archery           Bhutan
Hockey             India
Sumo               Japan
Taekwondo    South Korea
dtype: object

# ILOC (Index position)

In [13]:
# Let us access the 4th country 

s.iloc[3]

'South Korea'

# Loc (Index Label)

In [14]:
s.loc['Hockey']

'India'

## Note, If you don't provide index, then index position and index label are the same

## Note, ILOC and LOC are attributes, not methods, so query them with square brackets `[]`, not parentheses `()`

## Pandas is smart. If you pass a number, it will interpret it as iloc; if you pass a label (e.g. a string), it will interpret it as loc

In [15]:
# Plain [] indexing: the integer key is resolved by position and the string
# key by label, as the printed output shows.
# NOTE(review): newer pandas releases deprecate the positional fallback for
# integer keys on a non-integer index -- confirm against the installed
# version; s.iloc[3] / s.loc['Hockey'] are the explicit equivalents.
print(s[3], s['Hockey'], sep='\n')

South Korea
India


#### Don't use this shortcut when the index labels are themselves integers. To be safe, use iloc and loc explicitly

# Demonstration of parallel computing in pandas

In [16]:
s = pd.Series([100.00,200.00,30.00,56.00])
s

0    100.0
1    200.0
2     30.0
3     56.0
dtype: float64

In [17]:
# Baseline: add up the Series one element at a time with a plain Python loop.
total = 0
for item in s:
    # accumulate the running sum
    total +=item

# bare last expression so the notebook displays the result
total    

386.0

# Vectorization in Pandas

In [18]:
# Using the np.sum function. Parameter is an iterable.

In [19]:
# Same sum as the loop version, but computed by a single np.sum call over
# the whole Series instead of iterating element by element.
import numpy as np
total = np.sum(s)
# bare last expression so the notebook displays the result
total

386.0

## Creating a random Series

In [20]:
# Draw 10,000 integers in [0, 1000) from a seeded generator so the notebook
# produces the same Series on every Restart & Run All (the unseeded
# np.random.randint call gave different data on each run).
rng = np.random.default_rng(42)
a = pd.Series(rng.integers(0, 1000, 10000))

## Printing a segment of the big data stored in series

In [21]:
a.head()

0    581
1     65
2    713
3    403
4    170
dtype: int64

In [22]:
len(a)

10000

## Cell magic functions in Jupyter Notebook

In [23]:
%%timeit -n 100
summary = 0
for item in a:
    summary+= item

839 µs ± 161 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Let's see the time improvement with vectorization

In [24]:
%%timeit -n 100
summary = np.sum(a)

151 µs ± 42.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Broadcasting in Series

In [25]:
# Broadcasting: adding a scalar to a Series applies it to every element in
# one operation. The data comes from a seeded generator so the displayed
# values are the same on every run.
rng = np.random.default_rng(7)
s = pd.Series(rng.integers(0, 1000, 10000))
s += 2
s.head()

0    219
1    809
2    747
3    816
4    697
dtype: int64

## Iterating over a series container and modifying the contents

In [26]:
# Element-by-element version of the "+2" update, shown for comparison with
# the broadcast form `s += 2`.
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (label, value) pairs. Series.set_value() has been removed as well, so each
# element is written back through .loc.
for label, value in s.items():
    s.loc[label] = value + 2
s.head()

0    221
1    811
2    749
3    818
4    699
dtype: int64

# Broadcasting is much faster

## Indexing types can automatically be upgraded

In [27]:
# Start from an integer Series with the default index, then assign to a label
# that does not exist yet: the entry is added and, as the output shows, the
# Series is reported with dtype `object`.
s = pd.Series(data=[1, 2, 3])
s.loc['dog'] = 'bark'
s

0         1
1         2
2         3
dog    bark
dtype: object

# Duplication of index values in Series

In [28]:
# Index labels do not have to be unique: all four values share 'cricket'.
cricket_loving_countries = pd.Series(
    data=['a', 'b', 'c', 'd'],
    index=['cricket'] * 4,
)
cricket_loving_countries

cricket    a
cricket    b
cricket    c
cricket    d
dtype: object

In [29]:
# Two sport -> country entries, given as parallel value and label lists.
original_sports = pd.Series(
    data=['India', 'america'],
    index=['hockey', 'football'],
)
original_sports

hockey        India
football    america
dtype: object

# Appending 2 series doesn't modify the original series

In [30]:
# Series.append() was removed in pandas 2.0; pd.concat() is its replacement.
# It returns a new Series holding the entries of both inputs in order and
# leaves the two input Series untouched.
new_series = pd.concat([original_sports, cricket_loving_countries])
new_series

hockey        India
football    america
cricket           a
cricket           b
cricket           c
cricket           d
dtype: object

In [31]:
original_sports

hockey        India
football    america
dtype: object

In [32]:
cricket_loving_countries

cricket    a
cricket    b
cricket    c
cricket    d
dtype: object

# The original series is unchanged

# Notice the brackets in loc

In [33]:
new_series.loc['cricket']

cricket    a
cricket    b
cricket    c
cricket    d
dtype: object

This repo contains an introduction to [Jupyter](https://jupyter.org) and [IPython](https://ipython.org).

Outline of some basics:

* [Notebook Basics](../examples/Notebook/Notebook%20Basics.ipynb)
* [IPython - beyond plain python](../examples/IPython%20Kernel/Beyond%20Plain%20Python.ipynb)
* [Markdown Cells](../examples/Notebook/Working%20With%20Markdown%20Cells.ipynb)
* [Rich Display System](../examples/IPython%20Kernel/Rich%20Output.ipynb)
* [Custom Display logic](../examples/IPython%20Kernel/Custom%20Display%20Logic.ipynb)
* [Running a Secure Public Notebook Server](../examples/Notebook/Running%20the%20Notebook%20Server.ipynb#Securing-the-notebook-server)
* [How Jupyter works](../examples/Notebook/Multiple%20Languages%2C%20Frontends.ipynb) to run code in different languages.

You can also get this tutorial and run it on your laptop:

    git clone https://github.com/ipython/ipython-in-depth

Install IPython and Jupyter:

with [conda](https://www.anaconda.com/download):

    conda install ipython jupyter

with pip:

    # first, always upgrade pip!
    pip install --upgrade pip
    pip install --upgrade ipython jupyter

Start the notebook in the tutorial directory:

    cd ipython-in-depth
    jupyter notebook