# Welcome to Jupyter!

# Creating a dataframe

In [1]:
import pandas as pd

# Each purchase is a Series: the index holds the field names
# ('name', 'item', 'cost') and the values describe one sale.
purchase_1 = pd.Series(dict(name='masood', item='bat', cost=20))
purchase_2 = pd.Series(dict(name='hitesh', item='box', cost=30))
purchase_3 = pd.Series(dict(name='yash', item='wicket', cost=40))

# Print the three series with one blank line between consecutive ones.
print(purchase_1, purchase_2, purchase_3, sep='\n\n')

name    masood
item       bat
cost        20
dtype: object

name    hitesh
item       box
cost        30
dtype: object

name      yash
item    wicket
cost        40
dtype: object


# Dataframe

In [2]:
# Note that there are 2 purchases at the same store ('store 1'),
# so the row index contains a duplicated label.
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['store 1', 'store 1', 'store 2'],
)
df

Unnamed: 0,name,item,cost
store 1,masood,bat,20
store 1,hitesh,box,30
store 2,yash,wicket,40


In [3]:
# Let's see the purchases made at store 2.
# Only one row carries the label 'store 2', so the result is a Series.

df.loc['store 2']

name      yash
item    wicket
cost        40
Name: store 2, dtype: object

In [4]:
# Let's see the purchases made at store 1.
# Two rows share the label 'store 1', so both rows are returned.

df.loc['store 1']

Unnamed: 0,name,item,cost
store 1,masood,bat,20
store 1,hitesh,box,30


In [5]:
# Type checking: a label that matches several rows yields a DataFrame
# (the single-match lookup of 'store 2' produced a Series instead).

type(df.loc['store 1'])

pandas.core.frame.DataFrame

# Accessing columns? Use `df[column]`; a bare label passed to `loc` selects rows

In [6]:
# Select a single column by its label; the result is a Series named 'name'.
df['name']

store 1    masood
store 1    hitesh
store 2      yash
Name: name, dtype: object

# Selecting data on multiple axes

In [7]:
# loc with two indexers: row label first, column label second.
df.loc['store 1','cost']

store 1    20
store 1    30
Name: cost, dtype: int64

# How do we go about accessing columns?

## Method 1, use transpose and loc

In [8]:
# Transpose: the column labels become the row index and vice versa.
df.T

Unnamed: 0,store 1,store 1.1,store 2
name,masood,hitesh,yash
item,bat,box,wicket
cost,20,30,40


In [9]:
# After transposing, 'cost' is a row label, so loc can select it.
# Note that the result is reported with dtype object here.
df.T.loc['cost']

store 1    20
store 1    30
store 2    40
Name: cost, dtype: object

# Very important observation: a single label passed to `loc`/`iloc` selects rows; for columns, index the frame directly (`df['cost']`) or pass a second indexer (`df.loc[:, 'cost']`)

In [10]:
# Direct column indexing on the original (non-transposed) frame.
df['cost']

store 1    20
store 1    30
store 2    40
Name: cost, dtype: int64

# Chaining operations on data frames

In [11]:
# Chained indexing: first select the 'store 1' rows, then take the 'cost'
# column of that intermediate result. df.loc['store 1', 'cost'] yields the
# same values in a single step.
df.loc['store 1']['cost']

store 1    20
store 1    30
Name: cost, dtype: int64

## Chaining is generally not recommended, as it may return a copy of the data instead of a view on the data

# loc operator can do slicing easily

In [12]:
# ':' keeps every row; the list names the columns to keep.
df.loc[:,['name','cost']]

Unnamed: 0,name,cost
store 1,masood,20
store 1,hitesh,30
store 2,yash,40


# Deleting row from data frame

In [13]:
# drop returns a frame without the 'store 2' row; the result is only
# displayed here, not assigned to anything.
df.drop('store 2')

Unnamed: 0,name,item,cost
store 1,masood,bat,20
store 1,hitesh,box,30


In [14]:
# Note that the original data frame is not changed: the result of drop
# was never assigned back to df, so the 'store 2' row is still present.

df

Unnamed: 0,name,item,cost
store 1,masood,bat,20
store 1,hitesh,box,30
store 2,yash,wicket,40


# Copying a  data frame

In [15]:
# Work on an independent copy so that df itself keeps its 'store 2' row.
copy_df = df.copy().drop('store 2')
copy_df

Unnamed: 0,name,item,cost
store 1,masood,bat,20
store 1,hitesh,box,30


# `drop` can remove rows or columns, and can optionally operate in place

# Dropping columns directly in place

In [16]:
# del removes the 'name' column from copy_df itself; no new frame is created.
# NOTE(review): re-running this cell presumably fails once the column is gone.
del copy_df['name']
copy_df

Unnamed: 0,item,cost
store 1,bat,20
store 1,box,30


# Adding a  new column

In [17]:
# Assigning a scalar to a new column label creates the column and fills
# every row with that value (None here).
df['Location'] = None
df

Unnamed: 0,name,item,cost,Location
store 1,masood,bat,20,
store 1,hitesh,box,30,
store 2,yash,wicket,40,


# Broadcasting in columns

In [18]:
# Apply a 20% discount on all the items: the scalar 0.8 is broadcast
# across every value of the 'cost' column.
df['cost'] = df['cost'] * 0.8

In [19]:
# Show the frame with the discounted costs.
df

Unnamed: 0,name,item,cost,Location
store 1,masood,bat,16.0,
store 1,hitesh,box,24.0,
store 2,yash,wicket,32.0,


# Dataframe indexing and loading

In [20]:
# Create a series by projecting the 'cost' column out of the frame.
# costs is obtained directly from df['cost'] (no explicit copy is made),
# which matters for the in-place update performed in the next cell.

costs = df['cost']
costs

store 1    16.0
store 1    24.0
store 2    32.0
Name: cost, dtype: float64

In [21]:
# Broadcast the values in cost to increase by 2.
# '+=' is an augmented assignment on the Series bound to costs, which was
# taken from df['cost'] without an explicit copy; compare df before and
# after this cell to see whether the update propagates.
# NOTE(review): under pandas Copy-on-Write this presumably no longer
# propagates to df; confirm against the pandas version in use.

costs +=2
costs

store 1    18.0
store 1    26.0
store 2    34.0
Name: cost, dtype: float64

In [22]:
# Let's see if the original data frame has changed or not?
# The 'cost' column is the one to compare with the previous display of df.
df

Unnamed: 0,name,item,cost,Location
store 1,masood,bat,18.0,
store 1,hitesh,box,26.0,
store 2,yash,wicket,34.0,


# Beware, the original df has changed

## Use copy explicitly whenever required

In [23]:
# Dump the raw CSV text through the shell to inspect its layout before parsing.
!cat olympics.csv

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12
Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480
Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304
Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26
Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12
Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1
Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1
Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90
Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147
Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1
Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4
Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,

# Reading csv files

In [24]:
# Read the file with default options: the first line of the file (0..15)
# is used as the header and a default integer row index is generated.
# Note: this rebinds df, replacing the purchases frame used so far.
df = pd.read_csv('olympics.csv')
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total
1,Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
2,Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
3,Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
4,Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12


In [25]:
# Build meaningful row and column labels while reading the file.

df = pd.read_csv(
    'olympics.csv',
    skiprows=1,   # skip the first line so the second line supplies the column names
    index_col=0,  # use the first column as the row index
)
df.head()

Unnamed: 0,№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !.1,02 !.1,03 !.1,Total.1,№ Games,01 !.2,02 !.2,03 !.2,Combined total
Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2
Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15
Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70
Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12
Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12


# Accessing all the column names

In [26]:
# List the column labels of the frame. Header names that repeat in the
# file show up with '.1' / '.2' suffixes.
df.columns

Index(['№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter', '01 !.1',
       '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2', '03 !.2',
       'Combined total'],
      dtype='object')

# How to rename columns? The repeated '.1'/'.2' suffixes above are confusing (TODO: demonstrate `DataFrame.rename(columns=...)`)

This repo contains an introduction to [Jupyter](https://jupyter.org) and [IPython](https://ipython.org).

Outline of some basics:

* [Notebook Basics](../examples/Notebook/Notebook%20Basics.ipynb)
* [IPython - beyond plain python](../examples/IPython%20Kernel/Beyond%20Plain%20Python.ipynb)
* [Markdown Cells](../examples/Notebook/Working%20With%20Markdown%20Cells.ipynb)
* [Rich Display System](../examples/IPython%20Kernel/Rich%20Output.ipynb)
* [Custom Display logic](../examples/IPython%20Kernel/Custom%20Display%20Logic.ipynb)
* [Running a Secure Public Notebook Server](../examples/Notebook/Running%20the%20Notebook%20Server.ipynb#Securing-the-notebook-server)
* [How Jupyter works](../examples/Notebook/Multiple%20Languages%2C%20Frontends.ipynb) to run code in different languages.

You can also get this tutorial and run it on your laptop:

    git clone https://github.com/ipython/ipython-in-depth

Install IPython and Jupyter:

with [conda](https://www.anaconda.com/download):

    conda install ipython jupyter

with pip:

    # first, always upgrade pip!
    pip install --upgrade pip
    pip install --upgrade ipython jupyter

Start the notebook in the tutorial directory:

    cd ipython-in-depth
    jupyter notebook