# Pandas codes

# Part - 1

In [60]:
import numpy as np
import pandas as pd

In [61]:
# Let pandas display up to 85 columns and 85 rows before truncating output.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 85)

### Creating a dictionary where each key holds a list of values

In [62]:
# Column-oriented sample data: every key is a column name and every list
# holds that column's values, one entry per person.
people = dict(
    first=["John", "Alex", "Tom"],
    last=["Willy", "Rider", "Wagner"],
    email=["john@gmail.com", "alex@yahoo.com", "tom@gmail.com"],
)
people

{'first': ['John', 'Alex', 'Tom'],
 'last': ['Willy', 'Rider', 'Wagner'],
 'email': ['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com']}

In [63]:
people['email'], people['first'], people['last']

(['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com'],
 ['John', 'Alex', 'Tom'],
 ['Willy', 'Rider', 'Wagner'])

In [64]:
# Walk through every (column name, list of values) pair of the dictionary.
for column_name, column_values in people.items():
    print(column_name, column_values, sep="\n")

first
['John', 'Alex', 'Tom']
last
['Willy', 'Rider', 'Wagner']
email
['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com']


In [65]:
# Iterate over the column names (the dictionary keys) only.
for column_name in people.keys():
    print(column_name)

first
last
email


In [66]:
# Iterate over the lists of values only, ignoring the keys.
for column_values in people.values():
    print(column_values)

['John', 'Alex', 'Tom']
['Willy', 'Rider', 'Wagner']
['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com']


### Think of a DataFrame as a dictionary of lists!
Every list (one per key) must have the same length....

In [79]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [22]:
df[['last','email']]

Unnamed: 0,last,email
0,Willy,john@gmail.com
1,Rider,alex@yahoo.com
2,Wagner,tom@gmail.com


### Think of a Series as a single column of data: a one-dimensional labelled array

A DataFrame is a collection of multiple Series...

In [23]:
type(df['email'])

pandas.core.series.Series

In [24]:
df[['email','first']]

Unnamed: 0,email,first
0,john@gmail.com,John
1,alex@yahoo.com,Alex
2,tom@gmail.com,Tom


In [68]:
df.columns.sort_values()

Index(['email', 'first', 'last'], dtype='object')

In [19]:
df.iloc[[0,1], [2]]

Unnamed: 0,email
0,john@gmail.com
1,alex@yahoo.com


### iloc --- select rows and columns by integer position

In [39]:
df.iloc[1:2] # slice: start is included, stop is excluded, so only row 1 comes back

Unnamed: 0,first,last,email
1,Alex,Rider,alex@yahoo.com


In [43]:
df.iloc[1] # a single integer returns that row as a Series

first              Alex
last              Rider
email    alex@yahoo.com
Name: 1, dtype: object

In [28]:
df.iloc[[0]] # a list of positions returns a DataFrame, here with only one row of data

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com


In [21]:
df.iloc[[0,1], [2]] # (rows, columns)

Unnamed: 0,email
0,john@gmail.com
1,alex@yahoo.com


### loc ---- select rows and columns by label

In [22]:
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [23]:
df.loc[[0,1], ['email']] # Note we are passing string as 2nd argument for columns

Unnamed: 0,email
0,john@gmail.com
1,alex@yahoo.com


# Part - 2

## Indexes

In [80]:
df.head()

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


### set_index & reset_index

In [70]:
# A small stand-alone Series holding the integers 1 through 6.
s1 = pd.Series(range(1, 7))
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [81]:
df.columns


Index(['first', 'last', 'email'], dtype='object')

In [82]:
# Use the email column as the row index. Reassigning the result (instead of
# inplace=True) keeps the step explicit and allows method chaining.
df = df.set_index('email')
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
john@gmail.com,John,Willy
alex@yahoo.com,Alex,Rider
tom@gmail.com,Tom,Wagner


In [83]:
df.index.sort_values(ascending=False)

Index(['tom@gmail.com', 'john@gmail.com', 'alex@yahoo.com'], dtype='object', name='email')

In [84]:
df.loc['john@gmail.com']['last'] # two steps: select the row as a Series, then pick its 'last' entry

'Willy'

In [85]:
df.loc['john@gmail.com','last'] # same lookup in one .loc call: (row label, column label)

'Willy'

In [86]:
# Move the email index back into a regular column and restore the default
# integer index. Reassigning replaces inplace=True.
df = df.reset_index()

In [87]:
df.head()

Unnamed: 0,email,first,last
0,john@gmail.com,John,Willy
1,alex@yahoo.com,Alex,Rider
2,tom@gmail.com,Tom,Wagner


# Part - 3

## Filtering rows & columns

In [88]:
people

{'first': ['John', 'Alex', 'Tom'],
 'last': ['Willy', 'Rider', 'Wagner'],
 'email': ['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com']}

In [89]:
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [91]:
filt = (df['last']=='Willy') & (df['first']=='John') # boolean mask: True only for rows where both conditions hold
filt

0     True
1    False
2    False
dtype: bool

In [97]:
filt = (df['last']=='Willy') | (df['first']=='John') # boolean mask: True for rows where either condition holds

In [96]:
df[~filt] # ~ negates the mask, so this keeps the rows where filt is False

Unnamed: 0,first,last,email
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [57]:
df.loc[filt,'email'] # rows where filt is True, restricted to the email column

0    john@gmail.com
Name: email, dtype: object

# Part-5

In [123]:
# Same sample data, but with column names that contain spaces.
people = dict([
    ("first name", ["John", "Alex", "Tom"]),
    ("last name", ["Willy", "Rider", "Wagner"]),
    ("email", ["john@gmail.com", "alex@yahoo.com", "tom@gmail.com"]),
])
people

{'first name': ['John', 'Alex', 'Tom'],
 'last name': ['Willy', 'Rider', 'Wagner'],
 'email': ['john@gmail.com', 'alex@yahoo.com', 'tom@gmail.com']}

In [124]:
df = pd.DataFrame(people)
df.head()

Unnamed: 0,first name,last name,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [117]:
# Rename every column at once by assigning a list with one new name per column.
df.columns = ['pehla', 'dusra', 'teesra']

In [118]:
df.head()

Unnamed: 0,pehla,dusra,teesra
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [None]:
### To upper-case all the column names:

In [125]:
# Rebuild the frame from `people` so this cell gives the same result on a
# fresh top-to-bottom run: an earlier cell overwrites every column name, and
# the upper-cased names must come from the original "first name"/"last name"/
# "email" labels for the following cells to work.
df = pd.DataFrame(people)
# Upper-case every column name.
df.columns = [name.upper() for name in df.columns]

In [126]:
df.head()

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [134]:
# remove all the spaces from the column names
df.columns = df.columns.str.replace(' ', '')
df

Unnamed: 0,FIRSTNAME,LASTNAME,EMAIL
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


In [137]:
# Rename selected columns through an {old name: new name} mapping.
# Reassigning the result replaces inplace=True.
df = df.rename(columns={'FIRSTNAME' : 'first', 'LASTNAME' : 'last', 'EMAIL' : 'email' })

In [138]:
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,Tom,Wagner,tom@gmail.com


### Updating Rows

In [143]:
# Overwrite the whole row labelled 2, supplying one value per column.
df.loc[2] = ['John', 'Smith', 'JohnSmith@gmail.com']

In [144]:
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,John,Smith,JohnSmith@gmail.com


In [148]:
df.loc[2, ['last', 'email']] = ['Jane', 'Jane@email.com'] # update several columns of a single row

In [149]:
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,John,Jane,Jane@email.com


In [150]:
# .at reads or writes exactly one cell, so it takes a single row label and a
# single column label; set each value separately (use .loc to set several
# columns in one statement).
df.at[2, 'last'] = 'Jane'
df.at[2, 'email'] = 'Jane@email.com'

In [151]:
df

Unnamed: 0,first,last,email
0,John,Willy,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,John,Jane,Jane@email.com


In [154]:
### When assigning values, always use loc or iloc instead of chained indexing such as df[filt]['last']
filt = (df['last']=='Willy')
filt


0     True
1    False
2    False
Name: last, dtype: bool

In [157]:
df[filt]['last'] = 'abs' # chained indexing assigns to a temporary copy, so pandas emits the warning below instead of reliably updating df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [160]:
df.loc[filt, 'last'] = 'Maity' # row mask and column label in a single .loc call, so df itself is updated

In [161]:
df

Unnamed: 0,first,last,email
0,John,Maity,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,John,Jane,Jane@email.com


In [165]:
# update every row of the email column at once with a vectorised string method

df['email'] = df['email'].str.upper()

In [166]:
df

Unnamed: 0,first,last,email
0,John,Maity,JOHN@GMAIL.COM
1,Alex,Rider,ALEX@YAHOO.COM
2,John,Jane,JANE@EMAIL.COM


## Major 4 methods..
1. apply
2. map
3. applymap
4. replace

1. apply is used for calling a function on our values; it works on both DataFrames and Series.

In [172]:
df['email'].apply(len)

0    14
1    14
2    14
Name: email, dtype: int64

In [176]:
def update_email(email):
    """Return the given email address converted to lower case."""
    lowered = email.lower()
    return lowered

In [179]:
df['email'] = df['email'].apply(update_email)

In [180]:
df

Unnamed: 0,first,last,email
0,John,Maity,john@gmail.com
1,Alex,Rider,alex@yahoo.com
2,John,Jane,jane@email.com


In [182]:
df['email'] = df['email'].apply(lambda x : x.upper())
df

Unnamed: 0,first,last,email
0,John,Maity,JOHN@GMAIL.COM
1,Alex,Rider,ALEX@YAHOO.COM
2,John,Jane,JANE@EMAIL.COM


In [185]:
### Calling apply on the whole DataFrame runs the function once per column (each column is a Series), not once per value !!

In [186]:
df.apply(len) 

first    3
last     3
email    3
dtype: int64

In [188]:
len(df['email'])

3

In [189]:
# Series.apply(min) runs min on each value separately, so this gives the smallest character of every email string, not the minimum of the column
df['email'].apply(min)

0    .
1    .
2    .
Name: email, dtype: object

In [190]:
df.apply(pd.Series.min) # each column is passed as a Series, giving one minimum per column

first              Alex
last               Jane
email    ALEX@YAHOO.COM
dtype: object

In [192]:
df.apply(lambda x : x.min()) ## here x is a whole column (a Series), so x.min() is that column's minimum

first              Alex
last               Jane
email    ALEX@YAHOO.COM
dtype: object

Calling apply on a Series applies the function to each individual value in the Series, whereas calling apply on a DataFrame applies the function to each Series (column) of the DataFrame.

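The applymap() method is only available on DataFrames; it applies a function to each individual value in the DataFrame. (In pandas 2.1 and later the same operation is available as DataFrame.map(), and applymap() is deprecated.)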

In [193]:
df.applymap(len)

Unnamed: 0,first,last,email
0,4,5,14
1,4,5,14
2,4,4,14


In [194]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,john,maity,john@gmail.com
1,alex,rider,alex@yahoo.com
2,john,jane,jane@email.com


map() is only used on a Series. It is mainly used for substituting each value in a Series with another value.

In [195]:
df['first'].map({'john' : 'Corey', 'alex' : 'Schafer'}) # values without a matching key become NaN; the keys here are lower-case but the column holds 'John'/'Alex', so nothing matches and every row is NaN

0    NaN
1    NaN
2    NaN
Name: first, dtype: object

Use the replace() method if you want values that are not found in the dictionary to be left unchanged instead of being turned into NaN.


In [199]:
df

Unnamed: 0,first,last,email
0,John,Maity,JOHN@GMAIL.COM
1,Alex,Rider,ALEX@YAHOO.COM
2,John,Jane,JANE@EMAIL.COM


In [200]:
df['first'].replace({'John' : 'Corey', 'Alex' : 'Schafer'})

0      Corey
1    Schafer
2      Corey
Name: first, dtype: object