# PANDAS:

A pandas series is similar to numpy arrays but with more functionality.

In [15]:
import numpy as np
import pandas as pd

In [16]:
a = np.array([1,2,3,4,5,7])
series = pd.Series([1,2,3,4,5,7])     # These function is not available in numpy arrays
print(series.describe(),'\n')

count    6.000000
mean     3.666667
std      2.160247
min      1.000000
25%      2.250000
50%      3.500000
75%      4.750000
max      7.000000
dtype: float64 



In [17]:
series = pd.Series([1,2,3,4,5,7]) 
print(series[0],'\n')               # One can access using the same way like numpy array.
print(series[:2],'\n')              # Same slicing notation
print(series,'\n')

1 

0    1
1    2
dtype: int64 

0    1
1    2
2    3
3    4
4    5
5    7
dtype: int64 



In [18]:
series = pd.Series([1,2,3,4,5,7])
print('\n')

print('mean:',series.mean())
print('std:',series.std())
print('max:',series.max())
print('count:',series.count())



mean: 3.6666666666666665
std: 2.160246899469287
max: 7
count: 6


# Vectorized operations and index arrays:

In [19]:
a = pd.Series([1,2,3,4,5])
b = pd.Series([1,2,1,6,7])

print(a+b)
print(a*2)
print(a>=2)
print(a[a>=3])   

0     2
1     4
2     4
3    10
4    12
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64
0    False
1     True
2     True
3     True
4     True
dtype: bool
2    3
3    4
4    5
dtype: int64


# Pandas Series Index:

In [31]:
users = pd.Series([67890000,536744888,46237384949],index=["Whatspp","Facebook","Instagram"])
print(users)

# Numpy arrays of like superman version of lists.
# A pandas series is like a mix of list and dictionary.

print('users[0]=',users[0])
print('users Facebook = ',users['Facebook'])

Whatspp         67890000
Facebook       536744888
Instagram    46237384949
dtype: int64
users[0]= 67890000
users Facebook =  536744888


# Pandas advantages over numpy:

In [34]:
# How we will do the same thing in numpy:

users = np.array([89374980,72362357,18328773726])
index = np.array(["Whatapp","instagram","Facebook"])
print(users)

# Numpy arrays of like superman version of lists.
# A pandas series is like a mix of list and dictionary.

print('users of {} is {}'.format(index[1],users[1]))

[   89374980    72362357 18328773726]
users of instagram is 72362357


# Pandas loc and iloc:

In [35]:
nums = pd.Series([12,45,66,89])
print(nums)

0    12
1    45
2    66
3    89
dtype: int64


In [37]:
users = pd.Series([67890000,536744888,46237384949],index=["Whatspp","Facebook","Instagram"])

print(users[0])    # Accessing using no. without iloc.

print(users.iloc[0])  # Accessing using number index.

print(users.loc['Whatspp'])   # Accessing using index.

67890000
67890000
67890000


# pandas example finding max:

In [43]:
# finding out maximum users:

print(pd.__version__)

users = pd.Series([67890000,536744888,46237384949],index=["Whatspp","Facebook","Instagram"])

print("social media with maximum users=",users.values.argmax())

print("The maximum users of social media= {} and users = {}".format(index[users.values.argmax()],
                                         users.iloc[users.values.argmax()]))

0.25.1
social media with maximum users= 2
The maximum users of social media= Facebook and users = 46237384949


# Pandas series additon:

In [48]:
# Addition when index are same.

s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = pd.Series([2,7,9,1],index=['a','b','c','d'])
print(s1+s2)

a     3
b     9
c    12
d     5
dtype: int64


In [50]:
# Index have same elements in different order:

s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = pd.Series([2,7,9,1],index=['d','b','a','c'])
print(s1+s2)

a    NaN
b    9.0
c    NaN
d    6.0
e    NaN
f    NaN
dtype: float64


In [51]:
# Index overlap,but do not have exactly the same element:

s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = pd.Series([2,7,9,1],index=['d','b','f','e'])
print(s1+s2)

a    NaN
b    9.0
c    NaN
d    6.0
e    NaN
f    NaN
dtype: float64


In [53]:
# Index do not overlap:

s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = pd.Series([2,7,9,1],index=['h','g','f','e'])
print(s1+s2)

a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
f   NaN
g   NaN
h   NaN
dtype: float64


In [54]:
# using dropna:

s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s2 = pd.Series([2,7,9,1],index=['d','g','f','a'])       
result = s1+s2
print(result)
print(result.dropna())                                     # dropna is used for droping null values.

a    2.0
b    NaN
c    NaN
d    6.0
f    NaN
g    NaN
dtype: float64
a    2.0
d    6.0
dtype: float64


# pandas Apply  function:



In [58]:
# Using pandas apply function:

def make_capital(str):
    return str.capitalize()

s1 = pd.Series(['India','china','france'],index = ['a','b','c'])
s2 = s1.apply(make_capital)

print(s2)

a     India
b     China
c    France
dtype: object


In [59]:
# Pandas DataFrames introduction:

country = pd.DataFrame({
    'country':['India','china','usa'],
    'Population':[12239887784,6737368379,928093738],
    'capital':['Delhi','Bejing','France']
})

print(country)

print(country.mean())

  country   Population capital
0   India  12239887784   Delhi
1   china   6737368379  Bejing
2     usa    928093738  France
Population    6.635117e+09
dtype: float64


# Pandas DataFrame index,iloc and loc:

In [72]:
country = pd.DataFrame({
    'gdp':[7467733,6736883,682889973],
    'Population':[12239887784,6737368379,928093738],
    'capital':['Delhi','Bejing','France']
},
index = ['India','china','Usa'])

print(country,'\n')

print(country.loc['Usa'],'\n')

print(country.iloc[1])

             gdp   Population capital
India    7467733  12239887784   Delhi
china    6736883   6737368379  Bejing
Usa    682889973    928093738  France 

gdp           682889973
Population    928093738
capital          France
Name: Usa, dtype: object 

gdp              6736883
Population    6737368379
capital           Bejing
Name: china, dtype: object


# Pandas sum along axis:

In [73]:
country = pd.DataFrame({
    'gdp':[7467733,6736883,682889973],
    'Population':[12239887784,6737368379,928093738],
    'capital':['Delhi','Bejing','France']
},
index = ['India','china','Usa'])
 
print(country.values)               # Output as numpy values.

[[7467733 12239887784 'Delhi']
 [6736883 6737368379 'Bejing']
 [682889973 928093738 'France']]


In [74]:
# Pandas axis:

df = pd.DataFrame({'A':[9,2,6], 'B':[3,8,1]})

print(df,'\n')

print(df.sum(),'\n')

print(df.sum(axis = 0),'\n')               # row = 0

print(df.sum(axis = 1), '\n')              # row = 1

   A  B
0  9  3
1  2  8
2  6  1 

A    17
B    12
dtype: int64 

A    17
B    12
dtype: int64 

0    12
1    10
2     7
dtype: int64 



# Pandas DataFrame Addition:

In [92]:
# EX:- vectorized operation on DataFrame.

# Adding DataFrame with column name.

df1 = pd.DataFrame({'a':[9,2,6], 'b':[3,8,1], 'c':[5,7,9]})
df2 = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6], 'c':[7,8,9]})
print(df1+df2,'\n')

# Adding DataFrame with overlapping column name.

df1 = pd.DataFrame({'a':[9,2,6], 'b':[3,8,1], 'c':[5,7,9]})
df2 = pd.DataFrame({'d':[1,2,3], 'a':[4,5,6], 'c':[7,8,9]})
print(df1+df2,'\n')

#Adding DataFrame with overlapping row index.

df1 = pd.DataFrame({'a':[9,2,6], 'b':[3,8,1], 'c':[5,7,9]},index=['r1','r2','r3'])
df2 = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6], 'c':[7,8,9]},index=['r4','r5','r1'])
print(df1+df2,'\n')
      



    a   b   c
0  10   7  12
1   4  13  15
2   9   7  18 

    a   b   c   d
0  13 NaN  12 NaN
1   7 NaN  15 NaN
2  12 NaN  18 NaN 

       a    b     c
r1  12.0  9.0  14.0
r2   NaN  NaN   NaN
r3   NaN  NaN   NaN
r4   NaN  NaN   NaN
r5   NaN  NaN   NaN 



# Pandas DataFrame Applymap:

In [97]:
# applymap() : applies a function to every single element in the entire dataframe.

# DataFrame applymap()

df = pd.DataFrame({
                'a':[1,2,3],
                'b':[4,5,6],
                'c':[7,8,9]
            })
    
def add_one(x):
    return x+1
    
print(df.applymap(add_one))    
    

   a  b   c
0  2  5   8
1  3  6   9
2  4  7  10
